From 5608005b34515cc1b12b6043377cb2c78683c07b Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 11 Mar 2024 21:22:45 +0100 Subject: [PATCH 001/470] Revert "Revert "Don't allow to set max_parallel_replicas to 0 as it doesn't make sense"" --- src/Client/ConnectionPoolWithFailover.cpp | 10 ++++++++++ src/Client/HedgedConnectionsFactory.cpp | 6 +++++- src/Client/HedgedConnectionsFactory.h | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Planner/PlannerJoinTree.cpp | 4 ++-- .../03001_max_parallel_replicas_zero_value.reference | 0 .../03001_max_parallel_replicas_zero_value.sql | 5 +++++ 7 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.reference create mode 100644 tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index 492fd4ae9e22..ad8ed0067d8c 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -21,6 +21,7 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int ALL_CONNECTION_TRIES_FAILED; + extern const int BAD_ARGUMENTS; } @@ -191,11 +192,20 @@ std::vector ConnectionPoolWithFailover::g max_entries = nested_pools.size(); } else if (pool_mode == PoolMode::GET_ONE) + { max_entries = 1; + } else if (pool_mode == PoolMode::GET_MANY) + { + if (settings.max_parallel_replicas == 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of the setting max_parallel_replicas must be greater than 0"); + max_entries = settings.max_parallel_replicas; + } else + { throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown pool allocation mode"); + } if (!priority_func) priority_func = makeGetPriorityFunc(settings); diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index f5b074a02579..703cc1f88212 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -19,6 +19,7 @@ namespace ErrorCodes extern const int ALL_CONNECTION_TRIES_FAILED; extern const int ALL_REPLICAS_ARE_STALE; extern const int LOGICAL_ERROR; + extern const int BAD_ARGUMENTS; } HedgedConnectionsFactory::HedgedConnectionsFactory( @@ -82,7 +83,10 @@ std::vector HedgedConnectionsFactory::getManyConnections(PoolMode } case PoolMode::GET_MANY: { - max_entries = max_parallel_replicas; + if (max_parallel_replicas == 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of the setting max_parallel_replicas must be greater than 0"); + + max_entries = std::min(max_parallel_replicas, shuffled_pools.size()); break; } } diff --git a/src/Client/HedgedConnectionsFactory.h b/src/Client/HedgedConnectionsFactory.h index ce7b553acdd3..dd600d58e1e2 100644 --- a/src/Client/HedgedConnectionsFactory.h +++ b/src/Client/HedgedConnectionsFactory.h @@ -158,7 +158,7 @@ class HedgedConnectionsFactory /// checking the number of requested replicas that are still in process). 
size_t requested_connections_count = 0; - const size_t max_parallel_replicas = 0; + const size_t max_parallel_replicas = 1; const bool skip_unavailable_shards = 0; }; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index bcedba7346d5..e28d8366aa7b 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -947,7 +947,7 @@ bool InterpreterSelectQuery::adjustParallelReplicasAfterAnalysis() if (number_of_replicas_to_use <= 1) { context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - context->setSetting("max_parallel_replicas", UInt64{0}); + context->setSetting("max_parallel_replicas", UInt64{1}); LOG_DEBUG(log, "Disabling parallel replicas because there aren't enough rows to read"); return true; } diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 7b3fb0c5c91f..0fe943e0bc7e 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -295,7 +295,7 @@ bool applyTrivialCountIfPossible( /// The query could use trivial count if it didn't use parallel replicas, so let's disable it query_context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - query_context->setSetting("max_parallel_replicas", UInt64{0}); + query_context->setSetting("max_parallel_replicas", UInt64{1}); LOG_TRACE(getLogger("Planner"), "Disabling parallel replicas to be able to use a trivial count optimization"); } @@ -756,7 +756,7 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres { planner_context->getMutableQueryContext()->setSetting( "allow_experimental_parallel_reading_from_replicas", Field(0)); - planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{0}); + planner_context->getMutableQueryContext()->setSetting("max_parallel_replicas", UInt64{1}); LOG_DEBUG(getLogger("Planner"), "Disabling parallel replicas because there aren't enough rows to read"); } else if (number_of_replicas_to_use < settings.max_parallel_replicas) diff --git a/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.reference b/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql b/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql new file mode 100644 index 000000000000..611aa4777ba6 --- /dev/null +++ b/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql @@ -0,0 +1,5 @@ +drop table if exists test_d; +create table test_d engine=Distributed(test_cluster_two_shard_three_replicas_localhost, system, numbers); +select * from test_d limit 10 settings max_parallel_replicas = 0, prefer_localhost_replica = 0; --{serverError BAD_ARGUMENTS} +drop table test_d; + From defed923313e2cf8c33d3b0890d6a2b86e563c45 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 12 Mar 2024 11:38:27 +0000 Subject: [PATCH 002/470] do nothing in `waitForOutdatedPartsToBeLoaded()` if loading is not required --- src/Storages/MergeTree/MergeTreeData.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index d56cf761cf46..85389828e57a 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1979,6 +1979,15 @@ void MergeTreeData::waitForOutdatedPartsToBeLoaded() const 
TSA_NO_THREAD_SAFETY_ if (isStaticStorage()) return; + /// If waiting is not required, do NOT log and do NOT enable/disable turbo mode to make `waitForOutdatedPartsToBeLoaded` a lightweight check + { + std::unique_lock lock(outdated_data_parts_mutex); + if (outdated_data_parts_loading_canceled) + throw Exception(ErrorCodes::NOT_INITIALIZED, "Loading of outdated data parts was already canceled"); + if (outdated_data_parts_loading_finished) + return; + } + /// We need to load parts as fast as possible getOutdatedPartsLoadingThreadPool().enableTurboMode(); SCOPE_EXIT({ From dd6599868adb6cbc3306a5946cae4ee3f833c138 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 12 Mar 2024 12:06:25 +0000 Subject: [PATCH 003/470] Better check for 0 setting value --- src/Client/ConnectionPoolWithFailover.cpp | 3 -- src/Client/HedgedConnectionsFactory.cpp | 3 -- src/Core/Settings.h | 2 +- src/Core/SettingsFields.cpp | 36 +++++++++++++++++++++++ src/Core/SettingsFields.h | 15 ++++++++++ 5 files changed, 52 insertions(+), 7 deletions(-) diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index ad8ed0067d8c..94531f58bc6a 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -197,9 +197,6 @@ std::vector ConnectionPoolWithFailover::g } else if (pool_mode == PoolMode::GET_MANY) { - if (settings.max_parallel_replicas == 0) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of the setting max_parallel_replicas must be greater than 0"); - max_entries = settings.max_parallel_replicas; } else diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index 703cc1f88212..6b22cc186742 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -83,9 +83,6 @@ std::vector HedgedConnectionsFactory::getManyConnections(PoolMode } case PoolMode::GET_MANY: { - if (max_parallel_replicas == 0) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "The value of the setting max_parallel_replicas must be greater than 0"); - max_entries = std::min(max_parallel_replicas, shuffled_pools.size()); break; } diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d70a6cf51c59..b23538cf2090 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -178,7 +178,7 @@ class IColumn; \ M(Bool, group_by_use_nulls, false, "Treat columns mentioned in ROLLUP, CUBE or GROUPING SETS as Nullable", 0) \ \ - M(UInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled.", 0) \ + M(NonZeroUInt64, max_parallel_replicas, 1, "The maximum number of replicas of each shard used when the query is executed. For consistency (to get different parts of the same partition), this option only works for the specified sampling key. The lag of the replicas is not controlled. Should be always greater than 0", 0) \ M(UInt64, parallel_replicas_count, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. 
This setting will be automatically set up by the initiator server for distributed queries to the number of parallel replicas participating in query processing.", 0) \ M(UInt64, parallel_replica_offset, 0, "This is internal setting that should not be used directly and represents an implementation detail of the 'parallel replicas' mode. This setting will be automatically set up by the initiator server for distributed queries to the index of the replica participating in query processing among parallel replicas.", 0) \ M(String, parallel_replicas_custom_key, "", "Custom key assigning work to replicas when parallel replicas are used.", 0) \ diff --git a/src/Core/SettingsFields.cpp b/src/Core/SettingsFields.cpp index 001d3e09dc97..caa8b3fdffd8 100644 --- a/src/Core/SettingsFields.cpp +++ b/src/Core/SettingsFields.cpp @@ -575,4 +575,40 @@ void SettingFieldCustom::readBinary(ReadBuffer & in) parseFromString(str); } +SettingFieldNonZeroUInt64::SettingFieldNonZeroUInt64(UInt64 x) : SettingFieldUInt64(x) +{ + checkValueNonZero(); +} + +SettingFieldNonZeroUInt64::SettingFieldNonZeroUInt64(const DB::Field & f) : SettingFieldUInt64(f) +{ + checkValueNonZero(); +} + +SettingFieldNonZeroUInt64 & SettingFieldNonZeroUInt64::operator=(UInt64 x) +{ + SettingFieldUInt64::operator=(x); + checkValueNonZero(); + return *this; +} + +SettingFieldNonZeroUInt64 & SettingFieldNonZeroUInt64::operator=(const DB::Field & f) +{ + SettingFieldUInt64::operator=(f); + checkValueNonZero(); + return *this; +} + +void SettingFieldNonZeroUInt64::parseFromString(const String & str) +{ + SettingFieldUInt64::parseFromString(str); + checkValueNonZero(); +} + +void SettingFieldNonZeroUInt64::checkValueNonZero() const +{ + if (value == 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "A setting's value has to be greater than 0"); +} + } diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index 452f3f149ab7..dc70d468851f 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -627,4 +627,19 @@ struct SettingFieldCustom void readBinary(ReadBuffer & in); }; +struct SettingFieldNonZeroUInt64 : public SettingFieldUInt64 +{ +public: + explicit SettingFieldNonZeroUInt64(UInt64 x = 1); + explicit SettingFieldNonZeroUInt64(const Field & f); + + SettingFieldNonZeroUInt64 & operator=(UInt64 x); + SettingFieldNonZeroUInt64 & operator=(const Field & f); + + void parseFromString(const String & str); + +private: + void checkValueNonZero() const; +}; + } From a065231ca15684b7ebd0c1359ede037a46c6d450 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 12 Mar 2024 12:07:36 +0000 Subject: [PATCH 004/470] Remove unused error code --- src/Client/ConnectionPoolWithFailover.cpp | 1 - src/Client/HedgedConnectionsFactory.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/src/Client/ConnectionPoolWithFailover.cpp b/src/Client/ConnectionPoolWithFailover.cpp index 94531f58bc6a..0724153b277e 100644 --- a/src/Client/ConnectionPoolWithFailover.cpp +++ b/src/Client/ConnectionPoolWithFailover.cpp @@ -21,7 +21,6 @@ namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int ALL_CONNECTION_TRIES_FAILED; - extern const int BAD_ARGUMENTS; } diff --git a/src/Client/HedgedConnectionsFactory.cpp b/src/Client/HedgedConnectionsFactory.cpp index 6b22cc186742..0fa2bc129242 100644 --- a/src/Client/HedgedConnectionsFactory.cpp +++ b/src/Client/HedgedConnectionsFactory.cpp @@ -19,7 +19,6 @@ namespace ErrorCodes extern const int ALL_CONNECTION_TRIES_FAILED; extern const int ALL_REPLICAS_ARE_STALE; extern const int LOGICAL_ERROR; - extern 
const int BAD_ARGUMENTS; } HedgedConnectionsFactory::HedgedConnectionsFactory( From 32410a68c136570cc19f0115a6b752f1d4cf93aa Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 13 Mar 2024 18:00:57 +0000 Subject: [PATCH 005/470] Fix tests --- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index e28d8366aa7b..22bbfc044013 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -800,7 +800,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( != parallel_replicas_before_analysis) { context->setSetting("allow_experimental_parallel_reading_from_replicas", Field(0)); - context->setSetting("max_parallel_replicas", UInt64{0}); + context->setSetting("max_parallel_replicas", UInt64{1}); need_analyze_again = true; } From 4285f1a8114084b0b7af8dd3546eae1953072915 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 15 Mar 2024 13:50:03 +0100 Subject: [PATCH 006/470] Revert "Revert "Updated format settings references in the docs (datetime.md)"" --- docs/en/sql-reference/data-types/datetime.md | 8 ++++---- docs/ru/sql-reference/data-types/datetime.md | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index 1adff18f598e..a465106c2ff2 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -36,9 +36,9 @@ You can explicitly set a time zone for `DateTime`-type columns when creating a t The [clickhouse-client](../../interfaces/cli.md) applies the server time zone by default if a time zone isn’t explicitly set when initializing the data type. To use the client time zone, run `clickhouse-client` with the `--use_client_time_zone` parameter. -ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings.md#settings-date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function. +ClickHouse outputs values depending on the value of the [date_time_output_format](../../operations/settings/settings-formats.md#date_time_output_format) setting. `YYYY-MM-DD hh:mm:ss` text format by default. Additionally, you can change the output with the [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime) function. -When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings.md#settings-date_time_input_format) setting. +When inserting data into ClickHouse, you can use different formats of date and time strings, depending on the value of the [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format) setting. ## Examples @@ -147,8 +147,8 @@ Time shifts for multiple days. 
Some pacific islands changed their timezone offse - [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md) - [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md) - [Functions for working with arrays](../../sql-reference/functions/array-functions.md) -- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#settings-date_time_input_format) -- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#settings-date_time_output_format) +- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#date_time_input_format) +- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#date_time_output_format) - [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone) - [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime) diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index 57f24786bb70..25e877941475 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -27,9 +27,9 @@ DateTime([timezone]) Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`. -ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/index.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). +ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/settings-formats.md#date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). -При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/index.md#settings-date_time_input_format). +При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format). 
## Примеры {#primery} @@ -119,8 +119,8 @@ FROM dt - [Функции преобразования типов](../../sql-reference/functions/type-conversion-functions.md) - [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md) - [Функции для работы с массивами](../../sql-reference/functions/array-functions.md) -- [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_input_format) -- [Настройка `date_time_output_format`](../../operations/settings/index.md) +- [Настройка `date_time_input_format`](../../operations/settings/settings-formats.md#date_time_input_format) +- [Настройка `date_time_output_format`](../../operations/settings/settings-formats.md#date_time_output_format) - [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone) - [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime) From db0a5209f1ed0ddc88057ce8d2425b97e9c84397 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 15 Mar 2024 16:14:59 +0000 Subject: [PATCH 007/470] Fix tests --- .../02783_parallel_replicas_trivial_count_optimization.sh | 4 ++-- .../0_stateless/03001_max_parallel_replicas_zero_value.sql | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh b/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh index bafab249b475..20b3efedd49e 100755 --- a/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh +++ b/tests/queries/0_stateless/02783_parallel_replicas_trivial_count_optimization.sh @@ -25,7 +25,7 @@ function run_query_with_pure_parallel_replicas () { $CLICKHOUSE_CLIENT \ --query "$2" \ --query_id "${1}_disabled" \ - --max_parallel_replicas 0 + --max_parallel_replicas 1 $CLICKHOUSE_CLIENT \ --query "$2" \ @@ -50,7 +50,7 @@ function run_query_with_custom_key_parallel_replicas () { $CLICKHOUSE_CLIENT \ --query "$2" \ --query_id "${1}_disabled" \ - --max_parallel_replicas 0 + --max_parallel_replicas 1 $CLICKHOUSE_CLIENT \ --query "$2" \ diff --git a/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql b/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql index 611aa4777ba6..499486713a60 100644 --- a/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql +++ b/tests/queries/0_stateless/03001_max_parallel_replicas_zero_value.sql @@ -1,5 +1,5 @@ drop table if exists test_d; create table test_d engine=Distributed(test_cluster_two_shard_three_replicas_localhost, system, numbers); -select * from test_d limit 10 settings max_parallel_replicas = 0, prefer_localhost_replica = 0; --{serverError BAD_ARGUMENTS} +select * from test_d limit 10 settings max_parallel_replicas = 0, prefer_localhost_replica = 0; --{clientError BAD_ARGUMENTS} drop table test_d; From fa9d9ea3f74e1f813fd40614dc5944f189bb9d94 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 15 Mar 2024 20:23:39 +0000 Subject: [PATCH 008/470] Ignore DROP queries in stress test with 1/2 probability, use TRUNCATE instead of ignoring DROP in upgrade check for Memory/JOIN tables --- programs/client/Client.cpp | 27 +++++---------- src/Client/ClientBase.cpp | 67 +++++++++++++++++++++++++++++++++++++- src/Client/ClientBase.h | 9 ++++- tests/ci/stress.py | 5 +++ tests/clickhouse-test | 2 ++ 5 files changed, 89 
insertions(+), 21 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 4203e4738ddc..70550b5952a1 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -50,7 +50,6 @@ #include #include #include -#include namespace fs = std::filesystem; using namespace std::literals; @@ -953,7 +952,9 @@ void Client::addOptions(OptionsDescription & options_description) ("opentelemetry-tracestate", po::value(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation") ("no-warnings", "disable warnings when client connects to server") + /// TODO: Left for compatibility as it's used in upgrade check, remove after next release and use ignore-drop-queries-probability ("fake-drop", "Ignore all DROP queries, should be used only for testing") + ("ignore-drop-queries-probability", po::value(), "With specified probability ignore all DROP queries (replace them to TRUNCATE for engines like Memory/JOIN), should be used only for testing") ("accept-invalid-certificate", "Ignore certificate verification errors, equal to config parameters openSSL.client.invalidCertificateHandler.name=AcceptCertificateHandler and openSSL.client.verificationMode=none") ; @@ -1096,7 +1097,9 @@ void Client::processOptions(const OptionsDescription & options_description, if (options.count("no-warnings")) config().setBool("no-warnings", true); if (options.count("fake-drop")) - fake_drop = true; + ignore_drop_queries_probability = 1; + if (options.count("ignore-drop-queries-probability")) + ignore_drop_queries_probability = std::min(options["ignore-drop-queries-probability"].as(), 1.); if (options.count("accept-invalid-certificate")) { config().setString("openSSL.client.invalidCertificateHandler.name", "AcceptCertificateHandler"); @@ -1138,13 +1141,6 @@ void Client::processOptions(const OptionsDescription & options_description, } -static bool checkIfStdoutIsRegularFile() -{ - struct stat file_stat; - return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode); -} - - void Client::processConfig() { if (!queries.empty() && config().has("queries-file")) @@ -1181,14 +1177,7 @@ void Client::processConfig() pager = config().getString("pager", ""); is_default_format = !config().has("vertical") && !config().has("format"); - if (is_default_format && checkIfStdoutIsRegularFile()) - { - is_default_format = false; - std::optional format_from_file_name; - format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO); - format = format_from_file_name ? *format_from_file_name : "TabSeparated"; - } - else if (config().has("vertical")) + if (config().has("vertical")) format = config().getString("format", "Vertical"); else format = config().getString("format", is_interactive ? 
"PrettyCompact" : "TabSeparated"); @@ -1392,8 +1381,8 @@ void Client::readArguments( } -#pragma clang diagnostic ignored "-Wunused-function" -#pragma clang diagnostic ignored "-Wmissing-declarations" +#pragma GCC diagnostic ignored "-Wunused-function" +#pragma GCC diagnostic ignored "-Wmissing-declarations" int mainEntryClickHouseClient(int argc, char ** argv) { diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 48962880b8f2..c0865d4fb13a 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -68,6 +68,9 @@ #include #include +#include +#include + #include #include #include @@ -559,6 +562,11 @@ try pager_cmd = ShellCommand::execute(config); out_buf = &pager_cmd->in; } + /// We can use special buffer for query output for internal queries. + else if (output_format_buffer) + { + out_buf = output_format_buffer.get(); + } else { out_buf = &std_out; @@ -868,11 +876,68 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) processError(full_query); } +String ClientBase::getTableEngine(const String & database, const String & table) +{ + auto is_interactive_copy = is_interactive; + auto format_copy = format; + + is_interactive = false; + format = "TSVRaw"; + String result; + output_format_buffer = std::make_unique(result); + String query; + if (database.empty()) + query = fmt::format("SELECT engine FROM system.tables where name='{}' and database=currentDatabase()", table); + else + query = fmt::format("SELECT engine FROM system.tables where name='{}' and database='{}'", table, database); + + try + { + processTextAsSingleQuery(query); + } + catch (...) + { + result = ""; + } + + output_format_buffer->finalize(); + output_format_buffer.reset(); + is_interactive = is_interactive_copy; + format = format_copy; + boost::trim(result); + return result; +} + +void ClientBase::ignoreDropQueryOrTruncateTable(const DB::ASTDropQuery * drop_query) +{ + const auto & database = drop_query->getDatabase(); + const auto & table = drop_query->getTable(); + /// Use TRUNCATE for Memory/JOIN table engines to reduce memory usage in tests. + String table_engine = getTableEngine(database, table); + if (table_engine == "Memory" || table_engine == "JOIN") + { + String truncate_query; + if (database.empty()) + truncate_query = fmt::format("TRUNCATE TABLE {}", drop_query->getTable()); + else + truncate_query = fmt::format("TRUNCATE TABLE {}.{}", drop_query->getDatabase(), drop_query->getTable()); + + auto is_interactive_copy = is_interactive; + is_interactive = false; + processTextAsSingleQuery(truncate_query); + is_interactive = is_interactive_copy; + } +} void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr parsed_query) { - if (fake_drop && parsed_query->as()) + /// In tests we can ignore DROP queries with some probability. 
+ const auto * drop_query = parsed_query->as(); + if (ignore_drop_queries_probability != 0 && drop_query && drop_query->kind == ASTDropQuery::Kind::Drop && std::uniform_real_distribution<>(0.0, 1.0)(thread_local_rng) <= ignore_drop_queries_probability) + { + ignoreDropQueryOrTruncateTable(drop_query); return; + } auto query = query_to_execute; diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index dd08e7c059be..fd81c4c40806 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -178,6 +178,12 @@ class ClientBase : public Poco::Util::Application, public IHints<2> void initQueryIdFormats(); bool addMergeTreeSettings(ASTCreateQuery & ast_create); + void ignoreDropQueryOrTruncateTable(const ASTDropQuery * drop_query); + /// Request table engine from system.tables from server. + String getTableEngine(const String & database, const String & table); + /// Send TRUNCATE query for specific table. + void truncateTable(const String & database, const String & table); + protected: static bool isSyncInsertWithData(const ASTInsertQuery & insert_query, const ContextPtr & context); bool processMultiQueryFromFile(const String & file_name); @@ -248,6 +254,7 @@ class ClientBase : public Poco::Util::Application, public IHints<2> /// The user can specify to redirect query output to a file. std::unique_ptr out_file_buf; std::shared_ptr output_format; + std::unique_ptr output_format_buffer; /// The user could specify special file for server logs (stderr by default) std::unique_ptr out_logs_buf; @@ -307,7 +314,7 @@ class ClientBase : public Poco::Util::Application, public IHints<2> QueryProcessingStage::Enum query_processing_stage; ClientInfo::QueryKind query_kind; - bool fake_drop = false; + double ignore_drop_queries_probability = 0; struct HostAndPort { diff --git a/tests/ci/stress.py b/tests/ci/stress.py index 7ccc058f79fb..b0076449a534 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -66,6 +66,11 @@ def get_options(i: int, upgrade_check: bool) -> str: if random.random() < 0.3: client_options.append(f"http_make_head_request={random.randint(0, 1)}") + # TODO: After release 23.3 use ignore-drop-queries-probability for both + # stress test and upgrade check + if not upgrade_check: + client_options.append("ignore-drop-queries-probability=0.5") + if client_options: options.append(" --client-option " + " ".join(client_options)) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index ce0feadf050e..cc62a1805791 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -2872,6 +2872,8 @@ def parse_args(): help="Do not run shard related tests", ) + # TODO: Remove upgrade-check option after release 23.3 and use + # ignore-drop-queries-probability option in stress.py as in stress tests group.add_argument( "--upgrade-check", action="store_true", From 4ceff16787eb16f5b485a6d70f6d8b29744a16bd Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 15 Mar 2024 20:29:19 +0000 Subject: [PATCH 009/470] Fix bad conflict resolution --- programs/client/Client.cpp | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 70550b5952a1..caf2ce2f68aa 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -50,6 +50,7 @@ #include #include #include +#include namespace fs = std::filesystem; using namespace std::literals; @@ -1141,6 +1142,13 @@ void Client::processOptions(const OptionsDescription & options_description, } +static bool checkIfStdoutIsRegularFile() +{ + struct stat 
file_stat; + return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode); +} + + void Client::processConfig() { if (!queries.empty() && config().has("queries-file")) @@ -1177,7 +1185,14 @@ void Client::processConfig() pager = config().getString("pager", ""); is_default_format = !config().has("vertical") && !config().has("format"); - if (config().has("vertical")) + if (is_default_format && checkIfStdoutIsRegularFile()) + { + is_default_format = false; + std::optional format_from_file_name; + format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO); + format = format_from_file_name ? *format_from_file_name : "TabSeparated"; + } + else if (config().has("vertical")) format = config().getString("format", "Vertical"); else format = config().getString("format", is_interactive ? "PrettyCompact" : "TabSeparated"); @@ -1381,8 +1396,8 @@ void Client::readArguments( } -#pragma GCC diagnostic ignored "-Wunused-function" -#pragma GCC diagnostic ignored "-Wmissing-declarations" +#pragma clang diagnostic ignored "-Wunused-function" +#pragma clang diagnostic ignored "-Wmissing-declarations" int mainEntryClickHouseClient(int argc, char ** argv) { From 2e9130ca1e99587e47fcfff4be318601e4d52cfc Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 15 Mar 2024 20:30:44 +0000 Subject: [PATCH 010/470] Better --- src/Client/ClientBase.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index c0865d4fb13a..0fb8b27e20f4 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -918,9 +918,9 @@ void ClientBase::ignoreDropQueryOrTruncateTable(const DB::ASTDropQuery * drop_qu { String truncate_query; if (database.empty()) - truncate_query = fmt::format("TRUNCATE TABLE {}", drop_query->getTable()); + truncate_query = fmt::format("TRUNCATE TABLE {}", table); else - truncate_query = fmt::format("TRUNCATE TABLE {}.{}", drop_query->getDatabase(), drop_query->getTable()); + truncate_query = fmt::format("TRUNCATE TABLE {}.{}", database, table); auto is_interactive_copy = is_interactive; is_interactive = false; From eada7e8d29aa3d0a595125d70ceb5a7c20c1272f Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 18 Mar 2024 13:18:58 +0000 Subject: [PATCH 011/470] Fix server version --- tests/ci/stress.py | 2 +- tests/clickhouse-test | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/stress.py b/tests/ci/stress.py index b0076449a534..e04f06c951b3 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -66,7 +66,7 @@ def get_options(i: int, upgrade_check: bool) -> str: if random.random() < 0.3: client_options.append(f"http_make_head_request={random.randint(0, 1)}") - # TODO: After release 23.3 use ignore-drop-queries-probability for both + # TODO: After release 24.3 use ignore-drop-queries-probability for both # stress test and upgrade check if not upgrade_check: client_options.append("ignore-drop-queries-probability=0.5") diff --git a/tests/clickhouse-test b/tests/clickhouse-test index c457ae9a3031..83bf2f59131b 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -3170,7 +3170,7 @@ def parse_args(): help="Do not run shard related tests", ) - # TODO: Remove upgrade-check option after release 23.3 and use + # TODO: Remove upgrade-check option after release 24.3 and use # ignore-drop-queries-probability option in stress.py as in stress tests group.add_argument( "--upgrade-check", From 051103b0e0fef928cc41aafd00de7565b776dfd8 Mon Sep 17 00:00:00 2001 From: 
serxa Date: Mon, 18 Mar 2024 15:39:21 +0000 Subject: [PATCH 012/470] Fix db iterator wait during async metrics collection --- src/Databases/DatabaseAtomic.cpp | 4 +- src/Databases/DatabaseAtomic.h | 2 +- src/Databases/DatabaseDictionary.cpp | 2 +- src/Databases/DatabaseDictionary.h | 2 +- src/Databases/DatabaseFilesystem.cpp | 2 +- src/Databases/DatabaseFilesystem.h | 2 +- src/Databases/DatabaseHDFS.h | 2 +- src/Databases/DatabaseLazy.cpp | 2 +- src/Databases/DatabaseLazy.h | 2 +- src/Databases/DatabaseOrdinary.cpp | 44 +++++++++++++------ src/Databases/DatabaseOrdinary.h | 3 +- src/Databases/DatabaseReplicated.cpp | 2 +- src/Databases/DatabaseS3.cpp | 2 +- src/Databases/DatabaseS3.h | 2 +- src/Databases/DatabasesCommon.cpp | 4 +- src/Databases/DatabasesCommon.h | 2 +- src/Databases/DatabasesOverlay.cpp | 2 +- src/Databases/DatabasesOverlay.h | 2 +- src/Databases/IDatabase.h | 13 +++++- .../MySQL/DatabaseMaterializedMySQL.cpp | 5 +-- .../MySQL/DatabaseMaterializedMySQL.h | 2 +- src/Databases/MySQL/DatabaseMySQL.cpp | 2 +- src/Databases/MySQL/DatabaseMySQL.h | 2 +- .../DatabaseMaterializedPostgreSQL.cpp | 6 +-- .../DatabaseMaterializedPostgreSQL.h | 2 +- src/Databases/PostgreSQL/DatabasePostgreSQL.h | 2 +- src/Databases/SQLite/DatabaseSQLite.cpp | 2 +- src/Databases/SQLite/DatabaseSQLite.h | 2 +- src/Interpreters/DatabaseCatalog.cpp | 3 ++ src/Interpreters/DatabaseCatalog.h | 6 +-- src/Interpreters/InterpreterDropQuery.cpp | 1 + .../ServerAsynchronousMetrics.cpp | 5 ++- src/Server/ReplicasStatusHandler.cpp | 5 ++- 33 files changed, 85 insertions(+), 56 deletions(-) diff --git a/src/Databases/DatabaseAtomic.cpp b/src/Databases/DatabaseAtomic.cpp index 0bf7c8af4b4d..37b6123acefc 100644 --- a/src/Databases/DatabaseAtomic.cpp +++ b/src/Databases/DatabaseAtomic.cpp @@ -416,9 +416,9 @@ void DatabaseAtomic::assertCanBeDetached(bool cleanup) } DatabaseTablesIteratorPtr -DatabaseAtomic::getTablesIterator(ContextPtr local_context, const IDatabase::FilterByNameFunction & filter_by_table_name) const +DatabaseAtomic::getTablesIterator(ContextPtr local_context, const IDatabase::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const { - auto base_iter = DatabaseOrdinary::getTablesIterator(local_context, filter_by_table_name); + auto base_iter = DatabaseOrdinary::getTablesIterator(local_context, filter_by_table_name, skip_not_loaded); return std::make_unique(std::move(typeid_cast(*base_iter))); } diff --git a/src/Databases/DatabaseAtomic.h b/src/Databases/DatabaseAtomic.h index 404478f7cd1e..b59edd479ba6 100644 --- a/src/Databases/DatabaseAtomic.h +++ b/src/Databases/DatabaseAtomic.h @@ -46,7 +46,7 @@ class DatabaseAtomic : public DatabaseOrdinary void drop(ContextPtr /*context*/) override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; void beforeLoadingMetadata(ContextMutablePtr context, LoadingStrictnessLevel mode) override; diff --git a/src/Databases/DatabaseDictionary.cpp b/src/Databases/DatabaseDictionary.cpp index 76fdb4fa961f..adb9a659fcd7 100644 --- a/src/Databases/DatabaseDictionary.cpp +++ b/src/Databases/DatabaseDictionary.cpp @@ -80,7 +80,7 @@ StoragePtr DatabaseDictionary::tryGetTable(const String & table_name, ContextPtr return createStorageDictionary(getDatabaseName(), load_result, getContext()); } -DatabaseTablesIteratorPtr 
DatabaseDictionary::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name) const +DatabaseTablesIteratorPtr DatabaseDictionary::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name, bool /* skip_not_loaded */) const { return std::make_unique(listTables(filter_by_table_name), getDatabaseName()); } diff --git a/src/Databases/DatabaseDictionary.h b/src/Databases/DatabaseDictionary.h index 469801d183e6..a18ea833710e 100644 --- a/src/Databases/DatabaseDictionary.h +++ b/src/Databases/DatabaseDictionary.h @@ -34,7 +34,7 @@ class DatabaseDictionary final : public IDatabase, WithContext StoragePtr tryGetTable(const String & table_name, ContextPtr context) const override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; bool empty() const override; diff --git a/src/Databases/DatabaseFilesystem.cpp b/src/Databases/DatabaseFilesystem.cpp index 05af0acf978e..b27a816a60d4 100644 --- a/src/Databases/DatabaseFilesystem.cpp +++ b/src/Databases/DatabaseFilesystem.cpp @@ -229,7 +229,7 @@ std::vector> DatabaseFilesystem::getTablesForBacku * Returns an empty iterator because the database does not have its own tables * But only caches them for quick access */ -DatabaseTablesIteratorPtr DatabaseFilesystem::getTablesIterator(ContextPtr, const FilterByNameFunction &) const +DatabaseTablesIteratorPtr DatabaseFilesystem::getTablesIterator(ContextPtr, const FilterByNameFunction &, bool) const { return std::make_unique(Tables{}, getDatabaseName()); } diff --git a/src/Databases/DatabaseFilesystem.h b/src/Databases/DatabaseFilesystem.h index 3338aa28c21a..4b9db5e574d8 100644 --- a/src/Databases/DatabaseFilesystem.h +++ b/src/Databases/DatabaseFilesystem.h @@ -45,7 +45,7 @@ class DatabaseFilesystem : public IDatabase, protected WithContext std::vector> getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &, bool) const override; protected: StoragePtr getTableImpl(const String & name, ContextPtr context, bool throw_on_error) const; diff --git a/src/Databases/DatabaseHDFS.h b/src/Databases/DatabaseHDFS.h index b586a912e163..d19918000cf4 100644 --- a/src/Databases/DatabaseHDFS.h +++ b/src/Databases/DatabaseHDFS.h @@ -45,7 +45,7 @@ class DatabaseHDFS : public IDatabase, protected WithContext void shutdown() override; std::vector> getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &, bool) const override; protected: StoragePtr getTableImpl(const String & name, ContextPtr context) const; diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index 623c7fbee980..fb1b3ee626b8 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -152,7 +152,7 @@ StoragePtr DatabaseLazy::tryGetTable(const String & table_name) const return loadTable(table_name); } -DatabaseTablesIteratorPtr DatabaseLazy::getTablesIterator(ContextPtr, const FilterByNameFunction & 
filter_by_table_name) const +DatabaseTablesIteratorPtr DatabaseLazy::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name, bool /* skip_not_loaded */) const { std::lock_guard lock(mutex); Strings filtered_tables; diff --git a/src/Databases/DatabaseLazy.h b/src/Databases/DatabaseLazy.h index 2b1b119754d6..4347649117d7 100644 --- a/src/Databases/DatabaseLazy.h +++ b/src/Databases/DatabaseLazy.h @@ -62,7 +62,7 @@ class DatabaseLazy final : public DatabaseOnDisk bool empty() const override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; void attachTable(ContextPtr context, const String & table_name, const StoragePtr & table, const String & relative_table_path) override; diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 95bdcfc7dcef..3859c2fe0ceb 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -438,24 +438,40 @@ void DatabaseOrdinary::stopLoading() stop_load_table.clear(); } -DatabaseTablesIteratorPtr DatabaseOrdinary::getTablesIterator(ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const +DatabaseTablesIteratorPtr DatabaseOrdinary::getTablesIterator(ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const { - // Wait for every table (matching the filter) to be loaded and started up before we make the snapshot. - // It is important, because otherwise table might be: - // - not attached and thus will be missed in the snapshot; - // - not started, which is not good for DDL operations. - LoadTaskPtrs tasks_to_wait; + if (!skip_not_loaded) { - std::lock_guard lock(mutex); - if (!filter_by_table_name) - tasks_to_wait.reserve(startup_table.size()); - for (const auto & [table_name, task] : startup_table) - if (!filter_by_table_name || filter_by_table_name(table_name)) - tasks_to_wait.emplace_back(task); + // Wait for every table (matching the filter) to be loaded and started up before we make the snapshot. + // It is important, because otherwise table might be: + // - not attached and thus will be missed in the snapshot; + // - not started, which is not good for DDL operations. 
+ LoadTaskPtrs tasks_to_wait; + { + std::lock_guard lock(mutex); + if (!filter_by_table_name) + tasks_to_wait.reserve(startup_table.size()); + for (const auto & [table_name, task] : startup_table) + if (!filter_by_table_name || filter_by_table_name(table_name)) + tasks_to_wait.emplace_back(task); + } + waitLoad(currentPoolOr(TablesLoaderForegroundPoolId), tasks_to_wait); } - waitLoad(currentPoolOr(TablesLoaderForegroundPoolId), tasks_to_wait); + return DatabaseWithOwnTablesBase::getTablesIterator(local_context, filter_by_table_name, skip_not_loaded); +} - return DatabaseWithOwnTablesBase::getTablesIterator(local_context, filter_by_table_name); +Strings DatabaseOrdinary::getAllTableNames(ContextPtr context) const +{ + std::set unique_names; + { + std::lock_guard lock(mutex); + for (const auto & [table_name, _] : tables) + unique_names.emplace(table_name); + // Not yet loaded table are not listed in `tables`, so we have to add table names from tasks + for (const auto & [table_name, _] : startup_table) + unique_names.emplace(table_name); + } + return {unique_names.begin(), unique_names.end()}; } void DatabaseOrdinary::alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata) diff --git a/src/Databases/DatabaseOrdinary.h b/src/Databases/DatabaseOrdinary.h index 7089540337a3..fa5827903cbc 100644 --- a/src/Databases/DatabaseOrdinary.h +++ b/src/Databases/DatabaseOrdinary.h @@ -56,7 +56,8 @@ class DatabaseOrdinary : public DatabaseOnDisk LoadTaskPtr startupDatabaseAsync(AsyncLoader & async_loader, LoadJobSet startup_after, LoadingStrictnessLevel mode) override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; + Strings getAllTableNames(ContextPtr context) const override; void alterTable( ContextPtr context, diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 3b6a712510d6..0c2cf1bb0110 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1323,13 +1323,13 @@ void DatabaseReplicated::drop(ContextPtr context_) void DatabaseReplicated::stopReplication() { - stopLoading(); if (ddl_worker) ddl_worker->shutdown(); } void DatabaseReplicated::shutdown() { + stopLoading(); stopReplication(); ddl_worker_initialized = false; ddl_worker = nullptr; diff --git a/src/Databases/DatabaseS3.cpp b/src/Databases/DatabaseS3.cpp index 159a5242dbe0..1589cc1c75db 100644 --- a/src/Databases/DatabaseS3.cpp +++ b/src/Databases/DatabaseS3.cpp @@ -303,7 +303,7 @@ std::vector> DatabaseS3::getTablesForBackup(const * Returns an empty iterator because the database does not have its own tables * But only caches them for quick access */ -DatabaseTablesIteratorPtr DatabaseS3::getTablesIterator(ContextPtr, const FilterByNameFunction &) const +DatabaseTablesIteratorPtr DatabaseS3::getTablesIterator(ContextPtr, const FilterByNameFunction &, bool) const { return std::make_unique(Tables{}, getDatabaseName()); } diff --git a/src/Databases/DatabaseS3.h b/src/Databases/DatabaseS3.h index 5e7375dbd58e..7e38da0fe638 100644 --- a/src/Databases/DatabaseS3.h +++ b/src/Databases/DatabaseS3.h @@ -56,7 +56,7 @@ class DatabaseS3 : public IDatabase, protected WithContext void shutdown() override; std::vector> getTablesForBackup(const 
FilterByNameFunction &, const ContextPtr &) const override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr, const FilterByNameFunction &, bool) const override; static Configuration parseArguments(ASTs engine_args, ContextPtr context); diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index f8d6ad69ba8c..733af31bdffc 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -214,7 +214,7 @@ StoragePtr DatabaseWithOwnTablesBase::tryGetTable(const String & table_name, Con return tryGetTableNoWait(table_name); } -DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name) const +DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesIterator(ContextPtr, const FilterByNameFunction & filter_by_table_name, bool /* skip_not_loaded */) const { std::lock_guard lock(mutex); if (!filter_by_table_name) @@ -347,7 +347,7 @@ StoragePtr DatabaseWithOwnTablesBase::getTableUnlocked(const String & table_name backQuote(database_name), backQuote(table_name)); } -std::vector> DatabaseWithOwnTablesBase::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const +std::vector> DatabaseWithOwnTablesBase::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context, bool skip_not_loaded) const { std::vector> res; diff --git a/src/Databases/DatabasesCommon.h b/src/Databases/DatabasesCommon.h index 81a3c55a435a..2eecf8a564ff 100644 --- a/src/Databases/DatabasesCommon.h +++ b/src/Databases/DatabasesCommon.h @@ -35,7 +35,7 @@ class DatabaseWithOwnTablesBase : public IDatabase, protected WithContext StoragePtr detachTable(ContextPtr context, const String & table_name) override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override; void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr restore_coordination, UInt64 timeout_ms) override; diff --git a/src/Databases/DatabasesOverlay.cpp b/src/Databases/DatabasesOverlay.cpp index c8705254e735..2772db5e0662 100644 --- a/src/Databases/DatabasesOverlay.cpp +++ b/src/Databases/DatabasesOverlay.cpp @@ -254,7 +254,7 @@ void DatabasesOverlay::shutdown() db->shutdown(); } -DatabaseTablesIteratorPtr DatabasesOverlay::getTablesIterator(ContextPtr context_, const FilterByNameFunction & filter_by_table_name) const +DatabaseTablesIteratorPtr DatabasesOverlay::getTablesIterator(ContextPtr context_, const FilterByNameFunction & filter_by_table_name, bool /*skip_not_loaded*/) const { Tables tables; for (const auto & db : databases) diff --git a/src/Databases/DatabasesOverlay.h b/src/Databases/DatabasesOverlay.h index b58df506f709..e0c31e009cc5 100644 --- a/src/Databases/DatabasesOverlay.h +++ b/src/Databases/DatabasesOverlay.h @@ -52,7 +52,7 @@ class DatabasesOverlay : public IDatabase, protected WithContext void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr restore_coordination, UInt64 timeout_ms) 
override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; bool empty() const override; diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index 75662bfebe39..b00f2fe4baf0 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -229,7 +229,18 @@ class IDatabase : public std::enable_shared_from_this /// Get an iterator that allows you to pass through all the tables. /// It is possible to have "hidden" tables that are not visible when passing through, but are visible if you get them by name using the functions above. - virtual DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name = {}) const = 0; /// NOLINT + /// Wait for all tables to be loaded and started up. If `skip_not_loaded` is true, then not yet loaded or not yet started up (at the moment of iterator creation) tables are excluded. + virtual DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name = {}, bool skip_not_loaded = false) const = 0; /// NOLINT + + /// Returns list of table names. + virtual Strings getAllTableNames(ContextPtr context) const + { + // NOTE: This default implementation wait for all tables to be loaded and started up. It should be reimplemented for databases that support async loading. + Strings result; + for (auto table_it = getTablesIterator(context); table_it->isValid(); table_it->next()) + result.emplace_back(table_it->name()); + return result; + } /// Is the database empty. virtual bool empty() const = 0; diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp index 0f0d73ae16f8..d8360a24bcb5 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.cpp @@ -185,9 +185,9 @@ StoragePtr DatabaseMaterializedMySQL::tryGetTable(const String & name, ContextPt } DatabaseTablesIteratorPtr -DatabaseMaterializedMySQL::getTablesIterator(ContextPtr context_, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const +DatabaseMaterializedMySQL::getTablesIterator(ContextPtr context_, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const { - DatabaseTablesIteratorPtr iterator = DatabaseAtomic::getTablesIterator(context_, filter_by_table_name); + DatabaseTablesIteratorPtr iterator = DatabaseAtomic::getTablesIterator(context_, filter_by_table_name, skip_not_loaded); if (context_->isInternalQuery()) return iterator; return std::make_unique(std::move(iterator), this); @@ -201,7 +201,6 @@ void DatabaseMaterializedMySQL::checkIsInternalQuery(ContextPtr context_, const void DatabaseMaterializedMySQL::stopReplication() { - stopLoading(); materialize_thread.stopSynchronization(); started_up = false; } diff --git a/src/Databases/MySQL/DatabaseMaterializedMySQL.h b/src/Databases/MySQL/DatabaseMaterializedMySQL.h index d2976adcadb8..a6418e6fc5cb 100644 --- a/src/Databases/MySQL/DatabaseMaterializedMySQL.h +++ b/src/Databases/MySQL/DatabaseMaterializedMySQL.h @@ -73,7 +73,7 @@ class DatabaseMaterializedMySQL : public DatabaseAtomic StoragePtr tryGetTable(const String & name, ContextPtr context_) const override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context_, const 
DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context_, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; void checkIsInternalQuery(ContextPtr context_, const char * method) const; diff --git a/src/Databases/MySQL/DatabaseMySQL.cpp b/src/Databases/MySQL/DatabaseMySQL.cpp index d9b0f7f9ac7d..b2e199735db6 100644 --- a/src/Databases/MySQL/DatabaseMySQL.cpp +++ b/src/Databases/MySQL/DatabaseMySQL.cpp @@ -105,7 +105,7 @@ bool DatabaseMySQL::empty() const return true; } -DatabaseTablesIteratorPtr DatabaseMySQL::getTablesIterator(ContextPtr local_context, const FilterByNameFunction & filter_by_table_name) const +DatabaseTablesIteratorPtr DatabaseMySQL::getTablesIterator(ContextPtr local_context, const FilterByNameFunction & filter_by_table_name, bool /* skip_not_loaded */) const { Tables tables; std::lock_guard lock(mutex); diff --git a/src/Databases/MySQL/DatabaseMySQL.h b/src/Databases/MySQL/DatabaseMySQL.h index e5b1f434d2f2..084a8339be3f 100644 --- a/src/Databases/MySQL/DatabaseMySQL.h +++ b/src/Databases/MySQL/DatabaseMySQL.h @@ -58,7 +58,7 @@ class DatabaseMySQL final : public IDatabase, WithContext bool empty() const override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_nam, bool skip_not_loaded) const override; ASTPtr getCreateDatabaseQuery() const override; diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp index b44bc136b1f6..5ef44d3826cd 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp @@ -455,8 +455,6 @@ void DatabaseMaterializedPostgreSQL::shutdown() void DatabaseMaterializedPostgreSQL::stopReplication() { - stopLoading(); - std::lock_guard lock(handler_mutex); if (replication_handler) replication_handler->shutdown(); @@ -484,10 +482,10 @@ void DatabaseMaterializedPostgreSQL::drop(ContextPtr local_context) DatabaseTablesIteratorPtr DatabaseMaterializedPostgreSQL::getTablesIterator( - ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const + ContextPtr local_context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const { /// Modify context into nested_context and pass query to Atomic database. 
- return DatabaseAtomic::getTablesIterator(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context), filter_by_table_name); + return DatabaseAtomic::getTablesIterator(StorageMaterializedPostgreSQL::makeNestedTableContext(local_context), filter_by_table_name, skip_not_loaded); } void registerDatabaseMaterializedPostgreSQL(DatabaseFactory & factory) diff --git a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h index dfa53fa61d7b..cf1333d03c85 100644 --- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.h @@ -45,7 +45,7 @@ class DatabaseMaterializedPostgreSQL : public DatabaseAtomic void stopLoading() override; DatabaseTablesIteratorPtr - getTablesIterator(ContextPtr context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name) const override; + getTablesIterator(ContextPtr context, const DatabaseOnDisk::FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; StoragePtr tryGetTable(const String & name, ContextPtr context) const override; diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.h b/src/Databases/PostgreSQL/DatabasePostgreSQL.h index 3ba7333c98ec..137b9d5cef9e 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.h +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.h @@ -46,7 +46,7 @@ class DatabasePostgreSQL final : public IDatabase, WithContext void loadStoredObjects(ContextMutablePtr, LoadingStrictnessLevel /*mode*/) override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; bool isTableExist(const String & name, ContextPtr context) const override; StoragePtr tryGetTable(const String & name, ContextPtr context) const override; diff --git a/src/Databases/SQLite/DatabaseSQLite.cpp b/src/Databases/SQLite/DatabaseSQLite.cpp index b7a82fd9d0ff..e758ea35de56 100644 --- a/src/Databases/SQLite/DatabaseSQLite.cpp +++ b/src/Databases/SQLite/DatabaseSQLite.cpp @@ -46,7 +46,7 @@ bool DatabaseSQLite::empty() const } -DatabaseTablesIteratorPtr DatabaseSQLite::getTablesIterator(ContextPtr local_context, const IDatabase::FilterByNameFunction &) const +DatabaseTablesIteratorPtr DatabaseSQLite::getTablesIterator(ContextPtr local_context, const IDatabase::FilterByNameFunction &, bool) const { std::lock_guard lock(mutex); diff --git a/src/Databases/SQLite/DatabaseSQLite.h b/src/Databases/SQLite/DatabaseSQLite.h index e5e93bbc8ce3..6bd84a4d297b 100644 --- a/src/Databases/SQLite/DatabaseSQLite.h +++ b/src/Databases/SQLite/DatabaseSQLite.h @@ -32,7 +32,7 @@ class DatabaseSQLite final : public IDatabase, WithContext StoragePtr tryGetTable(const String & name, ContextPtr context) const override; - DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; + DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name, bool skip_not_loaded) const override; bool empty() const override; diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index 9d9f418934f6..7231181941af 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1602,6 +1602,9 @@ void 
DatabaseCatalog::reloadDisksTask() for (auto & database : getDatabases()) { + // WARNING: In case of `async_load_databases = true` the getTablesIterator() call waits for all tables in the database to be loaded. + // WARNING: It means that no database will be able to update configuration until all databases are fully loaded. + // TODO: We can split this task by table or by database to make loaded tables operate as usual. auto it = database.second->getTablesIterator(getContext()); while (it->isValid()) { diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 6f05a3cea0f5..61ec2d9e320e 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -418,11 +418,7 @@ class TableNameHints : public IHints<> Names getAllRegisteredNames() const override { - Names result; - if (database) - for (auto table_it = database->getTablesIterator(context); table_it->isValid(); table_it->next()) - result.emplace_back(table_it->name()); - return result; + return database ? database->getAllTableNames(context) : {}; } private: diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 72aa4cc63e33..237f49582a6f 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -400,6 +400,7 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, table_context->setInternalQuery(true); /// Do not hold extra shared pointers to tables std::vector> tables_to_drop; + // NOTE: This means we wait for all tables to be loaded inside getTablesIterator() call in case of `async_load_databases = true`. for (auto iterator = database->getTablesIterator(table_context); iterator->isValid(); iterator->next()) { auto table_ptr = iterator->table(); diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index fe7ccd64ffe1..d26cfcf9fe9d 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -278,7 +278,8 @@ void ServerAsynchronousMetrics::updateImpl(TimePoint update_time, TimePoint curr bool is_system = db.first == DatabaseCatalog::SYSTEM_DATABASE; - for (auto iterator = db.second->getTablesIterator(getContext()); iterator->isValid(); iterator->next()) + // Note that we skip not yet loaded tables, so metrics could possibly be lower than expected on fully loaded database just after server start if `async_load_databases = true`.
+ for (auto iterator = db.second->getTablesIterator(getContext(), {}, true); iterator->isValid(); iterator->next()) { ++total_number_of_tables; if (is_system) @@ -408,7 +409,7 @@ void ServerAsynchronousMetrics::updateDetachedPartsStats() if (!db.second->canContainMergeTreeTables()) continue; - for (auto iterator = db.second->getTablesIterator(getContext()); iterator->isValid(); iterator->next()) + for (auto iterator = db.second->getTablesIterator(getContext(), {}, true); iterator->isValid(); iterator->next()) { const auto & table = iterator->table(); if (!table) diff --git a/src/Server/ReplicasStatusHandler.cpp b/src/Server/ReplicasStatusHandler.cpp index 91c6bd722d32..67823117758f 100644 --- a/src/Server/ReplicasStatusHandler.cpp +++ b/src/Server/ReplicasStatusHandler.cpp @@ -51,7 +51,10 @@ void ReplicasStatusHandler::handleRequest(HTTPServerRequest & request, HTTPServe if (!db.second->canContainMergeTreeTables()) continue; - for (auto iterator = db.second->getTablesIterator(getContext()); iterator->isValid(); iterator->next()) + // Note that in case `async_load_databases = true` we do not want replica status handler to be hanging + // and waiting (in getTablesIterator() call) for every table to be load, so we just skip not-yet-loaded tables. + // If they have some lag it will be reflected as soon as they are load. + for (auto iterator = db.second->getTablesIterator(getContext(), {}, true); iterator->isValid(); iterator->next()) { const auto & table = iterator->table(); if (!table) From 31d5049f803074989792e01d459640df3ab03c50 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 18 Mar 2024 16:26:05 +0000 Subject: [PATCH 013/470] Move logic of ignoring drop queries to server --- programs/client/Client.cpp | 7 +-- src/Client/ClientBase.cpp | 61 ------------------- src/Client/ClientBase.h | 8 --- src/Core/Settings.h | 1 + src/Interpreters/InterpreterDropQuery.cpp | 11 +++- src/Interpreters/InterpreterDropQuery.h | 2 +- tests/ci/stress.py | 4 +- tests/clickhouse-test | 2 +- ..._ignore_drop_queries_probability.reference | 1 + .../03013_ignore_drop_queries_probability.sql | 18 ++++++ 10 files changed, 36 insertions(+), 79 deletions(-) create mode 100644 tests/queries/0_stateless/03013_ignore_drop_queries_probability.reference create mode 100644 tests/queries/0_stateless/03013_ignore_drop_queries_probability.sql diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index caf2ce2f68aa..a8a152458214 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -953,9 +953,8 @@ void Client::addOptions(OptionsDescription & options_description) ("opentelemetry-tracestate", po::value(), "OpenTelemetry tracestate header as described by W3C Trace Context recommendation") ("no-warnings", "disable warnings when client connects to server") - /// TODO: Left for compatibility as it's used in upgrade check, remove after next release and use ignore-drop-queries-probability + /// TODO: Left for compatibility as it's used in upgrade check, remove after next release and use server setting ignore_drop_queries_probability ("fake-drop", "Ignore all DROP queries, should be used only for testing") - ("ignore-drop-queries-probability", po::value(), "With specified probability ignore all DROP queries (replace them to TRUNCATE for engines like Memory/JOIN), should be used only for testing") ("accept-invalid-certificate", "Ignore certificate verification errors, equal to config parameters openSSL.client.invalidCertificateHandler.name=AcceptCertificateHandler and 
openSSL.client.verificationMode=none") ; @@ -1098,9 +1097,7 @@ void Client::processOptions(const OptionsDescription & options_description, if (options.count("no-warnings")) config().setBool("no-warnings", true); if (options.count("fake-drop")) - ignore_drop_queries_probability = 1; - if (options.count("ignore-drop-queries-probability")) - ignore_drop_queries_probability = std::min(options["ignore-drop-queries-probability"].as(), 1.); + config().setString("ignore_drop_queries_probability", "1"); if (options.count("accept-invalid-certificate")) { config().setString("openSSL.client.invalidCertificateHandler.name", "AcceptCertificateHandler"); diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 9126205b8680..de995ffe4083 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -876,69 +876,8 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) processError(full_query); } -String ClientBase::getTableEngine(const String & database, const String & table) -{ - auto is_interactive_copy = is_interactive; - auto format_copy = format; - - is_interactive = false; - format = "TSVRaw"; - String result; - output_format_buffer = std::make_unique(result); - String query; - if (database.empty()) - query = fmt::format("SELECT engine FROM system.tables where name='{}' and database=currentDatabase()", table); - else - query = fmt::format("SELECT engine FROM system.tables where name='{}' and database='{}'", table, database); - - try - { - processTextAsSingleQuery(query); - } - catch (...) - { - result = ""; - } - - output_format_buffer->finalize(); - output_format_buffer.reset(); - is_interactive = is_interactive_copy; - format = format_copy; - boost::trim(result); - return result; -} - -void ClientBase::ignoreDropQueryOrTruncateTable(const DB::ASTDropQuery * drop_query) -{ - const auto & database = drop_query->getDatabase(); - const auto & table = drop_query->getTable(); - /// Use TRUNCATE for Memory/JOIN table engines to reduce memory usage in tests. - String table_engine = getTableEngine(database, table); - if (table_engine == "Memory" || table_engine == "JOIN") - { - String truncate_query; - if (database.empty()) - truncate_query = fmt::format("TRUNCATE TABLE {}", table); - else - truncate_query = fmt::format("TRUNCATE TABLE {}.{}", database, table); - - auto is_interactive_copy = is_interactive; - is_interactive = false; - processTextAsSingleQuery(truncate_query); - is_interactive = is_interactive_copy; - } -} - void ClientBase::processOrdinaryQuery(const String & query_to_execute, ASTPtr parsed_query) { - /// In tests we can ignore DROP queries with some probability. - const auto * drop_query = parsed_query->as(); - if (ignore_drop_queries_probability != 0 && drop_query && drop_query->kind == ASTDropQuery::Kind::Drop && std::uniform_real_distribution<>(0.0, 1.0)(thread_local_rng) <= ignore_drop_queries_probability) - { - ignoreDropQueryOrTruncateTable(drop_query); - return; - } - auto query = query_to_execute; /// Rewrite query only when we have query parameters. diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 462899994a2a..74d065bf342d 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -178,12 +178,6 @@ class ClientBase : public Poco::Util::Application, public IHints<2> void initQueryIdFormats(); bool addMergeTreeSettings(ASTCreateQuery & ast_create); - void ignoreDropQueryOrTruncateTable(const ASTDropQuery * drop_query); - /// Request table engine from system.tables from server. 
- String getTableEngine(const String & database, const String & table); - /// Send TRUNCATE query for specific table. - void truncateTable(const String & database, const String & table); - protected: static bool isSyncInsertWithData(const ASTInsertQuery & insert_query, const ContextPtr & context); bool processMultiQueryFromFile(const String & file_name); @@ -314,8 +308,6 @@ class ClientBase : public Poco::Util::Application, public IHints<2> QueryProcessingStage::Enum query_processing_stage; ClientInfo::QueryKind query_kind; - double ignore_drop_queries_probability = 0; - struct HostAndPort { String host; diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 415063eee84c..15c1719833c6 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -857,6 +857,7 @@ class IColumn; M(Bool, optimize_uniq_to_count, true, "Rewrite uniq and its variants(except uniqUpTo) to count if subquery has distinct or group by clause.", 0) \ M(Bool, use_variant_as_common_type, false, "Use Variant as a result type for if/multiIf in case when there is no common type for arguments", 0) \ M(Bool, enable_order_by_all, true, "Enable sorting expression ORDER BY ALL.", 0) \ + M(Float, ignore_drop_queries_probability, 0, "If enabled, server will ignore all DROP table queries with specified probability (for Memory and JOIN engines it will replace DROP with TRUNCATE). Used for testing purposes", 0) \ \ /** Experimental functions */ \ M(Bool, allow_experimental_materialized_postgresql_table, false, "Allows to use the MaterializedPostgreSQL table engine. Disabled by default, because this feature is experimental", 0) \ diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 72aa4cc63e33..fef1d215d0a9 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -104,7 +104,7 @@ BlockIO InterpreterDropQuery::executeToTable(ASTDropQuery & query) return res; } -BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQuery & query, DatabasePtr & db, UUID & uuid_to_wait) +BlockIO InterpreterDropQuery::executeToTableImpl(const ContextPtr & context_, ASTDropQuery & query, DatabasePtr & db, UUID & uuid_to_wait) { /// NOTE: it does not contain UUID, we will resolve it with locked DDLGuard auto table_id = StorageID(query); @@ -151,6 +151,15 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue "Table {} is not a Dictionary", table_id.getNameForLogs()); + if (settings.ignore_drop_queries_probability != 0 && ast_drop_query.kind == ASTDropQuery::Kind::Drop && std::uniform_real_distribution<>(0.0, 1.0)(thread_local_rng) <= settings.ignore_drop_queries_probability) + { + ast_drop_query.sync = false; + if (table->getName() != "Memory" && table->getName() != "Join") + return {}; + + ast_drop_query.kind = ASTDropQuery::Truncate; + } + /// Now get UUID, so we can wait for table data to be finally dropped table_id.uuid = database->tryGetTableUUID(table_id.table_name); diff --git a/src/Interpreters/InterpreterDropQuery.h b/src/Interpreters/InterpreterDropQuery.h index 7ae544a7356a..8829fbe9ea53 100644 --- a/src/Interpreters/InterpreterDropQuery.h +++ b/src/Interpreters/InterpreterDropQuery.h @@ -37,7 +37,7 @@ class InterpreterDropQuery : public IInterpreter, WithMutableContext BlockIO executeToDatabaseImpl(const ASTDropQuery & query, DatabasePtr & database, std::vector & uuids_to_wait); BlockIO executeToTable(ASTDropQuery & query); - BlockIO executeToTableImpl(ContextPtr context_, ASTDropQuery &
query, DatabasePtr & db, UUID & uuid_to_wait); + BlockIO executeToTableImpl(const ContextPtr& context_, ASTDropQuery & query, DatabasePtr & db, UUID & uuid_to_wait); static void waitForTableToBeActuallyDroppedOrDetached(const ASTDropQuery & query, const DatabasePtr & db, const UUID & uuid_to_wait); diff --git a/tests/ci/stress.py b/tests/ci/stress.py index e04f06c951b3..5ed88c8df7ed 100755 --- a/tests/ci/stress.py +++ b/tests/ci/stress.py @@ -66,10 +66,10 @@ def get_options(i: int, upgrade_check: bool) -> str: if random.random() < 0.3: client_options.append(f"http_make_head_request={random.randint(0, 1)}") - # TODO: After release 24.3 use ignore-drop-queries-probability for both + # TODO: After release 24.3 use ignore_drop_queries_probability for both # stress test and upgrade check if not upgrade_check: - client_options.append("ignore-drop-queries-probability=0.5") + client_options.append("ignore_drop_queries_probability=0.5") if client_options: options.append(" --client-option " + " ".join(client_options)) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 83bf2f59131b..edc7825896a5 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -3171,7 +3171,7 @@ def parse_args(): ) # TODO: Remove upgrade-check option after release 24.3 and use - # ignore-drop-queries-probability option in stress.py as in stress tests + # ignore_drop_queries_probability option in stress.py as in stress tests group.add_argument( "--upgrade-check", action="store_true", diff --git a/tests/queries/0_stateless/03013_ignore_drop_queries_probability.reference b/tests/queries/0_stateless/03013_ignore_drop_queries_probability.reference new file mode 100644 index 000000000000..d81cc0710eb6 --- /dev/null +++ b/tests/queries/0_stateless/03013_ignore_drop_queries_probability.reference @@ -0,0 +1 @@ +42 diff --git a/tests/queries/0_stateless/03013_ignore_drop_queries_probability.sql b/tests/queries/0_stateless/03013_ignore_drop_queries_probability.sql new file mode 100644 index 000000000000..5c7b99987761 --- /dev/null +++ b/tests/queries/0_stateless/03013_ignore_drop_queries_probability.sql @@ -0,0 +1,18 @@ +create table test_memory (number UInt64) engine=Memory; +insert into test_memory select 42; +drop table test_memory settings ignore_drop_queries_probability=1; +select * from test_memory; +drop table test_memory; + +create table test_merge_tree (number UInt64) engine=MergeTree order by number; +insert into test_merge_tree select 42; +drop table test_merge_tree settings ignore_drop_queries_probability=1; +select * from test_merge_tree; +drop table test_merge_tree; + +create table test_join (number UInt64) engine=Join(ALL, LEFT, number); +insert into test_join select 42; +drop table test_join settings ignore_drop_queries_probability=1; +select * from test_join; +drop table test_join; + From 4a58705f7bcfcb2b4a66bf8def54297a6dc64ef2 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 15 Mar 2024 18:08:41 +0000 Subject: [PATCH 014/470] impl trigger rebuild fix move fix black trigger rebuild trigger rebuild fix mypy fix fix --- tests/ci/ci.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 29906e6571f8..795c0d7b7c65 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1613,6 +1613,42 @@ def _upload_build_profile_data( logging.error("Failed to insert binary_size_file for the build, continue") +def _add_build_to_version_history( + pr_info: PRInfo, + job_report: JobReport, + git_ref: str, + version: str, + ch_helper: 
ClickHouseHelper, +) -> None: + ci_logs_credentials = CiLogsCredentials(Path("/dev/null")) + if not ci_logs_credentials.host: + return + + # with some probability we will not silently break this logic + assert pr_info.sha and pr_info.commit_html_url and version and git_ref + + data = { + "check_start_time": job_report.start_time, + "pull_request_number": pr_info.number, + "pull_request_url": pr_info.pr_html_url, + "commit_sha": pr_info.sha, + "commit_url": pr_info.commit_html_url, + "version": version, + "git_ref": git_ref, + } + + json_str = json.dumps(data) + + print(f"::notice ::Log Adding record to versions history: {json_str}") + + try: + ch_helper.insert_json_into( + db="default", table="version_history", json_str=json_str + ) + except InsertException: + logging.error("Failed to insert profile data for the build, continue") + + def _run_test(job_name: str, run_command: str) -> int: assert ( run_command or CI_CONFIG.get_job_config(job_name).run_command @@ -1986,6 +2022,11 @@ def main() -> int: ch_helper.insert_events_into( db="default", table="checks", events=prepared_events ) + + if args.job_name == "DockerServerImageRelease" and indata is not None: + _add_build_to_version_history( + pr_info, job_report, indata["git_ref"], indata["version"], ch_helper + ) else: # no job report print(f"No job report for {[args.job_name]} - do nothing") From ac352b96f038ab3f393ac330097f8dc03db31001 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 18 Mar 2024 15:26:33 +0000 Subject: [PATCH 015/470] remove copy-paste --- tests/ci/ci.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 795c0d7b7c65..c5fa67a66cdf 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1620,10 +1620,6 @@ def _add_build_to_version_history( version: str, ch_helper: ClickHouseHelper, ) -> None: - ci_logs_credentials = CiLogsCredentials(Path("/dev/null")) - if not ci_logs_credentials.host: - return - # with some probability we will not silently break this logic assert pr_info.sha and pr_info.commit_html_url and version and git_ref From 0b2c2741320ac542ccb3e744e8849ca4767e49ae Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 18 Mar 2024 17:45:23 +0000 Subject: [PATCH 016/470] write docker tag --- tests/ci/ci.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index c5fa67a66cdf..f2e0828082cb 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1618,6 +1618,7 @@ def _add_build_to_version_history( job_report: JobReport, git_ref: str, version: str, + docker_tag: str, ch_helper: ClickHouseHelper, ) -> None: # with some probability we will not silently break this logic @@ -1630,6 +1631,7 @@ def _add_build_to_version_history( "commit_sha": pr_info.sha, "commit_url": pr_info.commit_html_url, "version": version, + "docker_tag": docker_tag, "git_ref": git_ref, } @@ -2019,9 +2021,14 @@ def main() -> int: db="default", table="checks", events=prepared_events ) - if args.job_name == "DockerServerImageRelease" and indata is not None: + if "DockerServerImage" in args.job_name and indata is not None: _add_build_to_version_history( - pr_info, job_report, indata["git_ref"], indata["version"], ch_helper + pr_info, + job_report, + indata["git_ref"], + indata["version"], + indata["build"], + ch_helper, ) else: # no job report From 46debe89c22e8f423c66c380caf3e90642e81a77 Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 18 Mar 2024 18:17:55 +0000 Subject: [PATCH 017/470] fix --- src/Databases/DatabaseReplicated.cpp | 1 - 1 file changed, 1 
deletion(-) diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 0c2cf1bb0110..ffaefe08b5f4 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -1329,7 +1329,6 @@ void DatabaseReplicated::stopReplication() void DatabaseReplicated::shutdown() { - stopLoading(); stopReplication(); ddl_worker_initialized = false; ddl_worker = nullptr; From 726db7361ffd0f31f573be45c3d8573bef4e5935 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 18 Mar 2024 20:04:56 +0000 Subject: [PATCH 018/470] add missing method --- tests/ci/clickhouse_helper.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index 637c4519d3d7..7a119ee15776 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -109,6 +109,16 @@ def _insert_post(*args, **kwargs): def _insert_json_str_info(self, db, table, json_str): self.insert_json_str(self.url, self.auth, db, table, json_str) + def insert_json_into(self, db, table, json_str, safe=True): + try: + self._insert_json_str_info(db, table, json_str) + except InsertException as e: + logging.error( + "Exception happened during inserting data into clickhouse: %s", e + ) + if not safe: + raise + def insert_event_into(self, db, table, event, safe=True): event_str = json.dumps(event) try: From dba72bf9c5817a96ea083cdb3edf5e3d0ded08df Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 19 Mar 2024 10:02:35 +0000 Subject: [PATCH 019/470] Fix --- src/Client/ClientBase.cpp | 8 -------- src/Client/ClientBase.h | 1 - src/Core/SettingsChangesHistory.h | 1 + 3 files changed, 1 insertion(+), 9 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index de995ffe4083..5ec18c41f05a 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -68,9 +68,6 @@ #include #include -#include -#include - #include #include #include @@ -562,11 +559,6 @@ try pager_cmd = ShellCommand::execute(config); out_buf = &pager_cmd->in; } - /// We can use special buffer for query output for internal queries. - else if (output_format_buffer) - { - out_buf = output_format_buffer.get(); - } else { out_buf = &std_out; diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 74d065bf342d..d01a69f14dc6 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -248,7 +248,6 @@ class ClientBase : public Poco::Util::Application, public IHints<2> /// The user can specify to redirect query output to a file. 
std::unique_ptr out_file_buf; std::shared_ptr output_format; - std::unique_ptr output_format_buffer; /// The user could specify special file for server logs (stderr by default) std::unique_ptr out_logs_buf; diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 072b9803682a..c8d57626136b 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -87,6 +87,7 @@ static std::map sett { {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, + {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, From 87037d6d7df0bd825e40236c1f3a932865bd93de Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 20 Mar 2024 16:17:30 +0000 Subject: [PATCH 020/470] fix build --- src/Interpreters/DatabaseCatalog.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 61ec2d9e320e..629bc6c5f4a4 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -418,7 +418,9 @@ class TableNameHints : public IHints<> Names getAllRegisteredNames() const override { - return database ? database->getAllTableNames(context) : {}; + if (database) + return database->getAllTableNames(context); + return {}; } private: From a91eaa78ee0ecd1e1fc78fbb8a0379403b599d9f Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Thu, 21 Mar 2024 11:46:44 +0100 Subject: [PATCH 021/470] Update src/Interpreters/ServerAsynchronousMetrics.cpp Co-authored-by: Antonio Andelic --- src/Interpreters/ServerAsynchronousMetrics.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp index d26cfcf9fe9d..7703a3521303 100644 --- a/src/Interpreters/ServerAsynchronousMetrics.cpp +++ b/src/Interpreters/ServerAsynchronousMetrics.cpp @@ -279,7 +279,7 @@ void ServerAsynchronousMetrics::updateImpl(TimePoint update_time, TimePoint curr bool is_system = db.first == DatabaseCatalog::SYSTEM_DATABASE; // Note that we skip not yet loaded tables, so metrics could possibly be lower than expected on fully loaded database just after server start if `async_load_databases = true`. 
- for (auto iterator = db.second->getTablesIterator(getContext(), {}, true); iterator->isValid(); iterator->next()) + for (auto iterator = db.second->getTablesIterator(getContext(), {}, /*skip_not_loaded=*/true); iterator->isValid(); iterator->next()) { ++total_number_of_tables; if (is_system) From f0491595710a17a43fb313f2f30d74ab9d3fb144 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 21 Mar 2024 15:37:00 +0000 Subject: [PATCH 022/470] Try to fix abort in arrow --- contrib/arrow | 2 +- .../Formats/Impl/ArrowBufferedStreams.cpp | 76 +++++++++++++++---- 2 files changed, 62 insertions(+), 16 deletions(-) diff --git a/contrib/arrow b/contrib/arrow index ba5c67934e82..12232bbbe39b 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit ba5c67934e8274d649befcffab56731632dc5253 +Subproject commit 12232bbbe39b4ffbd921a0caff6d046ae009a753 diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp index 83d7a8b7bbd0..06819a860d8d 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp @@ -4,6 +4,7 @@ #if USE_ARROW || USE_ORC || USE_PARQUET #include +#include #include #include #include @@ -11,7 +12,7 @@ #include #include #include -#include +//#include #include @@ -41,9 +42,18 @@ arrow::Result ArrowBufferedOutputStream::Tell() const arrow::Status ArrowBufferedOutputStream::Write(const void * data, int64_t length) { - out.write(reinterpret_cast(data), length); - total_length += length; - return arrow::Status::OK(); + try + { + out.write(reinterpret_cast(data), length); + total_length += length; + return arrow::Status::OK(); + } + catch (...) + { + auto message = getCurrentExceptionMessage(false); + LOG_ERROR(getLogger("ArrowBufferedOutputStream"), "Error while writing to arrow stream: {}", message); + return arrow::Status::IOError(message); + } } RandomAccessFileFromSeekableReadBuffer::RandomAccessFileFromSeekableReadBuffer(ReadBuffer & in_, std::optional file_size_, bool avoid_buffering_) @@ -74,9 +84,18 @@ arrow::Result RandomAccessFileFromSeekableReadBuffer::Tell() const arrow::Result RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes, void * out) { - if (avoid_buffering) - in.setReadUntilPosition(seekable_in.getPosition() + nbytes); - return in.readBig(reinterpret_cast(out), nbytes); + try + { + if (avoid_buffering) + in.setReadUntilPosition(seekable_in.getPosition() + nbytes); + return in.readBig(reinterpret_cast(out), nbytes); + } + catch (...) + { + auto message = getCurrentExceptionMessage(false); + LOG_ERROR(getLogger("ArrowBufferedOutputStream"), "Error while reading from arrow stream: {}", message); + return arrow::Status::IOError(message); + } } arrow::Result> RandomAccessFileFromSeekableReadBuffer::Read(int64_t nbytes) @@ -98,14 +117,23 @@ arrow::Future> RandomAccessFileFromSeekableReadBu arrow::Status RandomAccessFileFromSeekableReadBuffer::Seek(int64_t position) { - if (avoid_buffering) + try { - // Seeking to a position above a previous setReadUntilPosition() confuses some of the - // ReadBuffer implementations. - in.setReadUntilEnd(); + if (avoid_buffering) + { + // Seeking to a position above a previous setReadUntilPosition() confuses some of the + // ReadBuffer implementations. + in.setReadUntilEnd(); + } + seekable_in.seek(position, SEEK_SET); + return arrow::Status::OK(); + } + catch (...) 
+ { + auto message = getCurrentExceptionMessage(false); + LOG_ERROR(getLogger("ArrowBufferedOutputStream"), "Error while seeking arrow file: {}", message); + return arrow::Status::IOError(message); } - seekable_in.seek(position, SEEK_SET); - return arrow::Status::OK(); } @@ -115,7 +143,16 @@ ArrowInputStreamFromReadBuffer::ArrowInputStreamFromReadBuffer(ReadBuffer & in_) arrow::Result ArrowInputStreamFromReadBuffer::Read(int64_t nbytes, void * out) { - return in.readBig(reinterpret_cast(out), nbytes); + try + { + return in.readBig(reinterpret_cast(out), nbytes); + } + catch (...) + { + auto message = getCurrentExceptionMessage(false); + LOG_ERROR(getLogger("ArrowBufferedOutputStream"), "Error while reading from arrow stream: {}", message); + return arrow::Status::IOError(message); + } } arrow::Result> ArrowInputStreamFromReadBuffer::Read(int64_t nbytes) @@ -154,7 +191,16 @@ arrow::Result RandomAccessFileFromRandomAccessReadBuffer::GetSize() arrow::Result RandomAccessFileFromRandomAccessReadBuffer::ReadAt(int64_t position, int64_t nbytes, void* out) { - return in.readBigAt(reinterpret_cast(out), nbytes, position); + try + { + return in.readBigAt(reinterpret_cast(out), nbytes, position); + } + catch (...) + { + auto message = getCurrentExceptionMessage(false); + LOG_ERROR(getLogger("ArrowBufferedOutputStream"), "Error while reading from arrow stream: {}", message); + return arrow::Status::IOError(message); + } } arrow::Result> RandomAccessFileFromRandomAccessReadBuffer::ReadAt(int64_t position, int64_t nbytes) From 01fb309d2e925a20e1cae8db8e49c311ffb31f19 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 21 Mar 2024 15:39:15 +0000 Subject: [PATCH 023/470] Add test --- tests/queries/0_stateless/02834_apache_arrow_abort.reference | 0 tests/queries/0_stateless/02834_apache_arrow_abort.sql | 4 ++++ 2 files changed, 4 insertions(+) create mode 100644 tests/queries/0_stateless/02834_apache_arrow_abort.reference create mode 100644 tests/queries/0_stateless/02834_apache_arrow_abort.sql diff --git a/tests/queries/0_stateless/02834_apache_arrow_abort.reference b/tests/queries/0_stateless/02834_apache_arrow_abort.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/02834_apache_arrow_abort.sql b/tests/queries/0_stateless/02834_apache_arrow_abort.sql new file mode 100644 index 000000000000..47db46f1e43a --- /dev/null +++ b/tests/queries/0_stateless/02834_apache_arrow_abort.sql @@ -0,0 +1,4 @@ +-- Tags: no-fasttest +-- This tests depends on internet access, but it does not matter, because it only has to check that there is no abort due to a bug in Apache Arrow library. 
+ +INSERT INTO TABLE FUNCTION url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/hits.parquet') SELECT * FROM url('https://clickhouse-public-datasets.s3.amazonaws.com/hits_compatible/hits.parquet'); -- { serverError CANNOT_WRITE_TO_OSTREAM, RECEIVED_ERROR_FROM_REMOTE_IO_SERVER, POCO_EXCEPTION } From 6e8e01d47e100ff92b21151cf2b3e4029625af1f Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 21 Mar 2024 15:46:04 +0000 Subject: [PATCH 024/470] Fix headers --- src/Processors/Formats/Impl/ArrowBufferedStreams.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp index 2fa808ccdf25..84375ccd5ce9 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp @@ -12,7 +12,7 @@ #include #include #include -//#include +#include #include From 82c171b748c8f3de04369eb04769bb5ed5ef554b Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Fri, 22 Mar 2024 11:30:15 +0000 Subject: [PATCH 025/470] add ranking functions + make the supported table more obvious --- .../sql-reference/window-functions/index.md | 176 ++++++++++++++---- 1 file changed, 142 insertions(+), 34 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 9b2ded7b6cee..2f44c36acb4f 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -12,25 +12,23 @@ Some of the calculations that you can do are similar to those that can be done w ClickHouse supports the standard grammar for defining windows and window functions. The table below indicates whether a feature is currently supported. -| Feature | Support or workaround | +| Feature | Supported? | |------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | supported | -| expressions involving window functions, e.g. `(count(*) over ()) / 2)` | supported | -| `WINDOW` clause (`select ... from table window w as (partition by id)`) | supported | -| `ROWS` frame | supported | -| `RANGE` frame | supported, the default | -| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | not supported, specify the number of seconds instead (`RANGE` works with any numeric type). | -| `GROUPS` frame | not supported | -| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | all aggregate functions are supported | -| `rank()`, `dense_rank()`, `row_number()` | supported | -| `lag/lead(value, offset)` | Not supported. Workarounds: | -| | 1) replace with `any(value) over (.... rows between preceding and preceding)`, or `following` for `lead` | -| | 2) use `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | -| ntile(buckets) | Supported. Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | +| ad hoc window specification (`count(*) over (partition by id order by time desc)`) | ✅ | +| expressions involving window functions, e.g. 
`(count(*) over ()) / 2)` | ✅ | +| `WINDOW` clause (`select ... from table window w as (partition by id)`) | ✅ | +| `ROWS` frame | ✅ | +| `RANGE` frame | ✅ (the default) | +| `INTERVAL` syntax for `DateTime` `RANGE OFFSET` frame | ❌ (specify the number of seconds instead (`RANGE` works with any numeric type).) | +| `GROUPS` frame | ❌ | +| Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) | +| `rank()`, `dense_rank()`, `row_number()` | ✅ | +| `lag/lead(value, offset)` | ❌
You can use one of the following workarounds:
1) `any(value) over (.... rows between preceding and preceding)`, or `following` for `lead`
2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | +| ntile(buckets) | ✅
Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | ## ClickHouse-specific Window Functions -There are also the following window function that's specific to ClickHouse: +There is also the following ClickHouse specific window function: ### nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL X UNITS]) @@ -89,6 +87,62 @@ These functions can be used only as a window function. Let's have a look at some examples of how window functions can be used. +### Numbering rows + +```sql +CREATE TABLE salaries +( + `team` String, + `player` String, + `salary` UInt32, + `position` String +) +Engine = Memory; + +INSERT INTO salaries FORMAT Values + ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), + ('Port Elizabeth Barbarians', 'Charles Juarez', 190000, 'F'), + ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'), + ('Port Elizabeth Barbarians', 'Scott Harrison', 150000, 'D'), + ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'); +``` + +```sql +SELECT player, salary, + row_number() OVER (ORDER BY salary) AS row +FROM salaries; +``` + +```text +┌─player──────────┬─salary─┬─row─┐ +│ Michael Stanley │ 150000 │ 1 │ +│ Scott Harrison │ 150000 │ 2 │ +│ Charles Juarez │ 190000 │ 3 │ +│ Gary Chen │ 195000 │ 4 │ +│ Robert George │ 195000 │ 5 │ +└─────────────────┴────────┴─────┘ +``` + +```sql +SELECT player, salary, + row_number() OVER (ORDER BY salary) AS row, + rank() OVER (ORDER BY salary) AS rank, + dense_rank() OVER (ORDER BY salary) AS denseRank +FROM salaries; +``` + +```text +┌─player──────────┬─salary─┬─row─┬─rank─┬─denseRank─┐ +│ Michael Stanley │ 150000 │ 1 │ 1 │ 1 │ +│ Scott Harrison │ 150000 │ 2 │ 1 │ 1 │ +│ Charles Juarez │ 190000 │ 3 │ 3 │ 2 │ +│ Gary Chen │ 195000 │ 4 │ 4 │ 3 │ +│ Robert George │ 195000 │ 5 │ 4 │ 3 │ +└─────────────────┴────────┴─────┴──────┴───────────┘ +``` + +### Partitioning by column + ```sql CREATE TABLE wf_partition ( @@ -120,6 +174,8 @@ ORDER BY └──────────┴───────┴───────┴──────────────┘ ``` +### Frame bounding + ```sql CREATE TABLE wf_frame ( @@ -131,14 +187,19 @@ ENGINE = Memory; INSERT INTO wf_frame FORMAT Values (1,1,1), (1,2,2), (1,3,3), (1,4,4), (1,5,5); +``` --- frame is bounded by bounds of a partition (BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) +```sql +-- Frame is bounded by bounds of a partition (BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) SELECT part_key, value, order, - groupArray(value) OVER (PARTITION BY part_key ORDER BY order ASC - Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS frame_values + groupArray(value) OVER ( + PARTITION BY part_key + ORDER BY order ASC + Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) AS frame_values FROM wf_frame ORDER BY part_key ASC, @@ -151,7 +212,9 @@ ORDER BY │ 1 │ 4 │ 4 │ [1,2,3,4,5] │ │ 1 │ 5 │ 5 │ [1,2,3,4,5] │ └──────────┴───────┴───────┴──────────────┘ +``` +```sql -- short form - no bound expression, no order by SELECT part_key, @@ -169,14 +232,19 @@ ORDER BY │ 1 │ 4 │ 4 │ [1,2,3,4,5] │ │ 1 │ 5 │ 5 │ [1,2,3,4,5] │ └──────────┴───────┴───────┴──────────────┘ +``` --- frame is bounded by the beggining of a partition and the current row +```sql +-- frame is bounded by the beginning of a partition and the current row SELECT part_key, value, order, - groupArray(value) OVER (PARTITION BY part_key ORDER BY order ASC - Rows BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS frame_values + groupArray(value) OVER ( + PARTITION BY part_key + ORDER BY order ASC + Rows BETWEEN UNBOUNDED 
PRECEDING AND CURRENT ROW + ) AS frame_values FROM wf_frame ORDER BY part_key ASC, @@ -189,8 +257,10 @@ ORDER BY │ 1 │ 4 │ 4 │ [1,2,3,4] │ │ 1 │ 5 │ 5 │ [1,2,3,4,5] │ └──────────┴───────┴───────┴──────────────┘ +``` --- short form (frame is bounded by the beggining of a partition and the current row) +```sql +-- short form (frame is bounded by the beginning of a partition and the current row) SELECT part_key, value, @@ -207,8 +277,10 @@ ORDER BY │ 1 │ 4 │ 4 │ [1,2,3,4] │ │ 1 │ 5 │ 5 │ [1,2,3,4,5] │ └──────────┴───────┴───────┴──────────────┘ +``` --- frame is bounded by the beggining of a partition and the current row, but order is backward +```sql +-- frame is bounded by the beginning of a partition and the current row, but order is backward SELECT part_key, value, @@ -225,14 +297,19 @@ ORDER BY │ 1 │ 4 │ 4 │ [5,4] │ │ 1 │ 5 │ 5 │ [5] │ └──────────┴───────┴───────┴──────────────┘ +``` +```sql -- sliding frame - 1 PRECEDING ROW AND CURRENT ROW SELECT part_key, value, order, - groupArray(value) OVER (PARTITION BY part_key ORDER BY order ASC - Rows BETWEEN 1 PRECEDING AND CURRENT ROW) AS frame_values + groupArray(value) OVER ( + PARTITION BY part_key + ORDER BY order ASC + Rows BETWEEN 1 PRECEDING AND CURRENT ROW + ) AS frame_values FROM wf_frame ORDER BY part_key ASC, @@ -245,14 +322,19 @@ ORDER BY │ 1 │ 4 │ 4 │ [3,4] │ │ 1 │ 5 │ 5 │ [4,5] │ └──────────┴───────┴───────┴──────────────┘ +``` +```sql -- sliding frame - Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING SELECT part_key, value, order, - groupArray(value) OVER (PARTITION BY part_key ORDER BY order ASC - Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING) AS frame_values + groupArray(value) OVER ( + PARTITION BY part_key + ORDER BY order ASC + Rows BETWEEN 1 PRECEDING AND UNBOUNDED FOLLOWING + ) AS frame_values FROM wf_frame ORDER BY part_key ASC, @@ -264,7 +346,9 @@ ORDER BY │ 1 │ 4 │ 4 │ [3,4,5] │ │ 1 │ 5 │ 5 │ [4,5] │ └──────────┴───────┴───────┴──────────────┘ +``` +```sql -- row_number does not respect the frame, so rn_1 = rn_2 = rn_3 != rn_4 SELECT part_key, @@ -278,8 +362,11 @@ SELECT FROM wf_frame WINDOW w1 AS (PARTITION BY part_key ORDER BY order DESC), - w2 AS (PARTITION BY part_key ORDER BY order DESC - Rows BETWEEN 1 PRECEDING AND CURRENT ROW) + w2 AS ( + PARTITION BY part_key + ORDER BY order DESC + Rows BETWEEN 1 PRECEDING AND CURRENT ROW + ) ORDER BY part_key ASC, value ASC; @@ -290,7 +377,9 @@ ORDER BY │ 1 │ 4 │ 4 │ [5,4] │ 2 │ 2 │ 2 │ 2 │ │ 1 │ 5 │ 5 │ [5] │ 1 │ 1 │ 1 │ 1 │ └──────────┴───────┴───────┴──────────────┴──────┴──────┴──────┴──────┘ +``` +```sql -- first_value and last_value respect the frame SELECT groupArray(value) OVER w1 AS frame_values_1, @@ -313,7 +402,9 @@ ORDER BY │ [1,2,3,4] │ 1 │ 4 │ [3,4] │ 3 │ 4 │ │ [1,2,3,4,5] │ 1 │ 5 │ [4,5] │ 4 │ 5 │ └────────────────┴───────────────┴──────────────┴────────────────┴───────────────┴──────────────┘ +``` +```sql -- second value within the frame SELECT groupArray(value) OVER w1 AS frame_values_1, @@ -330,7 +421,9 @@ ORDER BY │ [1,2,3,4] │ 2 │ │ [2,3,4,5] │ 3 │ └────────────────┴──────────────┘ +``` +```sql -- second value within the frame + Null for missing values SELECT groupArray(value) OVER w1 AS frame_values_1, @@ -351,6 +444,8 @@ ORDER BY ## Real world examples +The following examples solve common real-world problems. + ### Maximum/total salary per department. 
```sql @@ -369,7 +464,9 @@ INSERT INTO employees FORMAT Values ('IT', 'Tim', 200), ('IT', 'Anna', 300), ('IT', 'Elen', 500); +``` +```sql SELECT department, employee_name AS emp, @@ -386,8 +483,10 @@ FROM max(salary) OVER wndw AS max_salary_per_dep, sum(salary) OVER wndw AS total_salary_per_dep FROM employees - WINDOW wndw AS (PARTITION BY department - rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) + WINDOW wndw AS ( + PARTITION BY department + rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING + ) ORDER BY department ASC, employee_name ASC @@ -421,7 +520,9 @@ INSERT INTO warehouse VALUES ('sku1', '2020-01-01', 1), ('sku1', '2020-02-01', 1), ('sku1', '2020-03-01', 1); +``` +```sql SELECT item, ts, @@ -461,13 +562,18 @@ insert into sensors values('cpu_temp', '2020-01-01 00:00:00', 87), ('cpu_temp', '2020-01-01 00:00:05', 87), ('cpu_temp', '2020-01-01 00:00:06', 87), ('cpu_temp', '2020-01-01 00:00:07', 87); +``` + +```sql SELECT metric, ts, value, - avg(value) OVER - (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN 2 PRECEDING AND CURRENT ROW) - AS moving_avg_temp + avg(value) OVER ( + PARTITION BY metric + ORDER BY ts ASC + Rows BETWEEN 2 PRECEDING AND CURRENT ROW + ) AS moving_avg_temp FROM sensors ORDER BY metric ASC, @@ -536,7 +642,9 @@ insert into sensors values('ambient_temp', '2020-01-01 00:00:00', 16), ('ambient_temp', '2020-03-01 12:00:00', 16), ('ambient_temp', '2020-03-01 12:00:00', 16), ('ambient_temp', '2020-03-01 12:00:00', 16); +``` +```sql SELECT metric, ts, From 2df818866797c23fc38063663441280059fad565 Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Fri, 22 Mar 2024 11:54:04 +0000 Subject: [PATCH 026/470] Agg functions --- .../sql-reference/window-functions/index.md | 48 +++++++++++++++++-- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 2f44c36acb4f..19821781d0e0 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -101,9 +101,9 @@ Engine = Memory; INSERT INTO salaries FORMAT Values ('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'), - ('Port Elizabeth Barbarians', 'Charles Juarez', 190000, 'F'), + ('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'), ('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'), - ('Port Elizabeth Barbarians', 'Scott Harrison', 150000, 'D'), + ('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'), ('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'); ``` @@ -141,6 +141,46 @@ FROM salaries; └─────────────────┴────────┴─────┴──────┴───────────┘ ``` +### Aggregation functions + +Compare each player's salary to the average for their team. + +```sql +SELECT player, salary, team, + avg(salary) OVER (PARTITION BY team) AS teamAvg, + salary - teamAvg AS diff +FROM salaries; +``` + +```text +┌─player──────────┬─salary─┬─team──────────────────────┬─teamAvg─┬───diff─┐ +│ Charles Juarez │ 190000 │ New Coreystad Archdukes │ 170000 │ 20000 │ +│ Scott Harrison │ 150000 │ New Coreystad Archdukes │ 170000 │ -20000 │ +│ Gary Chen │ 195000 │ Port Elizabeth Barbarians │ 180000 │ 15000 │ +│ Michael Stanley │ 150000 │ Port Elizabeth Barbarians │ 180000 │ -30000 │ +│ Robert George │ 195000 │ Port Elizabeth Barbarians │ 180000 │ 15000 │ +└─────────────────┴────────┴───────────────────────────┴─────────┴────────┘ +``` + +Compare each player's salary to the maximum for their team. 
+ +```sql +SELECT player, salary, team, + max(salary) OVER (PARTITION BY team) AS teamAvg, + salary - teamAvg AS diff +FROM salaries; +``` + +```text +┌─player──────────┬─salary─┬─team──────────────────────┬─teamAvg─┬───diff─┐ +│ Charles Juarez │ 190000 │ New Coreystad Archdukes │ 190000 │ 0 │ +│ Scott Harrison │ 150000 │ New Coreystad Archdukes │ 190000 │ -40000 │ +│ Gary Chen │ 195000 │ Port Elizabeth Barbarians │ 195000 │ 0 │ +│ Michael Stanley │ 150000 │ Port Elizabeth Barbarians │ 195000 │ -45000 │ +│ Robert George │ 195000 │ Port Elizabeth Barbarians │ 195000 │ 0 │ +└─────────────────┴────────┴───────────────────────────┴─────────┴────────┘ +``` + ### Partitioning by column ```sql @@ -446,7 +486,7 @@ ORDER BY The following examples solve common real-world problems. -### Maximum/total salary per department. +### Maximum/total salary per department ```sql CREATE TABLE employees @@ -502,7 +542,7 @@ FROM └────────────┴──────┴────────┴────────────────────┴──────────────────────┴──────────────────┘ ``` -### Cumulative sum. +### Cumulative sum ```sql CREATE TABLE warehouse From e91dc87f824a623c941292a0876223d931dc6b4b Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 21 Mar 2024 17:18:04 +0100 Subject: [PATCH 027/470] Speed up dynamic resize of filesystem cache --- src/Interpreters/Cache/EvictionCandidates.cpp | 72 ++++++++---- src/Interpreters/Cache/EvictionCandidates.h | 18 +++ src/Interpreters/Cache/FileCache.cpp | 46 ++++---- src/Interpreters/Cache/FileCache.h | 1 + src/Interpreters/Cache/IFileCachePriority.h | 14 ++- .../Cache/LRUFileCachePriority.cpp | 103 +++++++++--------- src/Interpreters/Cache/LRUFileCachePriority.h | 23 +++- src/Interpreters/Cache/Metadata.cpp | 79 +++++++------- src/Interpreters/Cache/Metadata.h | 18 ++- .../Cache/SLRUFileCachePriority.cpp | 30 +++++ .../Cache/SLRUFileCachePriority.h | 7 ++ 11 files changed, 276 insertions(+), 135 deletions(-) diff --git a/src/Interpreters/Cache/EvictionCandidates.cpp b/src/Interpreters/Cache/EvictionCandidates.cpp index f1ae2baa3478..e24d671b66e7 100644 --- a/src/Interpreters/Cache/EvictionCandidates.cpp +++ b/src/Interpreters/Cache/EvictionCandidates.cpp @@ -1,6 +1,8 @@ #include #include +#include +namespace fs = std::filesystem; namespace ProfileEvents { @@ -33,40 +35,68 @@ void EvictionCandidates::add(LockedKey & locked_key, const FileSegmentMetadataPt } void EvictionCandidates::evict(FileCacheQueryLimit::QueryContext * query_context, const CachePriorityGuard::Lock & lock) +{ + evictImpl(false, query_context, lock); +} + +std::vector EvictionCandidates::evictFromMemory( + FileCacheQueryLimit::QueryContext * query_context, const CachePriorityGuard::Lock & lock) +{ + return evictImpl(true, query_context, lock); +} + +std::vector EvictionCandidates::evictImpl( + bool remove_only_metadata, + FileCacheQueryLimit::QueryContext * query_context, + const CachePriorityGuard::Lock & lock) { if (candidates.empty()) - return; + return {}; auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::FilesystemCacheEvictMicroseconds); - for (auto & [key, key_candidates] : candidates) + std::vector evicted_paths; + try { - auto locked_key = key_candidates.key_metadata->tryLock(); - if (!locked_key) - continue; /// key could become invalid after we released the key lock above, just skip it. 
- - auto & to_evict = key_candidates.candidates; - while (!to_evict.empty()) + for (auto & [key, key_candidates] : candidates) { - auto & candidate = to_evict.back(); - chassert(candidate->releasable()); + auto locked_key = key_candidates.key_metadata->tryLock(); + if (!locked_key) + continue; /// key could become invalid after we released the key lock above, just skip it. + + auto & to_evict = key_candidates.candidates; + while (!to_evict.empty()) + { + auto & candidate = to_evict.back(); + chassert(candidate->releasable()); + + const auto segment = candidate->file_segment; + auto queue_it = segment->getQueueIterator(); + chassert(queue_it); - const auto segment = candidate->file_segment; - auto queue_it = segment->getQueueIterator(); - chassert(queue_it); + ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedFileSegments); + ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->range().size()); - ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedFileSegments); - ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->range().size()); + if (remove_only_metadata) + evicted_paths.push_back(segment->getPath()); - locked_key->removeFileSegment(segment->offset(), segment->lock()); - queue_it->remove(lock); + locked_key->removeFileSegment( + segment->offset(), segment->lock(), /* can_be_broken */false, remove_only_metadata); - if (query_context) - query_context->remove(segment->key(), segment->offset(), lock); + queue_it->remove(lock); + if (query_context) + query_context->remove(segment->key(), segment->offset(), lock); - to_evict.pop_back(); + to_evict.pop_back(); + } } } + catch (...) + { + for (const auto & path : evicted_paths) + fs::remove(path); + throw; + } + return evicted_paths; } - } diff --git a/src/Interpreters/Cache/EvictionCandidates.h b/src/Interpreters/Cache/EvictionCandidates.h index e817d33d5fef..ec29692d8d37 100644 --- a/src/Interpreters/Cache/EvictionCandidates.h +++ b/src/Interpreters/Cache/EvictionCandidates.h @@ -7,12 +7,23 @@ namespace DB class EvictionCandidates { public: + EvictionCandidates() = default; + EvictionCandidates(EvictionCandidates && other) noexcept + { + candidates = std::move(other.candidates); + candidates_size = std::move(other.candidates_size); + invalidated_queue_entries = std::move(other.invalidated_queue_entries); + } ~EvictionCandidates(); void add(LockedKey & locked_key, const FileSegmentMetadataPtr & candidate); + void add(const EvictionCandidates & other, const CachePriorityGuard::Lock &) { candidates.insert(other.candidates.begin(), other.candidates.end()); } + void evict(FileCacheQueryLimit::QueryContext * query_context, const CachePriorityGuard::Lock &); + std::vector evictFromMemory(FileCacheQueryLimit::QueryContext * query_context, const CachePriorityGuard::Lock &); + size_t size() const { return candidates_size; } auto begin() const { return candidates.begin(); } @@ -28,6 +39,13 @@ class EvictionCandidates std::unordered_map candidates; size_t candidates_size = 0; + + std::vector invalidated_queue_entries; + + std::vector evictImpl( + bool remove_only_metadata, + FileCacheQueryLimit::QueryContext * query_context, + const CachePriorityGuard::Lock & lock); }; using EvictionCandidatesPtr = std::unique_ptr; diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 916bdb8f8989..4e41c308bf26 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -180,6 +180,7 @@ void FileCache::initialize() } 
metadata.startup(); + is_initialized = true; } @@ -1340,34 +1341,33 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, if (new_settings.max_size != actual_settings.max_size || new_settings.max_elements != actual_settings.max_elements) { - cache_is_being_resized.store(true, std::memory_order_relaxed); - SCOPE_EXIT({ - cache_is_being_resized.store(false, std::memory_order_relaxed); - }); - - auto cache_lock = lockCache(); - bool updated = false; - try + std::vector evicted_paths; { - updated = main_priority->modifySizeLimits( + cache_is_being_resized.store(true, std::memory_order_relaxed); + SCOPE_EXIT({ + cache_is_being_resized.store(false, std::memory_order_relaxed); + }); + + auto cache_lock = lockCache(); + FileCacheReserveStat stat; + auto eviction_candidates = main_priority->collectCandidatesForEviction( + new_settings.max_size, new_settings.max_elements, 0/* max_candidates_to_evict */, stat, cache_lock); + + evicted_paths = eviction_candidates.evictFromMemory(nullptr, cache_lock); + + main_priority->modifySizeLimits( new_settings.max_size, new_settings.max_elements, new_settings.slru_size_ratio, cache_lock); } - catch (...) - { - actual_settings.max_size = main_priority->getSizeLimit(cache_lock); - actual_settings.max_elements = main_priority->getElementsLimit(cache_lock); - throw; - } - if (updated) - { - LOG_INFO(log, "Changed max_size from {} to {}, max_elements from {} to {}", - actual_settings.max_size, new_settings.max_size, - actual_settings.max_elements, new_settings.max_elements); + for (const auto & path : evicted_paths) + fs::remove(path); - actual_settings.max_size = main_priority->getSizeLimit(cache_lock); - actual_settings.max_elements = main_priority->getElementsLimit(cache_lock); - } + LOG_INFO(log, "Changed max_size from {} to {}, max_elements from {} to {}", + actual_settings.max_size, new_settings.max_size, + actual_settings.max_elements, new_settings.max_elements); + + actual_settings.max_size = new_settings.max_size; + actual_settings.max_elements = new_settings.max_elements; } if (new_settings.max_file_segment_size != actual_settings.max_file_segment_size) diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 1433a067e7ed..087e1ad344d2 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -18,6 +18,7 @@ #include #include #include +#include #include diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h index 09d71cebb01f..bcde64bce984 100644 --- a/src/Interpreters/Cache/IFileCachePriority.h +++ b/src/Interpreters/Cache/IFileCachePriority.h @@ -107,7 +107,19 @@ class IFileCachePriority : private boost::noncopyable const UserID & user_id, const CachePriorityGuard::Lock &) = 0; - virtual bool modifySizeLimits(size_t max_size_, size_t max_elements_, double size_ratio_, const CachePriorityGuard::Lock &) = 0; + /// Collect eviction `candidates_num` candidates for eviction. 
+ virtual EvictionCandidates collectCandidatesForEviction( + size_t desired_size, + size_t desired_elements_count, + size_t max_candidates_to_evict, + FileCacheReserveStat & stat, + const CachePriorityGuard::Lock &) = 0; + + virtual bool modifySizeLimits( + size_t max_size_, + size_t max_elements_, + double size_ratio_, + const CachePriorityGuard::Lock &) = 0; protected: IFileCachePriority(size_t max_size_, size_t max_elements_); diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index 08e65b577ca2..4189170c5ef1 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -16,9 +16,6 @@ namespace ProfileEvents { extern const Event FilesystemCacheEvictionSkippedFileSegments; extern const Event FilesystemCacheEvictionTries; - extern const Event FilesystemCacheEvictMicroseconds; - extern const Event FilesystemCacheEvictedBytes; - extern const Event FilesystemCacheEvictedFileSegments; } namespace DB @@ -205,17 +202,22 @@ bool LRUFileCachePriority::canFit( /// NOLINT IteratorPtr, bool) const { - return canFit(size, 0, 0, lock); + return canFit(size, 1, 0, 0, lock); } bool LRUFileCachePriority::canFit( size_t size, + size_t elements, size_t released_size_assumption, size_t released_elements_assumption, - const CachePriorityGuard::Lock &) const + const CachePriorityGuard::Lock &, + const size_t * max_size_, + const size_t * max_elements_) const { - return (max_size == 0 || (state->current_size + size - released_size_assumption <= max_size)) - && (max_elements == 0 || state->current_elements_num + 1 - released_elements_assumption <= max_elements); + return (max_size == 0 + || (state->current_size + size - released_size_assumption <= (max_size_ ? *max_size_ : max_size))) + && (max_elements == 0 + || state->current_elements_num + elements - released_elements_assumption <= (max_elements_ ? 
*max_elements_ : max_elements)); } bool LRUFileCachePriority::collectCandidatesForEviction( @@ -230,6 +232,38 @@ bool LRUFileCachePriority::collectCandidatesForEviction( if (canFit(size, lock)) return true; + auto can_fit = [&] + { + return canFit(size, 1, stat.stat.releasable_size, stat.stat.releasable_count, lock); + }; + iterateForEviction(res, stat, can_fit, lock); + return can_fit(); +} + +EvictionCandidates LRUFileCachePriority::collectCandidatesForEviction( + size_t desired_size, + size_t desired_elements_count, + size_t max_candidates_to_evict, + FileCacheReserveStat & stat, + const CachePriorityGuard::Lock & lock) +{ + EvictionCandidates res; + auto stop_condition = [&, this]() + { + return canFit(0, 0, stat.stat.releasable_size, stat.stat.releasable_count, + lock, &desired_size, &desired_elements_count) + || (max_candidates_to_evict && res.size() >= max_candidates_to_evict); + }; + iterateForEviction(res, stat, stop_condition, lock); + return res; +} + +void LRUFileCachePriority::iterateForEviction( + EvictionCandidates & res, + FileCacheReserveStat & stat, + StopConditionFunc stop_condition, + const CachePriorityGuard::Lock & lock) +{ ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictionTries); IterateFunc iterate_func = [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) @@ -244,27 +278,23 @@ bool LRUFileCachePriority::collectCandidatesForEviction( } else { - stat.update(segment_metadata->size(), file_segment->getKind(), false); ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictionSkippedFileSegments); + stat.update(segment_metadata->size(), file_segment->getKind(), false); } return IterationResult::CONTINUE; }; - auto can_fit = [&] - { - return canFit(size, stat.stat.releasable_size, stat.stat.releasable_count, lock); - }; - iterate([&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) { - return can_fit() ? IterationResult::BREAK : iterate_func(locked_key, segment_metadata); + return stop_condition() ? IterationResult::BREAK : iterate_func(locked_key, segment_metadata); }, lock); - - return can_fit(); } -LRUFileCachePriority::LRUIterator LRUFileCachePriority::move(LRUIterator & it, LRUFileCachePriority & other, const CachePriorityGuard::Lock &) +LRUFileCachePriority::LRUIterator LRUFileCachePriority::move( + LRUIterator & it, + LRUFileCachePriority & other, + const CachePriorityGuard::Lock &) { const auto & entry = *it.getEntry(); if (entry.size == 0) @@ -309,45 +339,20 @@ IFileCachePriority::PriorityDumpPtr LRUFileCachePriority::dump(const CachePriori } bool LRUFileCachePriority::modifySizeLimits( - size_t max_size_, size_t max_elements_, double /* size_ratio_ */, const CachePriorityGuard::Lock & lock) + size_t max_size_, size_t max_elements_, double /* size_ratio_ */, const CachePriorityGuard::Lock &) { if (max_size == max_size_ && max_elements == max_elements_) return false; /// Nothing to change. - auto check_limits_satisfied = [&]() - { - return (max_size_ == 0 || state->current_size <= max_size_) - && (max_elements_ == 0 || state->current_elements_num <= max_elements_); - }; - - if (check_limits_satisfied()) + if (state->current_size > max_size_ || state->current_elements_num > max_elements_) { - max_size = max_size_; - max_elements = max_elements_; - return true; + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot modify size limits to {} in size and {} in elements: " + "not enough space freed. 
Current size: {}/{}, elements: {}/{}", + max_size_, max_elements_, + state->current_size, max_size, state->current_elements_num, max_elements); } - auto iterate_func = [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) - { - chassert(segment_metadata->file_segment->assertCorrectness()); - - if (!segment_metadata->releasable()) - return IterationResult::CONTINUE; - - auto segment = segment_metadata->file_segment; - locked_key.removeFileSegment(segment->offset(), segment->lock()); - - ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedFileSegments); - ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->getDownloadedSize()); - return IterationResult::REMOVE_AND_CONTINUE; - }; - - auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::FilesystemCacheEvictMicroseconds); - iterate( - [&](LockedKey & locked_key, const FileSegmentMetadataPtr & segment_metadata) - { return check_limits_satisfied() ? IterationResult::BREAK : iterate_func(locked_key, segment_metadata); }, - lock); - max_size = max_size_; max_elements = max_elements_; return true; diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index 49977c79b810..16034f379f33 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -55,6 +55,13 @@ class LRUFileCachePriority final : public IFileCachePriority const UserID & user_id, const CachePriorityGuard::Lock &) override; + EvictionCandidates collectCandidatesForEviction( + size_t desired_size, + size_t desired_elements_count, + size_t max_candidates_to_evict, + FileCacheReserveStat & stat, + const CachePriorityGuard::Lock &) override; + void shuffle(const CachePriorityGuard::Lock &) override; struct LRUPriorityDump : public IPriorityDump @@ -81,7 +88,14 @@ class LRUFileCachePriority final : public IFileCachePriority void updateElementsCount(int64_t num); void updateSize(int64_t size); - bool canFit(size_t size, size_t released_size_assumption, size_t released_elements_assumption, const CachePriorityGuard::Lock &) const; + bool canFit( + size_t size, + size_t elements, + size_t released_size_assumption, + size_t released_elements_assumption, + const CachePriorityGuard::Lock &, + const size_t * max_size_ = nullptr, + const size_t * max_elements_ = nullptr) const; LRUQueue::iterator remove(LRUQueue::iterator it, const CachePriorityGuard::Lock &); @@ -96,6 +110,13 @@ class LRUFileCachePriority final : public IFileCachePriority LRUIterator move(LRUIterator & it, LRUFileCachePriority & other, const CachePriorityGuard::Lock &); LRUIterator add(EntryPtr entry, const CachePriorityGuard::Lock &); + + using StopConditionFunc = std::function; + void iterateForEviction( + EvictionCandidates & res, + FileCacheReserveStat & stat, + StopConditionFunc stop_condition, + const CachePriorityGuard::Lock &); }; class LRUFileCachePriority::LRUIterator : public IFileCachePriority::Iterator diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index b79605622b62..65bbcb24cfa4 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -899,32 +899,34 @@ bool LockedKey::removeAllFileSegments(bool if_releasable) return removed_all; } -KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, bool can_be_broken) +KeyMetadata::iterator LockedKey::removeFileSegment(size_t offset, bool can_be_broken, bool remove_only_metadata) { auto it = 
key_metadata->find(offset); if (it == key_metadata->end()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no offset {}", offset); auto file_segment = it->second->file_segment; - return removeFileSegmentImpl(it, file_segment->lock(), can_be_broken); + return removeFileSegmentImpl(it, file_segment->lock(), can_be_broken, remove_only_metadata); } KeyMetadata::iterator LockedKey::removeFileSegment( size_t offset, const FileSegmentGuard::Lock & segment_lock, - bool can_be_broken) + bool can_be_broken, + bool remove_only_metadata) { auto it = key_metadata->find(offset); if (it == key_metadata->end()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no offset {} in key {}", offset, getKey()); - return removeFileSegmentImpl(it, segment_lock, can_be_broken); + return removeFileSegmentImpl(it, segment_lock, can_be_broken, remove_only_metadata); } KeyMetadata::iterator LockedKey::removeFileSegmentImpl( KeyMetadata::iterator it, const FileSegmentGuard::Lock & segment_lock, - bool can_be_broken) + bool can_be_broken, + bool remove_only_metadata) { auto file_segment = it->second->file_segment; @@ -939,47 +941,50 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl( file_segment->detach(segment_lock, *this); - try + if (!remove_only_metadata) { - const auto path = key_metadata->getFileSegmentPath(*file_segment); - if (file_segment->segment_kind == FileSegmentKind::Temporary) + try { - /// FIXME: For temporary file segment the requirement is not as strong because - /// the implementation of "temporary data in cache" creates files in advance. - if (fs::exists(path)) + const auto path = key_metadata->getFileSegmentPath(*file_segment); + if (file_segment->segment_kind == FileSegmentKind::Temporary) + { + /// FIXME: For temporary file segment the requirement is not as strong because + /// the implementation of "temporary data in cache" creates files in advance. + if (fs::exists(path)) + fs::remove(path); + } + else if (file_segment->downloaded_size == 0) + { + chassert(!fs::exists(path)); + } + else if (fs::exists(path)) + { fs::remove(path); - } - else if (file_segment->downloaded_size == 0) - { - chassert(!fs::exists(path)); - } - else if (fs::exists(path)) - { - fs::remove(path); - /// Clear OpenedFileCache to avoid reading from incorrect file descriptor. - int flags = file_segment->getFlagsForLocalRead(); - /// Files are created with flags from file_segment->getFlagsForLocalRead() - /// plus optionally O_DIRECT is added, depends on query setting, so remove both. - OpenedFileCache::instance().remove(path, flags); - OpenedFileCache::instance().remove(path, flags | O_DIRECT); + /// Clear OpenedFileCache to avoid reading from incorrect file descriptor. + int flags = file_segment->getFlagsForLocalRead(); + /// Files are created with flags from file_segment->getFlagsForLocalRead() + /// plus optionally O_DIRECT is added, depends on query setting, so remove both. 
+ OpenedFileCache::instance().remove(path, flags); + OpenedFileCache::instance().remove(path, flags | O_DIRECT); - LOG_TEST(key_metadata->logger(), "Removed file segment at path: {}", path); - } - else if (!can_be_broken) - { + LOG_TEST(key_metadata->logger(), "Removed file segment at path: {}", path); + } + else if (!can_be_broken) + { #ifdef ABORT_ON_LOGICAL_ERROR - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path); #else - LOG_WARNING(key_metadata->logger(), "Expected path {} to exist, while removing {}:{}", - path, getKey(), file_segment->offset()); + LOG_WARNING(key_metadata->logger(), "Expected path {} to exist, while removing {}:{}", + path, getKey(), file_segment->offset()); #endif + } + } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + chassert(false); } - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - chassert(false); } return key_metadata->erase(it); diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index c02127cdef30..6f63132c568f 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -269,8 +269,16 @@ struct LockedKey : private boost::noncopyable bool removeAllFileSegments(bool if_releasable = true); - KeyMetadata::iterator removeFileSegment(size_t offset, const FileSegmentGuard::Lock &, bool can_be_broken = false); - KeyMetadata::iterator removeFileSegment(size_t offset, bool can_be_broken = false); + KeyMetadata::iterator removeFileSegment( + size_t offset, + const FileSegmentGuard::Lock &, + bool can_be_broken = false, + bool remove_only_metadata = false); + + KeyMetadata::iterator removeFileSegment( + size_t offset, + bool can_be_broken = false, + bool remove_only_metadata = false); void shrinkFileSegmentToDownloadedSize(size_t offset, const FileSegmentGuard::Lock &); @@ -289,7 +297,11 @@ struct LockedKey : private boost::noncopyable std::string toString() const; private: - KeyMetadata::iterator removeFileSegmentImpl(KeyMetadata::iterator it, const FileSegmentGuard::Lock &, bool can_be_broken = false); + KeyMetadata::iterator removeFileSegmentImpl( + KeyMetadata::iterator it, + const FileSegmentGuard::Lock &, + bool can_be_broken = false, + bool remove_only_metadata_ = false); const std::shared_ptr key_metadata; KeyGuard::Lock lock; /// `lock` must be destructed before `key_metadata`. 
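The hunks above split eviction into two phases: `collectCandidatesForEviction()` picks candidates under the cache lock, `evictFromMemory()` drops only the queue entries and metadata (the new `remove_only_metadata` path) and returns the paths of the evicted segments, and the files themselves are unlinked only after the lock is released. A condensed sketch of that call sequence, using only names that appear in these hunks (`desired_size`/`desired_elements_count` are placeholders for the caller's limits; locking and error handling are simplified, so this is an illustration rather than code from the patch):

```cpp
// Phase 1: under the cache lock, evict from memory only and remember the file paths.
std::vector<std::string> evicted_paths;
{
    auto cache_lock = lockCache();
    FileCacheReserveStat stat;
    auto candidates = main_priority->collectCandidatesForEviction(
        desired_size, desired_elements_count, /* max_candidates_to_evict */ 0, stat, cache_lock);
    evicted_paths = candidates.evictFromMemory(/* query_context */ nullptr, cache_lock);
}   // cache_lock released here

// Phase 2: with the lock released, remove the evicted files from disk.
for (const auto & path : evicted_paths)
    fs::remove(path);
```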
diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp index c97d05d4b84d..a405c237d71f 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp @@ -169,6 +169,36 @@ bool SLRUFileCachePriority::collectCandidatesForEviction( return true; } +EvictionCandidates SLRUFileCachePriority::collectCandidatesForEviction( + size_t desired_size, + size_t desired_elements_count, + size_t max_candidates_to_evict, + FileCacheReserveStat & stat, + const CachePriorityGuard::Lock & lock) +{ + const auto desired_probationary_size = getRatio(desired_size, 1 - size_ratio); + const auto desired_probationary_elements_num = getRatio(desired_elements_count, 1 - size_ratio); + + auto res = probationary_queue.collectCandidatesForEviction( + desired_probationary_size, desired_probationary_elements_num, max_candidates_to_evict, stat, lock); + + chassert(!max_candidates_to_evict || res.size() <= max_candidates_to_evict); + chassert(res.size() == stat.stat.releasable_count); + + if (max_candidates_to_evict && res.size() == max_candidates_to_evict) + return res; + + const auto desired_protected_size = getRatio(max_size, size_ratio); + const auto desired_protected_elements_num = getRatio(max_elements, size_ratio); + + auto res_add = protected_queue.collectCandidatesForEviction( + desired_protected_size, desired_protected_elements_num, + max_candidates_to_evict ? max_candidates_to_evict - res.size() : 0, stat, lock); + + res.add(res_add, lock); + return res; +} + void SLRUFileCachePriority::increasePriority(SLRUIterator & iterator, const CachePriorityGuard::Lock & lock) { /// If entry is already in protected queue, diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h index f90918f78780..b5d75fc7955d 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.h +++ b/src/Interpreters/Cache/SLRUFileCachePriority.h @@ -52,6 +52,13 @@ class SLRUFileCachePriority : public IFileCachePriority const UserID & user_id, const CachePriorityGuard::Lock &) override; + EvictionCandidates collectCandidatesForEviction( + size_t desired_size, + size_t desired_elements_count, + size_t max_candidates_to_evict, + FileCacheReserveStat & stat, + const CachePriorityGuard::Lock &) override; + void shuffle(const CachePriorityGuard::Lock &) override; PriorityDumpPtr dump(const CachePriorityGuard::Lock &) override; From a85886c2e0b1d2997b1b5192fe7a489181668041 Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Fri, 22 Mar 2024 16:26:43 +0000 Subject: [PATCH 028/470] AggregatingMergeTree: Split table creation and MV definition + add more to example --- .../mergetree-family/aggregatingmergetree.md | 51 +++++++++++++++---- 1 file changed, 42 insertions(+), 9 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md index 62191d9b5e4c..7a449f400fdc 100644 --- a/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -68,6 +68,12 @@ In the results of `SELECT` query, the values of `AggregateFunction` type have im ## Example of an Aggregated Materialized View {#example-of-an-aggregated-materialized-view} +The following examples assumes that you have a database named `test` so make sure you create that if it doesn't already exist: + +```sql +CREATE DATABASE test; +``` + We will create the 
table `test.visits` that contain the raw data: ``` sql @@ -80,17 +86,24 @@ CREATE TABLE test.visits ) ENGINE = MergeTree ORDER BY (StartDate, CounterID); ``` +Next, we need to create an `AggregatingMergeTree` table that will store `AggregationFunction`s that keep track of the total number of visits and the number of unique users. + `AggregatingMergeTree` materialized view that watches the `test.visits` table, and use the `AggregateFunction` type: ``` sql -CREATE MATERIALIZED VIEW test.mv_visits -( +CREATE TABLE test.agg_visits ( StartDate DateTime64 NOT NULL, CounterID UInt64, Visits AggregateFunction(sum, Nullable(Int32)), Users AggregateFunction(uniq, Nullable(Int32)) ) -ENGINE = AggregatingMergeTree() ORDER BY (StartDate, CounterID) +ENGINE = AggregatingMergeTree() ORDER BY (StartDate, CounterID); +``` + +And then let's create a materialized view that populates `test.agg_visits` from `test.visits` : + +```sql +CREATE MATERIALIZED VIEW test.visits_mv TO test.agg_visits AS SELECT StartDate, CounterID, @@ -104,25 +117,45 @@ Inserting data into the `test.visits` table. ``` sql INSERT INTO test.visits (StartDate, CounterID, Sign, UserID) - VALUES (1667446031, 1, 3, 4) -INSERT INTO test.visits (StartDate, CounterID, Sign, UserID) - VALUES (1667446031, 1, 6, 3) + VALUES (1667446031000, 1, 3, 4), (1667446031000, 1, 6, 3); ``` -The data is inserted in both the table and the materialized view `test.mv_visits`. +The data is inserted in both `test.visits` and `test.agg_visits`. To get the aggregated data, we need to execute a query such as `SELECT ... GROUP BY ...` from the materialized view `test.mv_visits`: -``` sql +```sql SELECT StartDate, sumMerge(Visits) AS Visits, uniqMerge(Users) AS Users -FROM test.mv_visits +FROM test.agg_visits GROUP BY StartDate ORDER BY StartDate; ``` +```text +┌───────────────StartDate─┬─Visits─┬─Users─┐ +│ 2022-11-03 03:27:11.000 │ 9 │ 2 │ +└─────────────────────────┴────────┴───────┘ +``` + +And how about if we add another couple of records to `test.visits`, but this time we'll use a different timestamp for one of the records: + +```sql +INSERT INTO test.visits (StartDate, CounterID, Sign, UserID) + VALUES (1669446031000, 2, 5, 10), (1667446031000, 3, 7, 5); +``` + +If we then run the `SELECT` query again, we'll see the following output: + +```text +┌───────────────StartDate─┬─Visits─┬─Users─┐ +│ 2022-11-03 03:27:11.000 │ 16 │ 3 │ +│ 2022-11-26 07:00:31.000 │ 5 │ 1 │ +└─────────────────────────┴────────┴───────┘ +``` + ## Related Content - Blog: [Using Aggregate Combinators in ClickHouse](https://clickhouse.com/blog/aggregate-functions-combinators-in-clickhouse-for-arrays-maps-and-states) From 513bb7ddbb2a0ab0dcab55d6637025b7fa36ab8b Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 25 Mar 2024 13:54:06 +0100 Subject: [PATCH 029/470] Minor --- src/Interpreters/Cache/EvictionCandidates.cpp | 9 ++++++++- src/Interpreters/Cache/EvictionCandidates.h | 2 +- src/Interpreters/Cache/SLRUFileCachePriority.cpp | 2 +- 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/Cache/EvictionCandidates.cpp b/src/Interpreters/Cache/EvictionCandidates.cpp index e24d671b66e7..06125f8efc22 100644 --- a/src/Interpreters/Cache/EvictionCandidates.cpp +++ b/src/Interpreters/Cache/EvictionCandidates.cpp @@ -40,7 +40,8 @@ void EvictionCandidates::evict(FileCacheQueryLimit::QueryContext * query_context } std::vector EvictionCandidates::evictFromMemory( - FileCacheQueryLimit::QueryContext * query_context, const CachePriorityGuard::Lock & lock) + 
FileCacheQueryLimit::QueryContext * query_context, + const CachePriorityGuard::Lock & lock) { return evictImpl(true, query_context, lock); } @@ -99,4 +100,10 @@ std::vector EvictionCandidates::evictImpl( } return evicted_paths; } + +void EvictionCandidates::insert(EvictionCandidates && other, const CachePriorityGuard::Lock &) +{ + candidates.insert(make_move_iterator(other.candidates.begin()), make_move_iterator(other.candidates.end())); +} + } diff --git a/src/Interpreters/Cache/EvictionCandidates.h b/src/Interpreters/Cache/EvictionCandidates.h index ec29692d8d37..afe6880d497b 100644 --- a/src/Interpreters/Cache/EvictionCandidates.h +++ b/src/Interpreters/Cache/EvictionCandidates.h @@ -18,7 +18,7 @@ class EvictionCandidates void add(LockedKey & locked_key, const FileSegmentMetadataPtr & candidate); - void add(const EvictionCandidates & other, const CachePriorityGuard::Lock &) { candidates.insert(other.candidates.begin(), other.candidates.end()); } + void insert(EvictionCandidates && other, const CachePriorityGuard::Lock &); void evict(FileCacheQueryLimit::QueryContext * query_context, const CachePriorityGuard::Lock &); diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp index a405c237d71f..681ceef04ea6 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp @@ -195,7 +195,7 @@ EvictionCandidates SLRUFileCachePriority::collectCandidatesForEviction( desired_protected_size, desired_protected_elements_num, max_candidates_to_evict ? max_candidates_to_evict - res.size() : 0, stat, lock); - res.add(res_add, lock); + res.insert(std::move(res_add), lock); return res; } From 3b8763339219f7056e7ae8b8c2107a8f2e28604a Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Mon, 25 Mar 2024 14:09:04 +0000 Subject: [PATCH 030/470] better --- tests/ci/ci.py | 15 +++------------ tests/ci/clickhouse_helper.py | 10 ---------- 2 files changed, 3 insertions(+), 22 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index f2e0828082cb..b913550e3c90 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1616,13 +1616,12 @@ def _upload_build_profile_data( def _add_build_to_version_history( pr_info: PRInfo, job_report: JobReport, - git_ref: str, version: str, docker_tag: str, ch_helper: ClickHouseHelper, ) -> None: # with some probability we will not silently break this logic - assert pr_info.sha and pr_info.commit_html_url and version and git_ref + assert pr_info.sha and pr_info.commit_html_url and pr_info.head_ref and version data = { "check_start_time": job_report.start_time, @@ -1632,19 +1631,12 @@ def _add_build_to_version_history( "commit_url": pr_info.commit_html_url, "version": version, "docker_tag": docker_tag, - "git_ref": git_ref, + "git_ref": pr_info.head_ref, } - json_str = json.dumps(data) - print(f"::notice ::Log Adding record to versions history: {json_str}") - try: - ch_helper.insert_json_into( - db="default", table="version_history", json_str=json_str - ) - except InsertException: - logging.error("Failed to insert profile data for the build, continue") + ch_helper.insert_event_into(db="default", table="version_history", event=data) def _run_test(job_name: str, run_command: str) -> int: @@ -2025,7 +2017,6 @@ def main() -> int: _add_build_to_version_history( pr_info, job_report, - indata["git_ref"], indata["version"], indata["build"], ch_helper, diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index 7a119ee15776..637c4519d3d7 100644 --- 
a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -109,16 +109,6 @@ def _insert_post(*args, **kwargs): def _insert_json_str_info(self, db, table, json_str): self.insert_json_str(self.url, self.auth, db, table, json_str) - def insert_json_into(self, db, table, json_str, safe=True): - try: - self._insert_json_str_info(db, table, json_str) - except InsertException as e: - logging.error( - "Exception happened during inserting data into clickhouse: %s", e - ) - if not safe: - raise - def insert_event_into(self, db, table, event, safe=True): event_str = json.dumps(event) try: From 071a8ff95f5656ab18433dc03b19fce12e5855ab Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 25 Mar 2024 18:55:46 +0100 Subject: [PATCH 031/470] less unformatted exceptions --- src/Storages/MergeTree/MergeTreeData.cpp | 6 +- src/Storages/MergeTree/MergeTreeData.h | 2 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 34 +++++------ .../MergeTree/MergeTreeDataMergerMutator.h | 10 ++-- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 41 +++++++------ .../MergeTree/ReplicatedMergeTreeQueue.h | 8 +-- src/Storages/StorageMergeTree.cpp | 58 +++++++++---------- src/Storages/StorageMergeTree.h | 6 +- src/Storages/StorageReplicatedMergeTree.cpp | 20 +++---- 9 files changed, 91 insertions(+), 94 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e9f3b48f88cc..7a2ddc77724a 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -7773,11 +7773,11 @@ MovePartsOutcome MergeTreeData::moveParts(const CurrentlyMovingPartsTaggerPtr & return result; } -bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right, String & out_reason) +bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right, PreformattedMessage & out_reason) { if (left->getProjectionParts().size() != right->getProjectionParts().size()) { - out_reason = fmt::format( + out_reason = PreformattedMessage::create( "Parts have different number of projections: {} in part '{}' and {} in part '{}'", left->getProjectionParts().size(), left->name, @@ -7791,7 +7791,7 @@ bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const { if (!right->hasProjection(name)) { - out_reason = fmt::format( + out_reason = PreformattedMessage::create( "The part '{}' doesn't have projection '{}' while part '{}' does", right->name, name, left->name ); return false; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 8305c7c6ce90..9081d384a262 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -418,7 +418,7 @@ class MergeTreeData : public IStorage, public WithMutableContext static ReservationPtr tryReserveSpace(UInt64 expected_size, const IDataPartStorage & data_part_storage); static ReservationPtr reserveSpace(UInt64 expected_size, const IDataPartStorage & data_part_storage); - static bool partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right, String & out_reason); + static bool partsContainSameProjections(const DataPartPtr & left, const DataPartPtr & right, PreformattedMessage & out_reason); StoragePolicyPtr getStoragePolicy() const override; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 53d49b51e8fe..2d49e1df19b3 100644 --- 
a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -136,7 +136,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( const AllowedMergingPredicate & can_merge_callback, bool merge_with_ttl_allowed, const MergeTreeTransactionPtr & txn, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, const PartitionIdsHint * partitions_hint) { MergeTreeData::DataPartsVector data_parts = getDataPartsToSelectMergeFrom(txn, partitions_hint); @@ -145,7 +145,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( if (data_parts.empty()) { - out_disable_reason = "There are no parts in the table"; + out_disable_reason = PreformattedMessage::create("There are no parts in the table"); return SelectPartsDecision::CANNOT_SELECT; } @@ -153,7 +153,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( if (info.parts_selected_precondition == 0) { - out_disable_reason = "No parts satisfy preconditions for merge"; + out_disable_reason = PreformattedMessage::create("No parts satisfy preconditions for merge"); return SelectPartsDecision::CANNOT_SELECT; } @@ -177,9 +177,9 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( /*optimize_skip_merged_partitions=*/true); } - if (!out_disable_reason.empty()) - out_disable_reason += ". "; - out_disable_reason += "There is no need to merge parts according to merge selector algorithm"; + if (!out_disable_reason.text.empty()) + out_disable_reason.text += ". "; + out_disable_reason.text += "There is no need to merge parts according to merge selector algorithm"; return SelectPartsDecision::CANNOT_SELECT; } @@ -196,7 +196,7 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart auto metadata_snapshot = data.getInMemoryMetadataPtr(); - String out_reason; + PreformattedMessage out_reason; MergeSelectingInfo info = getPossibleMergeRanges(data_parts, can_merge_callback, txn, out_reason); if (info.parts_selected_precondition == 0) @@ -223,7 +223,7 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart for (size_t i = 0; i < all_partition_ids.size(); ++i) { auto future_part = std::make_shared(); - String out_disable_reason; + PreformattedMessage out_disable_reason; /// This method should have been const, but something went wrong... 
it's const with dry_run = true auto status = const_cast(this)->selectPartsToMergeFromRanges( future_part, /*aggressive*/ false, max_total_size_to_merge, merge_with_ttl_allowed, @@ -232,7 +232,7 @@ MergeTreeDataMergerMutator::PartitionIdsHint MergeTreeDataMergerMutator::getPart if (status == SelectPartsDecision::SELECTED) res.insert(all_partition_ids[i]); else - LOG_TEST(log, "Nothing to merge in partition {}: {}", all_partition_ids[i], out_disable_reason); + LOG_TEST(log, "Nothing to merge in partition {}: {}", all_partition_ids[i], out_disable_reason.text); } String best_partition_id_to_optimize = getBestPartitionToOptimizeEntire(info.partitions_info); @@ -331,7 +331,7 @@ MergeTreeDataMergerMutator::MergeSelectingInfo MergeTreeDataMergerMutator::getPo const MergeTreeData::DataPartsVector & data_parts, const AllowedMergingPredicate & can_merge_callback, const MergeTreeTransactionPtr & txn, - String & out_disable_reason) const + PreformattedMessage & out_disable_reason) const { MergeSelectingInfo res; @@ -444,7 +444,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges( const StorageMetadataPtr & metadata_snapshot, const IMergeSelector::PartsRanges & parts_ranges, const time_t & current_time, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, bool dry_run) { const auto data_settings = data.getSettings(); @@ -515,7 +515,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMergeFromRanges( if (parts_to_merge.empty()) { - out_disable_reason = "Did not find any parts to merge (with usual merge selectors)"; + out_disable_reason = PreformattedMessage::create("Did not find any parts to merge (with usual merge selectors)"); return SelectPartsDecision::CANNOT_SELECT; } } @@ -573,20 +573,20 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti bool final, const StorageMetadataPtr & metadata_snapshot, const MergeTreeTransactionPtr & txn, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, bool optimize_skip_merged_partitions) { MergeTreeData::DataPartsVector parts = selectAllPartsFromPartition(partition_id); if (parts.empty()) { - out_disable_reason = "There are no parts inside partition"; + out_disable_reason = PreformattedMessage::create("There are no parts inside partition"); return SelectPartsDecision::CANNOT_SELECT; } if (!final && parts.size() == 1) { - out_disable_reason = "There is only one part inside partition"; + out_disable_reason = PreformattedMessage::create("There is only one part inside partition"); return SelectPartsDecision::CANNOT_SELECT; } @@ -595,7 +595,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti if (final && optimize_skip_merged_partitions && parts.size() == 1 && parts[0]->info.level > 0 && (!metadata_snapshot->hasAnyTTL() || parts[0]->checkAllTTLCalculated(metadata_snapshot))) { - out_disable_reason = "Partition skipped due to optimize_skip_merged_partitions"; + out_disable_reason = PreformattedMessage::create("Partition skipped due to optimize_skip_merged_partitions"); return SelectPartsDecision::NOTHING_TO_MERGE; } @@ -636,7 +636,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectAllPartsToMergeWithinParti static_cast((DISK_USAGE_COEFFICIENT_TO_SELECT - 1.0) * 100)); } - out_disable_reason = fmt::format("Insufficient available disk space, required {}", ReadableSize(required_disk_space)); + out_disable_reason = PreformattedMessage::create("Insufficient available disk space, required {}", 
ReadableSize(required_disk_space)); return SelectPartsDecision::CANNOT_SELECT; } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 669ee040af33..aad34bfb914c 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -43,7 +43,7 @@ class MergeTreeDataMergerMutator using AllowedMergingPredicate = std::function; + PreformattedMessage &)>; explicit MergeTreeDataMergerMutator(MergeTreeData & data_); @@ -92,7 +92,7 @@ class MergeTreeDataMergerMutator const MergeTreeData::DataPartsVector & data_parts, const AllowedMergingPredicate & can_merge_callback, const MergeTreeTransactionPtr & txn, - String & out_disable_reason) const; + PreformattedMessage & out_disable_reason) const; /// The third step of selecting parts to merge: takes ranges that we can merge, and selects parts that we want to merge SelectPartsDecision selectPartsToMergeFromRanges( @@ -103,7 +103,7 @@ class MergeTreeDataMergerMutator const StorageMetadataPtr & metadata_snapshot, const IMergeSelector::PartsRanges & parts_ranges, const time_t & current_time, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, bool dry_run = false); String getBestPartitionToOptimizeEntire(const PartitionsInfo & partitions_info) const; @@ -129,7 +129,7 @@ class MergeTreeDataMergerMutator const AllowedMergingPredicate & can_merge, bool merge_with_ttl_allowed, const MergeTreeTransactionPtr & txn, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, const PartitionIdsHint * partitions_hint = nullptr); /** Select all the parts in the specified partition for merge, if possible. @@ -144,7 +144,7 @@ class MergeTreeDataMergerMutator bool final, const StorageMetadataPtr & metadata_snapshot, const MergeTreeTransactionPtr & txn, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, bool optimize_skip_merged_partitions = false); /** Creates a task to merge parts. 
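The signature changes in this header trade `String & out_disable_reason` for `PreformattedMessage &`, which is what the commit subject ("less unformatted exceptions") refers to: the reason keeps its format string and arguments, so call sites can turn it into a properly formatted `Exception` instead of wrapping an already-rendered string. A minimal sketch of the pattern, reusing calls that appear verbatim in the surrounding hunks (variable names are illustrative only):

```cpp
// Build the reason once; the format string is preserved inside the message.
PreformattedMessage out_reason = PreformattedMessage::create(
    "Part {} has already been assigned a merge into {}", part->name, containing_part);

// The same object can later back a structured exception (or a log line),
// replacing the old Exception::createDeprecated(out_reason, code) call.
throw Exception(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED);
```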
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 42f564f40dae..d7168ff57be5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -2266,7 +2266,7 @@ bool BaseMergePredicate::operator()( const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, const MergeTreeTransaction *, - String & out_reason) const + PreformattedMessage & out_reason) const { if (left) return canMergeTwoParts(left, right, out_reason); @@ -2278,7 +2278,7 @@ template bool BaseMergePredicate::canMergeTwoParts( const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, - String & out_reason) const + PreformattedMessage & out_reason) const { /// A sketch of a proof of why this method actually works: /// @@ -2322,19 +2322,19 @@ bool BaseMergePredicate::canMergeTwoParts( { if (pinned_part_uuids_ && pinned_part_uuids_->part_uuids.contains(part->uuid)) { - out_reason = "Part " + part->name + " has uuid " + toString(part->uuid) + " which is currently pinned"; + out_reason = PreformattedMessage::create("Part {} has uuid {} which is currently pinned", part->name, part->uuid); return false; } if (inprogress_quorum_part_ && part->name == *inprogress_quorum_part_) { - out_reason = "Quorum insert for part " + part->name + " is currently in progress"; + out_reason = PreformattedMessage::create("Quorum insert for part {} is currently in progress", part->name); return false; } if (prev_virtual_parts_ && prev_virtual_parts_->getContainingPart(part->info).empty()) { - out_reason = "Entry for part " + part->name + " hasn't been read from the replication log yet"; + out_reason = PreformattedMessage::create("Entry for part {} hasn't been read from the replication log yet", part->name); return false; } } @@ -2348,7 +2348,7 @@ bool BaseMergePredicate::canMergeTwoParts( { if (partition_ids_hint && !partition_ids_hint->contains(left->info.partition_id)) { - out_reason = fmt::format("Uncommitted block were not loaded for unexpected partition {}", left->info.partition_id); + out_reason = PreformattedMessage::create("Uncommitted block were not loaded for unexpected partition {}", left->info.partition_id); return false; } @@ -2360,8 +2360,7 @@ bool BaseMergePredicate::canMergeTwoParts( auto block_it = block_numbers.upper_bound(left_max_block); if (block_it != block_numbers.end() && *block_it < right_min_block) { - out_reason = "Block number " + toString(*block_it) + " is still being inserted between parts " - + left->name + " and " + right->name; + out_reason = PreformattedMessage::create("Block number {} is still being inserted between parts {} and {}", *block_it, left->name, right->name); return false; } } @@ -2380,7 +2379,7 @@ bool BaseMergePredicate::canMergeTwoParts( String containing_part = virtual_parts_->getContainingPart(part->info); if (containing_part != part->name) { - out_reason = "Part " + part->name + " has already been assigned a merge into " + containing_part; + out_reason = PreformattedMessage::create("Part {} has already been assigned a merge into {}", part->name, containing_part); return false; } } @@ -2397,9 +2396,9 @@ bool BaseMergePredicate::canMergeTwoParts( Strings covered = virtual_parts_->getPartsCoveredBy(gap_part_info); if (!covered.empty()) { - out_reason = "There are " + toString(covered.size()) + " parts (from " + covered.front() - + " to " + covered.back() + ") that are still not present or being processed by " - + " other 
background process on this replica between " + left->name + " and " + right->name; + out_reason = PreformattedMessage::create("There are {} parts (from {} to {}) " + "that are still not present or being processed by other background process " + "on this replica between {} and {}", covered.size(), covered.front(), covered.back(), left->name, right->name); return false; } } @@ -2415,8 +2414,8 @@ bool BaseMergePredicate::canMergeTwoParts( if (left_mutation_ver != right_mutation_ver) { - out_reason = "Current mutation versions of parts " + left->name + " and " + right->name + " differ: " - + toString(left_mutation_ver) + " and " + toString(right_mutation_ver) + " respectively"; + out_reason = PreformattedMessage::create("Current mutation versions of parts {} and {} differ: " + "{} and {} respectively", left->name, right->name, left_mutation_ver, right_mutation_ver); return false; } } @@ -2427,23 +2426,23 @@ bool BaseMergePredicate::canMergeTwoParts( template bool BaseMergePredicate::canMergeSinglePart( const MergeTreeData::DataPartPtr & part, - String & out_reason) const + PreformattedMessage & out_reason) const { if (pinned_part_uuids_ && pinned_part_uuids_->part_uuids.contains(part->uuid)) { - out_reason = fmt::format("Part {} has uuid {} which is currently pinned", part->name, part->uuid); + out_reason = PreformattedMessage::create("Part {} has uuid {} which is currently pinned", part->name, part->uuid); return false; } if (inprogress_quorum_part_ && part->name == *inprogress_quorum_part_) { - out_reason = fmt::format("Quorum insert for part {} is currently in progress", part->name); + out_reason = PreformattedMessage::create("Quorum insert for part {} is currently in progress", part->name); return false; } if (prev_virtual_parts_ && prev_virtual_parts_->getContainingPart(part->info).empty()) { - out_reason = fmt::format("Entry for part {} hasn't been read from the replication log yet", part->name); + out_reason = PreformattedMessage::create("Entry for part {} hasn't been read from the replication log yet", part->name); return false; } @@ -2458,7 +2457,7 @@ bool BaseMergePredicate::canMergeSinglePart( String containing_part = virtual_parts_->getContainingPart(part->info); if (containing_part != part->name) { - out_reason = fmt::format("Part {} has already been assigned a merge into {}", part->name, containing_part); + out_reason = PreformattedMessage::create("Part {} has already been assigned a merge into {}", part->name, containing_part); return false; } } @@ -2467,7 +2466,7 @@ bool BaseMergePredicate::canMergeSinglePart( } -bool ReplicatedMergeTreeMergePredicate::partParticipatesInReplaceRange(const MergeTreeData::DataPartPtr & part, String & out_reason) const +bool ReplicatedMergeTreeMergePredicate::partParticipatesInReplaceRange(const MergeTreeData::DataPartPtr & part, PreformattedMessage & out_reason) const { std::lock_guard lock(queue.state_mutex); for (const auto & entry : queue.queue) @@ -2480,7 +2479,7 @@ bool ReplicatedMergeTreeMergePredicate::partParticipatesInReplaceRange(const Mer if (part->info.isDisjoint(MergeTreePartInfo::fromPartName(part_name, queue.format_version))) continue; - out_reason = fmt::format("Part {} participates in REPLACE_RANGE {} ({})", part_name, entry->new_part_name, entry->znode_name); + out_reason = PreformattedMessage::create("Part {} participates in REPLACE_RANGE {} ({})", part_name, entry->new_part_name, entry->znode_name); return true; } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h 
b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index b17e78199463..85f3aacc766e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -505,19 +505,19 @@ class BaseMergePredicate bool operator()(const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, const MergeTreeTransaction * txn, - String & out_reason) const; + PreformattedMessage & out_reason) const; /// Can we assign a merge with these two parts? /// (assuming that no merge was assigned after the predicate was constructed) /// If we can't and out_reason is not nullptr, set it to the reason why we can't merge. bool canMergeTwoParts(const MergeTreeData::DataPartPtr & left, const MergeTreeData::DataPartPtr & right, - String & out_reason) const; + PreformattedMessage & out_reason) const; /// Can we assign a merge this part and some other part? /// For example a merge of a part and itself is needed for TTL. /// This predicate is checked for the first part of each range. - bool canMergeSinglePart(const MergeTreeData::DataPartPtr & part, String & out_reason) const; + bool canMergeSinglePart(const MergeTreeData::DataPartPtr & part, PreformattedMessage & out_reason) const; CommittingBlocks getCommittingBlocks(zkutil::ZooKeeperPtr & zookeeper, const std::string & zookeeper_path, LoggerPtr log_); @@ -561,7 +561,7 @@ class ReplicatedMergeTreeMergePredicate : public BaseMergePredicate & lock, const MergeTreeTransactionPtr & txn, @@ -951,7 +951,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( CurrentlyMergingPartsTaggerPtr merging_tagger; MergeList::EntryPtr merge_entry; - auto can_merge = [this, &lock](const DataPartPtr & left, const DataPartPtr & right, const MergeTreeTransaction * tx, String & disable_reason) -> bool + auto can_merge = [this, &lock](const DataPartPtr & left, const DataPartPtr & right, const MergeTreeTransaction * tx, PreformattedMessage & disable_reason) -> bool { if (tx) { @@ -960,7 +960,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if ((left && !left->version.isVisible(tx->getSnapshot(), Tx::EmptyTID)) || (right && !right->version.isVisible(tx->getSnapshot(), Tx::EmptyTID))) { - disable_reason = "Some part is not visible in transaction"; + disable_reason = PreformattedMessage::create("Some part is not visible in transaction"); return false; } @@ -968,7 +968,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if ((left && left->version.isRemovalTIDLocked()) || (right && right->version.isRemovalTIDLocked())) { - disable_reason = "Some part is locked for removal in another cuncurrent transaction"; + disable_reason = PreformattedMessage::create("Some part is locked for removal in another cuncurrent transaction"); return false; } } @@ -979,7 +979,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( { if (currently_merging_mutating_parts.contains(right)) { - disable_reason = "Some part currently in a merging or mutating process"; + disable_reason = PreformattedMessage::create("Some part currently in a merging or mutating process"); return false; } else @@ -988,13 +988,13 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if (currently_merging_mutating_parts.contains(left) || currently_merging_mutating_parts.contains(right)) { - disable_reason = "Some part currently in a merging or mutating process"; + disable_reason = PreformattedMessage::create("Some part currently in a merging or mutating process"); return false; } if 
(getCurrentMutationVersion(left, lock) != getCurrentMutationVersion(right, lock)) { - disable_reason = "Some parts have different mutation version"; + disable_reason = PreformattedMessage::create("Some parts have different mutation version"); return false; } @@ -1004,7 +1004,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( auto max_possible_level = getMaxLevelInBetween(left, right); if (max_possible_level > std::max(left->info.level, right->info.level)) { - disable_reason = fmt::format("There is an outdated part in a gap between two active parts ({}, {}) with merge level {} higher than these active parts have", left->name, right->name, max_possible_level); + disable_reason = PreformattedMessage::create("There is an outdated part in a gap between two active parts ({}, {}) with merge level {} higher than these active parts have", left->name, right->name, max_possible_level); return false; } @@ -1013,11 +1013,11 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( SelectPartsDecision select_decision = SelectPartsDecision::CANNOT_SELECT; - auto is_background_memory_usage_ok = [](String & disable_reason) -> bool + auto is_background_memory_usage_ok = [](PreformattedMessage & disable_reason) -> bool { if (canEnqueueBackgroundTask()) return true; - disable_reason = fmt::format("Current background tasks memory usage ({}) is more than the limit ({})", + disable_reason = PreformattedMessage::create("Current background tasks memory usage ({}) is more than the limit ({})", formatReadableSizeWithBinarySuffix(background_memory_tracker.get()), formatReadableSizeWithBinarySuffix(background_memory_tracker.getSoftLimit())); return false; @@ -1045,7 +1045,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( out_disable_reason); } else - out_disable_reason = "Current value of max_source_parts_size is zero"; + out_disable_reason = PreformattedMessage::create("Current value of max_source_parts_size is zero"); } } else @@ -1086,7 +1086,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if (std::cv_status::timeout == currently_processing_in_background_condition.wait_for(lock, timeout)) { - out_disable_reason = fmt::format("Timeout ({} ms) while waiting for already running merges before running OPTIMIZE with FINAL", timeout_ms); + out_disable_reason = PreformattedMessage::create("Timeout ({} ms) while waiting for already running merges before running OPTIMIZE with FINAL", timeout_ms); break; } } @@ -1102,9 +1102,9 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMerge( if (select_decision != SelectPartsDecision::SELECTED) { - if (!out_disable_reason.empty()) - out_disable_reason += ". "; - out_disable_reason += "Cannot select parts for optimization"; + if (!out_disable_reason.text.empty()) + out_disable_reason.text += ". 
"; + out_disable_reason.text += "Cannot select parts for optimization"; return {}; } @@ -1125,7 +1125,7 @@ bool StorageMergeTree::merge( const Names & deduplicate_by_columns, bool cleanup, const MergeTreeTransactionPtr & txn, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, bool optimize_skip_merged_partitions) { auto table_lock_holder = lockForShare(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); @@ -1180,7 +1180,7 @@ bool StorageMergeTree::partIsAssignedToBackgroundOperation(const DataPartPtr & p } MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( - const StorageMetadataPtr & metadata_snapshot, String & /* disable_reason */, TableLockHolder & /* table_lock_holder */, + const StorageMetadataPtr & metadata_snapshot, PreformattedMessage & /* disable_reason */, TableLockHolder & /* table_lock_holder */, std::unique_lock & /*currently_processing_in_background_mutex_lock*/) { if (current_mutations_by_version.empty()) @@ -1396,7 +1396,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign if (merger_mutator.merges_blocker.isCancelled()) return false; - String out_reason; + PreformattedMessage out_reason; merge_entry = selectPartsToMerge(metadata_snapshot, false, {}, false, out_reason, shared_lock, lock, txn); if (!merge_entry && !current_mutations_by_version.empty()) @@ -1559,14 +1559,12 @@ bool StorageMergeTree::optimize( auto txn = local_context->getCurrentTransaction(); - String disable_reason; + PreformattedMessage disable_reason; if (!partition && final) { if (cleanup && this->merging_params.mode != MergingParams::Mode::Replacing) { - constexpr const char * message = "Cannot OPTIMIZE with CLEANUP table: {}"; - disable_reason = "only ReplacingMergeTree can be CLEANUP"; - throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason); + throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, "Cannot OPTIMIZE with CLEANUP table: only ReplacingMergeTree can be CLEANUP"); } if (cleanup && !getSettings()->allow_experimental_replacing_merge_with_cleanup) @@ -1592,12 +1590,12 @@ bool StorageMergeTree::optimize( local_context->getSettingsRef().optimize_skip_merged_partitions)) { constexpr auto message = "Cannot OPTIMIZE table: {}"; - if (disable_reason.empty()) - disable_reason = "unknown reason"; - LOG_INFO(log, message, disable_reason); + if (disable_reason.text.empty()) + disable_reason = PreformattedMessage::create("unknown reason"); + LOG_INFO(log, message, disable_reason.text); if (local_context->getSettingsRef().optimize_throw_if_noop) - throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason); + throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason.text); return false; } } @@ -1620,12 +1618,12 @@ bool StorageMergeTree::optimize( local_context->getSettingsRef().optimize_skip_merged_partitions)) { constexpr auto message = "Cannot OPTIMIZE table: {}"; - if (disable_reason.empty()) - disable_reason = "unknown reason"; - LOG_INFO(log, message, disable_reason); + if (disable_reason.text.empty()) + disable_reason = PreformattedMessage::create("unknown reason"); + LOG_INFO(log, message, disable_reason.text); if (local_context->getSettingsRef().optimize_throw_if_noop) - throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason); + throw Exception(ErrorCodes::CANNOT_ASSIGN_OPTIMIZE, message, disable_reason.text); return false; } } diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 
c384a3912916..02217e6d1380 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -175,7 +175,7 @@ class StorageMergeTree final : public MergeTreeData const Names & deduplicate_by_columns, bool cleanup, const MergeTreeTransactionPtr & txn, - String & out_disable_reason, + PreformattedMessage & out_disable_reason, bool optimize_skip_merged_partitions = false); void renameAndCommitEmptyParts(MutableDataPartsVector & new_parts, Transaction & transaction); @@ -202,7 +202,7 @@ class StorageMergeTree final : public MergeTreeData bool aggressive, const String & partition_id, bool final, - String & disable_reason, + PreformattedMessage & disable_reason, TableLockHolder & table_lock_holder, std::unique_lock & lock, const MergeTreeTransactionPtr & txn, @@ -211,7 +211,7 @@ class StorageMergeTree final : public MergeTreeData MergeMutateSelectedEntryPtr selectPartsToMutate( - const StorageMetadataPtr & metadata_snapshot, String & disable_reason, + const StorageMetadataPtr & metadata_snapshot, PreformattedMessage & disable_reason, TableLockHolder & table_lock_holder, std::unique_lock & currently_processing_in_background_mutex_lock); /// For current mutations queue, returns maximum version of mutation for a part, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index c41403e312bf..2feaca6ba48a 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3791,7 +3791,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask() merge_pred.emplace(queue.getMergePredicate(zookeeper, partitions_to_merge_in)); } - String out_reason; + PreformattedMessage out_reason; if (can_assign_merge && merger_mutator.selectPartsToMerge(future_merged_part, false, max_source_parts_size_for_merge, *merge_pred, merge_with_ttl_allowed, NO_TRANSACTION_PTR, out_reason, &partitions_to_merge_in) == SelectPartsDecision::SELECTED) @@ -5773,7 +5773,7 @@ bool StorageReplicatedMergeTree::optimize( future_merged_part->uuid = UUIDHelpers::generateV4(); constexpr const char * unknown_disable_reason = "unknown reason"; - String disable_reason = unknown_disable_reason; + PreformattedMessage disable_reason = PreformattedMessage::create(unknown_disable_reason); SelectPartsDecision select_decision = SelectPartsDecision::CANNOT_SELECT; if (partition_id.empty()) @@ -5796,10 +5796,10 @@ bool StorageReplicatedMergeTree::optimize( if (select_decision != SelectPartsDecision::SELECTED) { constexpr const char * message_fmt = "Cannot select parts for optimization: {}"; - assert(disable_reason != unknown_disable_reason); + assert(disable_reason.text != unknown_disable_reason); if (!partition_id.empty()) - disable_reason += fmt::format(" (in partition {})", partition_id); - return handle_noop(message_fmt, disable_reason); + disable_reason.text += fmt::format(" (in partition {})", partition_id); + return handle_noop(message_fmt, disable_reason.text); } ReplicatedMergeTreeLogEntryData merge_entry; @@ -8465,9 +8465,9 @@ void StorageReplicatedMergeTree::movePartitionToShard( } /// canMergeSinglePart is overlapping with dropPart, let's try to use the same code. 
- String out_reason; + PreformattedMessage out_reason; if (!merge_pred.canMergeSinglePart(part, out_reason)) - throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Part is busy, reason: {}", out_reason); + throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Part is busy, reason: {}", out_reason.text); } { @@ -8725,18 +8725,18 @@ bool StorageReplicatedMergeTree::dropPartImpl( /// There isn't a lot we can do otherwise. Can't cancel merges because it is possible that a replica already /// finished the merge. - String out_reason; + PreformattedMessage out_reason; if (!merge_pred.canMergeSinglePart(part, out_reason)) { if (throw_if_noop) - throw Exception::createDeprecated(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED); + throw Exception(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED); return false; } if (merge_pred.partParticipatesInReplaceRange(part, out_reason)) { if (throw_if_noop) - throw Exception::createDeprecated(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED); + throw Exception(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED); return false; } From 2e066966b1349cee1db04461ab5f8214b1c404cf Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 25 Mar 2024 22:17:43 +0000 Subject: [PATCH 032/470] Add log message, use storesDataOnDisk method --- src/Interpreters/InterpreterDropQuery.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index d067f5de1701..085210aae8c4 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -155,9 +155,13 @@ BlockIO InterpreterDropQuery::executeToTableImpl(const ContextPtr & context_, AS if (settings.ignore_drop_queries_probability != 0 && ast_drop_query.kind == ASTDropQuery::Kind::Drop && std::uniform_real_distribution<>(0.0, 1.0)(thread_local_rng) <= settings.ignore_drop_queries_probability) { ast_drop_query.sync = false; - if (table->getName() != "Memory" && table->getName() != "Join") + if (table->storesDataOnDisk()) + { + LOG_TEST(getLogger("InterpreterDropQuery"), "Ignore DROP TABLE query for table {}.{}", table_id.database_name, table_id.table_name); return {}; + } + LOG_TEST(getLogger("InterpreterDropQuery"), "Replace DROP TABLE query to TRUNCATE TABLE for table {}.{}", table_id.database_name, table_id.table_name); ast_drop_query.kind = ASTDropQuery::Truncate; } From be2767b55c90c4eb89b0f5c15cd5961d9594b0a2 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 26 Mar 2024 10:53:08 +0100 Subject: [PATCH 033/470] Make ClientBase::parseQuery() static Signed-off-by: Azat Khuzhin --- programs/client/Client.cpp | 6 +++++- src/Client/ClientBase.cpp | 19 +++++++++++++------ src/Client/ClientBase.h | 2 +- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index d4bf2f686c88..abc0a4bdd348 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -686,7 +686,11 @@ bool Client::processWithFuzzing(const String & full_query) try { const char * begin = full_query.data(); - orig_ast = parseQuery(begin, begin + full_query.size(), true); + orig_ast = parseQuery(begin, begin + full_query.size(), + global_context->getSettingsRef(), + /*allow_multi_statements=*/ true, + /*is_interactive=*/ is_interactive, + /*ignore_error=*/ ignore_error); } catch (const Exception & e) { diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 767a9b2b9f9e..01bf4c820c8d 100644 --- a/src/Client/ClientBase.cpp +++ 
b/src/Client/ClientBase.cpp @@ -329,12 +329,11 @@ void ClientBase::setupSignalHandler() } -ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const +ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements, bool is_interactive, bool ignore_error) { std::unique_ptr parser; ASTPtr res; - const auto & settings = global_context->getSettingsRef(); size_t max_length = 0; if (!allow_multi_statements) @@ -343,11 +342,11 @@ ASTPtr ClientBase::parseQuery(const char *& pos, const char * end, bool allow_mu const Dialect & dialect = settings.dialect; if (dialect == Dialect::kusto) - parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); + parser = std::make_unique(end, settings.allow_settings_after_format_in_insert); else if (dialect == Dialect::prql) parser = std::make_unique(max_length, settings.max_parser_depth, settings.max_parser_backtracks); else - parser = std::make_unique(end, global_context->getSettings().allow_settings_after_format_in_insert); + parser = std::make_unique(end, settings.allow_settings_after_format_in_insert); if (is_interactive || ignore_error) { @@ -907,7 +906,11 @@ void ClientBase::processTextAsSingleQuery(const String & full_query) /// Some parts of a query (result output and formatting) are executed /// client-side. Thus we need to parse the query. const char * begin = full_query.data(); - auto parsed_query = parseQuery(begin, begin + full_query.size(), false); + auto parsed_query = parseQuery(begin, begin + full_query.size(), + global_context->getSettingsRef(), + /*allow_multi_statements=*/ false, + is_interactive, + ignore_error); if (!parsed_query) return; @@ -2084,7 +2087,11 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( this_query_end = this_query_begin; try { - parsed_query = parseQuery(this_query_end, all_queries_end, true); + parsed_query = parseQuery(this_query_end, all_queries_end, + global_context->getSettingsRef(), + /*allow_multi_statements=*/ true, + is_interactive, + ignore_error); } catch (Exception & e) { diff --git a/src/Client/ClientBase.h b/src/Client/ClientBase.h index 9ec87ababfc9..3657c5c35464 100644 --- a/src/Client/ClientBase.h +++ b/src/Client/ClientBase.h @@ -73,6 +73,7 @@ class ClientBase : public Poco::Util::Application, public IHints<2> void init(int argc, char ** argv); std::vector getAllRegisteredNames() const override { return cmd_options; } + static ASTPtr parseQuery(const char *& pos, const char * end, const Settings & settings, bool allow_multi_statements, bool is_interactive, bool ignore_error); protected: void runInteractive(); @@ -98,7 +99,6 @@ class ClientBase : public Poco::Util::Application, public IHints<2> ASTPtr parsed_query, std::optional echo_query_ = {}, bool report_error = false); static void adjustQueryEnd(const char *& this_query_end, const char * all_queries_end, uint32_t max_parser_depth, uint32_t max_parser_backtracks); - ASTPtr parseQuery(const char *& pos, const char * end, bool allow_multi_statements) const; static void setupSignalHandler(); bool executeMultiQuery(const String & all_queries_text); From 469be8ee8a904bead3ee33db38c66ee1e5b431cc Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 25 Mar 2024 16:39:33 +0100 Subject: [PATCH 034/470] Implement input() for clickhouse-local Signed-off-by: Azat Khuzhin --- programs/local/LocalServer.cpp | 14 +++- programs/local/LocalServer.h | 2 + src/Client/LocalConnection.cpp | 64 ++++++++++++++++++- 
src/Client/LocalConnection.h | 15 ++++- .../03031_clickhouse_local_input.reference | 7 ++ .../03031_clickhouse_local_input.sh | 20 ++++++ 6 files changed, 118 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/03031_clickhouse_local_input.reference create mode 100755 tests/queries/0_stateless/03031_clickhouse_local_input.sh diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 05c9830ee2c2..1c2ccc5383ff 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -413,8 +413,20 @@ void LocalServer::setupUsers() void LocalServer::connect() { connection_parameters = ConnectionParameters(config(), "localhost"); + + ReadBuffer * in; + auto table_file = config().getString("table-file", "-"); + if (table_file == "-" || table_file == "stdin") + { + in = &std_in; + } + else + { + input = std::make_unique(table_file); + in = input.get(); + } connection = LocalConnection::createConnection( - connection_parameters, global_context, need_render_progress, need_render_profile_events, server_display_name); + connection_parameters, global_context, in, need_render_progress, need_render_profile_events, server_display_name); } diff --git a/programs/local/LocalServer.h b/programs/local/LocalServer.h index ca0ce513b093..9b67aab02d4a 100644 --- a/programs/local/LocalServer.h +++ b/programs/local/LocalServer.h @@ -65,6 +65,8 @@ class LocalServer : public ClientBase, public Loggers std::optional status; std::optional temporary_directory_to_delete; + + std::unique_ptr input; }; } diff --git a/src/Client/LocalConnection.cpp b/src/Client/LocalConnection.cpp index afcaa4d60985..c7494e316057 100644 --- a/src/Client/LocalConnection.cpp +++ b/src/Client/LocalConnection.cpp @@ -1,11 +1,18 @@ #include "LocalConnection.h" +#include +#include #include #include #include +#include #include #include #include #include +#include +#include +#include +#include #include #include #include @@ -22,12 +29,13 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -LocalConnection::LocalConnection(ContextPtr context_, bool send_progress_, bool send_profile_events_, const String & server_display_name_) +LocalConnection::LocalConnection(ContextPtr context_, ReadBuffer * in_, bool send_progress_, bool send_profile_events_, const String & server_display_name_) : WithContext(context_) , session(getContext(), ClientInfo::Interface::LOCAL) , send_progress(send_progress_) , send_profile_events(send_profile_events_) , server_display_name(server_display_name_) + , in(in_) { /// Authenticate and create a context to execute queries. 
session.authenticate("default", "", Poco::Net::SocketAddress{}); @@ -130,6 +138,57 @@ void LocalConnection::sendQuery( next_packet_type.reset(); + /// Prepare input() function + query_context->setInputInitializer([this] (ContextPtr context, const StoragePtr & input_storage) + { + if (context != query_context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected context in Input initializer"); + + auto metadata_snapshot = input_storage->getInMemoryMetadataPtr(); + Block sample = metadata_snapshot->getSampleBlock(); + + next_packet_type = Protocol::Server::Data; + state->block = sample; + + String current_format = "Values"; + const char * begin = state->query.data(); + auto parsed_query = ClientBase::parseQuery(begin, begin + state->query.size(), + context->getSettingsRef(), + /*allow_multi_statements=*/ false, + /*is_interactive=*/ false, + /*ignore_error=*/ false); + if (const auto * insert = parsed_query->as()) + { + if (!insert->format.empty()) + current_format = insert->format; + } + + auto source = context->getInputFormat(current_format, *in, sample, context->getSettingsRef().max_insert_block_size); + Pipe pipe(source); + + auto columns_description = metadata_snapshot->getColumns(); + if (columns_description.hasDefaults()) + { + pipe.addSimpleTransform([&](const Block & header) + { + return std::make_shared(header, columns_description, *source, context); + }); + } + + state->input_pipeline = std::make_unique(std::move(pipe)); + state->input_pipeline_executor = std::make_unique(*state->input_pipeline); + + }); + query_context->setInputBlocksReaderCallback([this] (ContextPtr context) -> Block + { + if (context != query_context) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected context in InputBlocksReader"); + + Block block; + state->input_pipeline_executor->pull(block); + return block; + }); + try { state->io = executeQuery(state->query, query_context, QueryFlags{}, state->stage).second; @@ -537,11 +596,12 @@ void LocalConnection::sendMergeTreeReadTaskResponse(const ParallelReadResponse & ServerConnectionPtr LocalConnection::createConnection( const ConnectionParameters &, ContextPtr current_context, + ReadBuffer * in, bool send_progress, bool send_profile_events, const String & server_display_name) { - return std::make_unique(current_context, send_progress, send_profile_events, server_display_name); + return std::make_unique(current_context, in, send_progress, send_profile_events, server_display_name); } diff --git a/src/Client/LocalConnection.h b/src/Client/LocalConnection.h index 9c2d0a81d8d5..a23450709adf 100644 --- a/src/Client/LocalConnection.h +++ b/src/Client/LocalConnection.h @@ -15,6 +15,8 @@ namespace DB class PullingAsyncPipelineExecutor; class PushingAsyncPipelineExecutor; class PushingPipelineExecutor; +class QueryPipeline; +class ReadBuffer; /// State of query processing. struct LocalQueryState @@ -31,6 +33,10 @@ struct LocalQueryState std::unique_ptr executor; std::unique_ptr pushing_executor; std::unique_ptr pushing_async_executor; + /// For sending data for input() function. 
+ std::unique_ptr input_pipeline; + std::unique_ptr input_pipeline_executor; + InternalProfileEventsQueuePtr profile_queue; std::unique_ptr exception; @@ -64,7 +70,11 @@ class LocalConnection : public IServerConnection, WithContext { public: explicit LocalConnection( - ContextPtr context_, bool send_progress_ = false, bool send_profile_events_ = false, const String & server_display_name_ = ""); + ContextPtr context_, + ReadBuffer * in_, + bool send_progress_, + bool send_profile_events_, + const String & server_display_name_); ~LocalConnection() override; @@ -73,6 +83,7 @@ class LocalConnection : public IServerConnection, WithContext static ServerConnectionPtr createConnection( const ConnectionParameters & connection_parameters, ContextPtr current_context, + ReadBuffer * in = nullptr, bool send_progress = false, bool send_profile_events = false, const String & server_display_name = ""); @@ -158,5 +169,7 @@ class LocalConnection : public IServerConnection, WithContext String current_database; ProfileEvents::ThreadIdToCountersSnapshot last_sent_snapshots; + + ReadBuffer * in; }; } diff --git a/tests/queries/0_stateless/03031_clickhouse_local_input.reference b/tests/queries/0_stateless/03031_clickhouse_local_input.reference new file mode 100644 index 000000000000..a6feeef100d9 --- /dev/null +++ b/tests/queries/0_stateless/03031_clickhouse_local_input.reference @@ -0,0 +1,7 @@ +# foo +foo +# !foo +# bar +bar +# defaults +bam diff --git a/tests/queries/0_stateless/03031_clickhouse_local_input.sh b/tests/queries/0_stateless/03031_clickhouse_local_input.sh new file mode 100755 index 000000000000..6f59e9b97031 --- /dev/null +++ b/tests/queries/0_stateless/03031_clickhouse_local_input.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +tmp_file="$CUR_DIR/$CLICKHOUSE_DATABASE.txt" +echo '# foo' +$CLICKHOUSE_LOCAL --engine_file_truncate_on_insert=1 -n -q "insert into function file('$tmp_file', 'LineAsString', 'x String') select * from input('x String') format LineAsString" << Date: Tue, 26 Mar 2024 15:03:00 +0000 Subject: [PATCH 035/470] Fix unrelated changes in contrib --- contrib/arrow | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/arrow b/contrib/arrow index 12232bbbe39b..46e7ed11c2e0 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit 12232bbbe39b4ffbd921a0caff6d046ae009a753 +Subproject commit 46e7ed11c2e0ef62ccbbe23e6a35a4988884e450 From c5f6296b434bc3a79a402b8b13f5b71afcde1fc3 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 26 Mar 2024 16:07:15 +0100 Subject: [PATCH 036/470] Try to fix links in ru docs --- docs/ru/sql-reference/data-types/datetime.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index 25e877941475..9f2d21eb29fa 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -27,9 +27,9 @@ DateTime([timezone]) Консольный клиент ClickHouse по умолчанию использует часовой пояс сервера, если для значения `DateTime` часовой пояс не был задан в явном виде при инициализации типа данных. Чтобы использовать часовой пояс клиента, запустите [clickhouse-client](../../interfaces/cli.md) с параметром `--use_client_time_zone`. 
-ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/settings-formats.md#date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). +ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/index.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). -При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/settings-formats.md#date_time_input_format). +При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/index.md#settings-date_time_output_format). ## Примеры {#primery} @@ -119,8 +119,8 @@ FROM dt - [Функции преобразования типов](../../sql-reference/functions/type-conversion-functions.md) - [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md) - [Функции для работы с массивами](../../sql-reference/functions/array-functions.md) -- [Настройка `date_time_input_format`](../../operations/settings/settings-formats.md#date_time_input_format) -- [Настройка `date_time_output_format`](../../operations/settings/settings-formats.md#date_time_output_format) +- [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_output_format) +- [Настройка `date_time_output_format`](../../operations/settings/index.md#settings-date_time_output_format) - [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone) - [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime) From 837b89b8fe0cd740d724fea664b278fe8fb1d72f Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 26 Mar 2024 16:08:19 +0100 Subject: [PATCH 037/470] Fix settings names --- docs/ru/sql-reference/data-types/datetime.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md index 9f2d21eb29fa..34cd44d47095 100644 --- a/docs/ru/sql-reference/data-types/datetime.md +++ b/docs/ru/sql-reference/data-types/datetime.md @@ -29,7 +29,7 @@ DateTime([timezone]) ClickHouse отображает значения в зависимости от значения параметра [date\_time\_output\_format](../../operations/settings/index.md#settings-date_time_output_format). Текстовый формат по умолчанию `YYYY-MM-DD hh:mm:ss`. Кроме того, вы можете поменять отображение с помощью функции [formatDateTime](../../sql-reference/functions/date-time-functions.md#formatdatetime). -При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/index.md#settings-date_time_output_format). 
+При вставке данных в ClickHouse, можно использовать различные форматы даты и времени в зависимости от значения настройки [date_time_input_format](../../operations/settings/index.md#settings-date_time_input_format). ## Примеры {#primery} @@ -119,7 +119,7 @@ FROM dt - [Функции преобразования типов](../../sql-reference/functions/type-conversion-functions.md) - [Функции для работы с датой и временем](../../sql-reference/functions/date-time-functions.md) - [Функции для работы с массивами](../../sql-reference/functions/array-functions.md) -- [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_output_format) +- [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_input_format) - [Настройка `date_time_output_format`](../../operations/settings/index.md#settings-date_time_output_format) - [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) - [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone) From 34c8ec67e88351541e627fb7faf13d66c179eb2c Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 26 Mar 2024 15:09:16 +0000 Subject: [PATCH 038/470] fix --- tests/ci/ci.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index b913550e3c90..c9b93533b665 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -1634,7 +1634,7 @@ def _add_build_to_version_history( "git_ref": pr_info.head_ref, } - print(f"::notice ::Log Adding record to versions history: {json_str}") + print(f"::notice ::Log Adding record to versions history: {data}") ch_helper.insert_event_into(db="default", table="version_history", event=data) From cf5517a0994182c7893513665e30322e2f7cc68d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 27 Mar 2024 11:31:41 +0000 Subject: [PATCH 039/470] Cancel merges during move/replace partition --- base/base/scope_guard.h | 8 ++-- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 42 ++++++++++++++++++- .../MergeTree/ReplicatedMergeTreeQueue.h | 12 ++++++ src/Storages/StorageReplicatedMergeTree.cpp | 29 ++++++++++++- 4 files changed, 84 insertions(+), 7 deletions(-) diff --git a/base/base/scope_guard.h b/base/base/scope_guard.h index 03670792d596..e6789c5cb1bb 100644 --- a/base/base/scope_guard.h +++ b/base/base/scope_guard.h @@ -29,11 +29,13 @@ class [[nodiscard]] BasicScopeGuard requires std::is_convertible_v constexpr BasicScopeGuard & operator=(BasicScopeGuard && src) // NOLINT(cppcoreguidelines-rvalue-reference-param-not-moved, cppcoreguidelines-noexcept-move-operations) { - if (this != &src) + if constexpr (std::is_same_v) { - invoke(); - function = src.release(); + if (this == &src) + return *this; } + invoke(); + function = src.release(); return *this; } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index ee4ed87d456a..6dadada2e7fd 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -8,10 +8,8 @@ #include #include #include -#include "Storages/MutationCommands.h" #include #include - #include #include @@ -221,6 +219,43 @@ void ReplicatedMergeTreeQueue::createLogEntriesToFetchBrokenParts() broken_parts_to_enqueue_fetches_on_loading.clear(); } +void ReplicatedMergeTreeQueue::addDropReplaceIntent(const MergeTreePartInfo & intent) +{ + std::lock_guard lock{state_mutex}; + 
drop_replace_range_intents.push_back(intent); +} + +void ReplicatedMergeTreeQueue::removeDropReplaceIntent(const MergeTreePartInfo & intent) +{ + std::lock_guard lock{state_mutex}; + auto it = std::find(drop_replace_range_intents.begin(), drop_replace_range_intents.end(), intent); + chassert(it != drop_replace_range_intents.end()); + drop_replace_range_intents.erase(it); +} + +bool ReplicatedMergeTreeQueue::isIntersectingWithDropReplaceIntent( + const LogEntry & entry, const String & part_name, String & out_reason, std::unique_lock & /*state_mutex lock*/) const +{ + // TODO(antaljanosbenjamin): fill out out_reason + const auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); + for (const auto & intent : drop_replace_range_intents) + { + if (!intent.isDisjoint(part_info)) + { + constexpr auto fmt_string = "Not executing {} of type {} for part {} (actual part {})" + "because there is a drop or replace intent with part name {}."; + LOG_INFO( + LogToStr(out_reason, log), + fmt_string, + entry.znode_name, + entry.type, + entry.new_part_name, + part_name, + intent.getPartNameForLogs()); + } + } + return false; +} void ReplicatedMergeTreeQueue::insertUnlocked( const LogEntryPtr & entry, std::optional & min_unprocessed_insert_time_changed, @@ -1303,6 +1338,9 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( /// We can wait in worker threads, but not in scheduler. if (isCoveredByFuturePartsImpl(entry, new_part_name, out_postpone_reason, state_lock, /* covered_entries_to_wait */ nullptr)) return false; + + if (isIntersectingWithDropReplaceIntent(entry, new_part_name, out_postpone_reason, state_lock)) + return false; } if (entry.type != LogEntry::DROP_RANGE && entry.type != LogEntry::DROP_PART) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index b17e78199463..95016d60ef10 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -107,6 +107,8 @@ class ReplicatedMergeTreeQueue */ ActiveDataPartSet virtual_parts; + /// Used to prevent operations to start in ranges which will be affected by DROP_RANGE/REPLACE_RANGE + std::vector drop_replace_range_intents; /// We do not add DROP_PARTs to virtual_parts because they can intersect, /// so we store them separately in this structure. @@ -251,6 +253,10 @@ class ReplicatedMergeTreeQueue std::optional min_unprocessed_insert_time_changed, std::optional max_processed_insert_time_changed) const; + bool isIntersectingWithDropReplaceIntent( + const LogEntry & entry, + const String & part_name, String & out_reason, std::unique_lock & /*state_mutex lock*/) const; + /// Marks the element of the queue as running. class CurrentlyExecuting { @@ -490,6 +496,12 @@ class ReplicatedMergeTreeQueue void setBrokenPartsToEnqueueFetchesOnLoading(Strings && parts_to_fetch); /// Must be called right after queue loading. void createLogEntriesToFetchBrokenParts(); + + /// Add an intent to block operations to start in the range. All intents must be removed by calling + /// removeDropReplaceIntent(). The same intent can be added multiple times, but it has to be removed exactly + /// the same amount of times. 
+ void addDropReplaceIntent(const MergeTreePartInfo& intent); + void removeDropReplaceIntent(const MergeTreePartInfo& intent); }; using CommittingBlocks = std::unordered_map>; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 905473302ba5..52847935a72d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7998,10 +7998,19 @@ void StorageReplicatedMergeTree::replacePartitionFrom( replace = false; } + scope_guard intent_guard; if (!replace) { /// It's ATTACH PARTITION FROM, not REPLACE PARTITION. We have to reset drop range drop_range = makeDummyDropRangeForMovePartitionOrAttachPartitionFrom(partition_id); + queue.addDropReplaceIntent(drop_range); + intent_guard = scope_guard{[this, my_drop_range = drop_range]() { queue.removeDropReplaceIntent(my_drop_range); }}; + + getContext()->getMergeList().cancelInPartition(getStorageID(), drop_range.partition_id, drop_range.max_block); + { + auto pause_checking_parts = part_check_thread.pausePartsCheck(); + part_check_thread.cancelRemovedPartsCheck(drop_range); + } } assert(replace == !LogEntry::ReplaceRangeEntry::isMovePartitionOrAttachFrom(drop_range)); @@ -8174,8 +8183,11 @@ void StorageReplicatedMergeTree::replacePartitionFrom( lock2.reset(); lock1.reset(); - /// We need to pull the DROP_RANGE before cleaning the replaced parts (otherwise CHeckThread may decide that parts are lost) + /// We need to pull the REPLACE_RANGE before cleaning the replaced parts (otherwise CHeckThread may decide that parts are lost) queue.pullLogsToQueue(getZooKeeperAndAssertNotReadonly(), {}, ReplicatedMergeTreeQueue::SYNC); + // No need to block operations further, especially that in case we have to wait for mutation to finish, the intent would block + // the execution of REPLACE_RANGE + intent_guard.reset(); parts_holder.clear(); cleanup_thread.wakeup(); @@ -8227,11 +8239,21 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta Coordination::Stat alter_partition_version_stat; zookeeper->get(alter_partition_version_path, &alter_partition_version_stat); - MergeTreePartInfo drop_range; std::optional delimiting_block_lock; + MergeTreePartInfo drop_range; getFakePartCoveringAllPartsInPartition(partition_id, drop_range, delimiting_block_lock, true); String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range); + queue.addDropReplaceIntent(drop_range); + // Let's copy drop_range to make sure it doesn't get modified, otherwise we might run into issue on removal + scope_guard intent_guard{[this, my_drop_range = drop_range]() { queue.removeDropReplaceIntent(my_drop_range); }}; + + getContext()->getMergeList().cancelInPartition(getStorageID(), drop_range.partition_id, drop_range.max_block); + { + auto pause_checking_parts = part_check_thread.pausePartsCheck(); + part_check_thread.cancelRemovedPartsCheck(drop_range); + } + DataPartPtr covering_part; DataPartsVector src_all_parts; { @@ -8436,6 +8458,9 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta /// We need to pull the DROP_RANGE before cleaning the replaced parts (otherwise CHeckThread may decide that parts are lost) queue.pullLogsToQueue(getZooKeeperAndAssertNotReadonly(), {}, ReplicatedMergeTreeQueue::SYNC); + // No need to block operations further, especially that in case we have to wait for mutation to finish, the intent would block + // the execution of DROP_RANGE + intent_guard.reset(); parts_holder.clear(); 
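As an editorial recap (not additional code from the patch): spread across the hunks of this commit, the new drop/replace intent follows one fixed lifecycle around ATTACH PARTITION FROM and MOVE PARTITION TO TABLE. The sketch below only restates calls that already appear above, with the steps in between compressed into comments:

/// Editorial sketch only, assembled from the hunks above.
queue.addDropReplaceIntent(drop_range);                                        /// 1. queue entries intersecting the range are postponed from now on
scope_guard intent_guard{[&] { queue.removeDropReplaceIntent(drop_range); }};  /// 2. removed automatically if anything below throws
getContext()->getMergeList().cancelInPartition(getStorageID(), drop_range.partition_id, drop_range.max_block);  /// 3. stop merges already running in the partition
/// ... create and commit the DROP_RANGE / REPLACE_RANGE log entry ...
queue.pullLogsToQueue(getZooKeeperAndAssertNotReadonly(), {}, ReplicatedMergeTreeQueue::SYNC);  /// 4. the entry is now in the local queue
intent_guard.reset();                                                          /// 5. the entry itself blocks the range, so the intent is no longer needed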
cleanup_thread.wakeup(); From 42a27c5c875af506bddc8c4e932acf91ec0410a9 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 27 Mar 2024 16:27:47 +0100 Subject: [PATCH 040/470] Fix WriteBufferAzureBlobStorage destructor uncaught exception --- .../IO/WriteBufferFromAzureBlobStorage.cpp | 42 +++++++++++++++---- .../IO/WriteBufferFromAzureBlobStorage.h | 5 ++- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index 05b93dd1fa34..9432cdf9fef4 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -65,7 +65,20 @@ WriteBufferFromAzureBlobStorage::WriteBufferFromAzureBlobStorage( WriteBufferFromAzureBlobStorage::~WriteBufferFromAzureBlobStorage() { - finalize(); + LOG_TRACE(limitedLog, "Close WriteBufferFromAzureBlobStorage. {}.", blob_path); + + /// That destructor could be call with finalized=false in case of exceptions + if (!finalized) + { + LOG_INFO( + log, + "WriteBufferFromAzureBlobStorage is not finalized in destructor. " + "The file might not be written to AzureBlobStorage. " + "{}.", + blob_path); + } + + task_tracker->safeWaitAll(); } void WriteBufferFromAzureBlobStorage::execWithRetry(std::function func, size_t num_tries, size_t cost) @@ -102,9 +115,13 @@ void WriteBufferFromAzureBlobStorage::execWithRetry(std::function func, } } -void WriteBufferFromAzureBlobStorage::finalizeImpl() +void WriteBufferFromAzureBlobStorage::preFinalize() { - auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + if (is_prefinalized) + return; + + // This function should not be run again + is_prefinalized = true; /// If there is only one block and size is less than or equal to max_single_part_upload_size /// then we use single part upload instead of multi part upload @@ -113,6 +130,7 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl() size_t data_size = size_t(position() - memory.data()); if (data_size <= max_single_part_upload_size) { + auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); Azure::Core::IO::MemoryBodyStream memory_stream(reinterpret_cast(memory.data()), data_size); execWithRetry([&](){ block_blob_client.Upload(memory_stream); }, max_unexpected_write_error_retries, data_size); LOG_TRACE(log, "Committed single block for blob `{}`", blob_path); @@ -120,14 +138,22 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl() } } + writePart(); +} - execWithRetry([this](){ next(); }, max_unexpected_write_error_retries); - - task_tracker->waitAll(); +void WriteBufferFromAzureBlobStorage::finalizeImpl() +{ + LOG_TRACE(log, "finalizeImpl WriteBufferFromAzureBlobStorage {}", blob_path); - execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries); + if (!is_prefinalized) + preFinalize(); - LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); + if (!block_ids.empty()) + { + auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); + execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries); + LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); + } } void WriteBufferFromAzureBlobStorage::nextImpl() diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h index 6e10c07b255b..7d4081ad792b 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.h +++ 
b/src/Disks/IO/WriteBufferFromAzureBlobStorage.h @@ -41,7 +41,7 @@ class WriteBufferFromAzureBlobStorage : public WriteBufferFromFileBase ~WriteBufferFromAzureBlobStorage() override; void nextImpl() override; - + void preFinalize() override; std::string getFileName() const override { return blob_path; } void sync() override { next(); } @@ -65,6 +65,9 @@ class WriteBufferFromAzureBlobStorage : public WriteBufferFromFileBase const std::string blob_path; const WriteSettings write_settings; + /// Track that prefinalize() is called only once + bool is_prefinalized = false; + AzureClientPtr blob_container_client; std::vector block_ids; From df066193990042d35fcfabb58cd33412693fe8ca Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 27 Mar 2024 20:40:26 +0000 Subject: [PATCH 041/470] Better conversion from String to Variant --- .../Serializations/SerializationBool.cpp | 2 +- src/DataTypes/getLeastSupertype.cpp | 3 + src/Functions/CastOverloadResolver.cpp | 6 +- src/Functions/FunctionsConversion.cpp | 85 +++++++++++++------ .../0_stateless/02941_variant_type_1.sh | 6 +- .../0_stateless/02941_variant_type_2.sh | 2 +- .../0_stateless/02941_variant_type_3.sh | 2 +- .../0_stateless/02941_variant_type_4.sh | 2 +- .../0_stateless/02942_variant_cast.reference | 2 +- .../03032_string_to_variant_cast.reference | 13 +++ .../03032_string_to_variant_cast.sql | 17 ++++ 11 files changed, 107 insertions(+), 33 deletions(-) create mode 100644 tests/queries/0_stateless/03032_string_to_variant_cast.reference create mode 100644 tests/queries/0_stateless/03032_string_to_variant_cast.sql diff --git a/src/DataTypes/Serializations/SerializationBool.cpp b/src/DataTypes/Serializations/SerializationBool.cpp index f745fac4d307..d6a74e5cb8f3 100644 --- a/src/DataTypes/Serializations/SerializationBool.cpp +++ b/src/DataTypes/Serializations/SerializationBool.cpp @@ -194,12 +194,12 @@ ReturnType deserializeImpl( buf.dropCheckpoint(); if (buf.hasUnreadData()) { + restore_column_if_needed(); if constexpr (throw_exception) throw Exception( ErrorCodes::CANNOT_PARSE_BOOL, "Cannot continue parsing after parsed bool value because it will result in the loss of some data. It may happen if " "bool_true_representation or bool_false_representation contains some delimiters of input format"); - restore_column_if_needed(); return ReturnType(false); } return ReturnType(true); diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index dec77119eed6..e69b0411aacb 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -463,6 +463,9 @@ DataTypePtr getLeastSupertype(const DataTypes & types) /// nested_type will be nullptr, we should return nullptr in this case. if (!nested_type) return nullptr; + /// Common type for Nullable(Nothing) and Variant(...) is Variant(...) + if (isVariant(nested_type)) + return nested_type; return std::make_shared(nested_type); } } diff --git a/src/Functions/CastOverloadResolver.cpp b/src/Functions/CastOverloadResolver.cpp index 5ca4b0bc5798..0f54ff52ba2c 100644 --- a/src/Functions/CastOverloadResolver.cpp +++ b/src/Functions/CastOverloadResolver.cpp @@ -100,7 +100,11 @@ class CastOverloadResolverImpl : public IFunctionOverloadResolver validateDataType(type, data_type_validation_settings); if (cast_type == CastType::accurateOrNull) - return makeNullable(type); + { + /// Variant handles NULLs by itself during conversions. 
+ if (!isVariant(type)) + return makeNullable(type); + } if (internal) return type; diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 7049ca441108..6a1a2d5e888b 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1815,6 +1815,7 @@ struct ConvertImpl /// Generic conversion of any type from String. Used for complex types: Array and Tuple or types with custom serialization. +template struct ConvertImplGenericFromString { static ColumnPtr execute(ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, size_t input_rows_count) @@ -1854,29 +1855,34 @@ struct ConvertImplGenericFromString { serialization_from.deserializeWholeText(column_to, read_buffer, format_settings); } - catch (const Exception & e) + catch (const Exception &) { - auto * nullable_column = typeid_cast(&column_to); - if (e.code() == ErrorCodes::CANNOT_PARSE_BOOL && nullable_column) - { - auto & col_nullmap = nullable_column->getNullMapData(); - if (col_nullmap.size() != nullable_column->size()) - col_nullmap.resize_fill(nullable_column->size()); - if (nullable_column->size() == (i + 1)) - nullable_column->popBack(1); - nullable_column->insertDefault(); - continue; - } - throw; + if constexpr (throw_on_error) + throw; + /// Check if exception happened after we inserted the value + /// (deserializeWholeText should not do it, but let's check anyway). + if (column_to.size() > i) + column_to.popBack(column_to.size() - i); + column_to.insertDefault(); } + /// Usually deserializeWholeText checks for eof after parsing, but let's check one more time just in case. if (!read_buffer.eof()) { - if (result_type) - throwExceptionForIncompletelyParsedValue(read_buffer, *result_type); + if constexpr (throw_on_error) + { + if (result_type) + throwExceptionForIncompletelyParsedValue(read_buffer, *result_type); + else + throw Exception( + ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse string to column {}. Expected eof", column_to.getName()); + } else - throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, - "Cannot parse string to column {}. Expected eof", column_to.getName()); + { + if (column_to.size() > i) + column_to.popBack(column_to.size() - i); + column_to.insertDefault(); + } } } } @@ -3279,7 +3285,9 @@ class FunctionCast final : public IFunctionBase { if (checkAndGetDataType(from_type.get())) { - return &ConvertImplGenericFromString::execute; + if (cast_type == CastType::accurateOrNull) + return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } return createWrapper(from_type, to_type, requested_result_is_nullable); @@ -3442,7 +3450,7 @@ class FunctionCast final : public IFunctionBase /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } else if (const auto * agg_type = checkAndGetDataType(from_type_untyped.get())) { @@ -3485,7 +3493,7 @@ class FunctionCast final : public IFunctionBase /// Conversion from String through parsing. if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } DataTypePtr from_type_holder; @@ -3576,7 +3584,7 @@ class FunctionCast final : public IFunctionBase /// Conversion from String through parsing. 
if (checkAndGetDataType(from_type_untyped.get())) { - return &ConvertImplGenericFromString::execute; + return &ConvertImplGenericFromString::execute; } const auto * from_type = checkAndGetDataType(from_type_untyped.get()); @@ -3921,7 +3929,7 @@ class FunctionCast final : public IFunctionBase { return [] (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * nullable_source, size_t input_rows_count) { - auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable(); + auto res = ConvertImplGenericFromString::execute(arguments, result_type, nullable_source, input_rows_count)->assumeMutable(); res->finalize(); return res; }; @@ -4076,6 +4084,29 @@ class FunctionCast final : public IFunctionBase return ColumnVariant::create(discriminators, variants); } + WrapperType createStringToVariantWrapper() const + { + return [&](ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr + { + auto column = arguments[0].column->convertToFullColumnIfLowCardinality(); + auto args = arguments; + args[0].column = column; + + const ColumnNullable * column_nullable = nullptr; + if (isColumnNullable(*args[0].column)) + { + column_nullable = assert_cast(args[0].column.get()); + args[0].column = column_nullable->getNestedColumnPtr(); + } + + args[0].type = removeNullable(removeLowCardinality(args[0].type)); + + if (cast_type == CastType::accurateOrNull) + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count); + return ConvertImplGenericFromString::execute(args, result_type, column_nullable, input_rows_count); + }; + } + WrapperType createColumnToVariantWrapper(const DataTypePtr & from_type, const DataTypeVariant & to_variant) const { /// We allow converting NULL to Variant(...) as Variant can store NULLs. @@ -4089,6 +4120,9 @@ class FunctionCast final : public IFunctionBase }; } + if (isStringOrFixedString(removeNullable(removeLowCardinality(from_type)))) + return createStringToVariantWrapper(); + auto variant_discr_opt = to_variant.tryGetVariantDiscriminator(*removeNullableOrLowCardinalityNullable(from_type)); if (!variant_discr_opt) throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert type {} to {}. 
Conversion to Variant allowed only for types from this Variant", from_type->getName(), to_variant.getName()); @@ -4691,7 +4725,7 @@ class FunctionCast final : public IFunctionBase if (to_type->getCustomSerialization() && to_type->getCustomName()) { - ret = [requested_result_is_nullable]( + ret = [this, requested_result_is_nullable]( ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable * column_nullable, @@ -4700,7 +4734,10 @@ class FunctionCast final : public IFunctionBase auto wrapped_result_type = result_type; if (requested_result_is_nullable) wrapped_result_type = makeNullable(result_type); - return ConvertImplGenericFromString::execute( + if (this->cast_type == CastType::accurateOrNull) + return ConvertImplGenericFromString::execute( + arguments, wrapped_result_type, column_nullable, input_rows_count); + return ConvertImplGenericFromString::execute( arguments, wrapped_result_type, column_nullable, input_rows_count); }; return true; diff --git a/tests/queries/0_stateless/02941_variant_type_1.sh b/tests/queries/0_stateless/02941_variant_type_1.sh index 773a8c4a5e43..4fb76532a058 100755 --- a/tests/queries/0_stateless/02941_variant_type_1.sh +++ b/tests/queries/0_stateless/02941_variant_type_1.sh @@ -14,7 +14,7 @@ function test1_insert() echo "test1 insert" $CH_CLIENT -nmq "insert into test select number, NULL from numbers(3); insert into test select number + 3, number from numbers(3); -insert into test select number + 6, 'str_' || toString(number) from numbers(3); +insert into test select number + 6, ('str_' || toString(number))::Variant(String) from numbers(3); insert into test select number + 9, ('lc_str_' || toString(number))::LowCardinality(String) from numbers(3); insert into test select number + 12, tuple(number, number + 1)::Tuple(a UInt32, b UInt32) from numbers(3); insert into test select number + 15, range(number + 1)::Array(UInt64) from numbers(3);" @@ -40,7 +40,7 @@ function test2_insert() echo "test2 insert" $CH_CLIENT -nmq "insert into test select number, NULL from numbers(3); insert into test select number + 3, number % 2 ? NULL : number from numbers(3); -insert into test select number + 6, number % 2 ? NULL : 'str_' || toString(number) from numbers(3); +insert into test select number + 6, number % 2 ? NULL : ('str_' || toString(number))::Variant(String) from numbers(3); insert into test select number + 9, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(('lc_str_' || toString(number))::LowCardinality(String), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(3); insert into test select number + 12, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(3); insert into test select number + 15, number % 2 ? 
CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(range(number + 1)::Array(UInt64), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(3);" @@ -64,7 +64,7 @@ select v.\`Array(UInt64)\`.size0 from test order by id;" function test3_insert() { echo "test3 insert" - $CH_CLIENT -q "insert into test with 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))' as type select number, multiIf(number % 6 == 0, CAST(NULL, type), number % 6 == 1, CAST('str_' || toString(number), type), number % 6 == 2, CAST(number, type), number % 6 == 3, CAST(('lc_str_' || toString(number))::LowCardinality(String), type), number % 6 == 4, CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), type), CAST(range(number + 1)::Array(UInt64), type)) as res from numbers(18);" + $CH_CLIENT -q "insert into test with 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))' as type select number, multiIf(number % 6 == 0, CAST(NULL, type), number % 6 == 1, CAST(('str_' || toString(number))::Variant(String), type), number % 6 == 2, CAST(number, type), number % 6 == 3, CAST(('lc_str_' || toString(number))::LowCardinality(String), type), number % 6 == 4, CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), type), CAST(range(number + 1)::Array(UInt64), type)) as res from numbers(18);" } function test3_select() diff --git a/tests/queries/0_stateless/02941_variant_type_2.sh b/tests/queries/0_stateless/02941_variant_type_2.sh index d1fa0a777c95..995b622b6bfb 100755 --- a/tests/queries/0_stateless/02941_variant_type_2.sh +++ b/tests/queries/0_stateless/02941_variant_type_2.sh @@ -14,7 +14,7 @@ function test4_insert() echo "test4 insert" $CH_CLIENT -nmq "insert into test select number, NULL from numbers(100000); insert into test select number + 100000, number from numbers(100000); -insert into test select number + 200000, 'str_' || toString(number) from numbers(100000); +insert into test select number + 200000, ('str_' || toString(number))::Variant(String) from numbers(100000); insert into test select number + 300000, ('lc_str_' || toString(number))::LowCardinality(String) from numbers(100000); insert into test select number + 400000, tuple(number, number + 1)::Tuple(a UInt32, b UInt32) from numbers(100000); insert into test select number + 500000, range(number % 20 + 1)::Array(UInt64) from numbers(100000);" diff --git a/tests/queries/0_stateless/02941_variant_type_3.sh b/tests/queries/0_stateless/02941_variant_type_3.sh index a0efead280a3..9fbdf6de8a7a 100755 --- a/tests/queries/0_stateless/02941_variant_type_3.sh +++ b/tests/queries/0_stateless/02941_variant_type_3.sh @@ -15,7 +15,7 @@ function test5_insert() $CH_CLIENT -nmq " insert into test select number, NULL from numbers(200000); insert into test select number + 200000, number % 2 ? NULL : number from numbers(200000); -insert into test select number + 400000, number % 2 ? NULL : 'str_' || toString(number) from numbers(200000); +insert into test select number + 400000, number % 2 ? NULL : ('str_' || toString(number))::Variant(String) from numbers(200000); insert into test select number + 600000, number % 2 ? 
CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(('lc_str_' || toString(number))::LowCardinality(String), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(200000); insert into test select number + 800000, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(200000); insert into test select number + 1000000, number % 2 ? CAST(NULL, 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') : CAST(range(number % 20 + 1)::Array(UInt64), 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))') from numbers(200000);" diff --git a/tests/queries/0_stateless/02941_variant_type_4.sh b/tests/queries/0_stateless/02941_variant_type_4.sh index 336540d1e793..f6eaf2fcc9a1 100755 --- a/tests/queries/0_stateless/02941_variant_type_4.sh +++ b/tests/queries/0_stateless/02941_variant_type_4.sh @@ -12,7 +12,7 @@ CH_CLIENT="$CLICKHOUSE_CLIENT --allow_experimental_variant_type=1 --allow_suspic function test6_insert() { echo "test6 insert" - $CH_CLIENT -q "insert into test with 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))' as type select number, multiIf(number % 6 == 0, CAST(NULL, type), number % 6 == 1, CAST('str_' || toString(number), type), number % 6 == 2, CAST(number, type), number % 6 == 3, CAST(('lc_str_' || toString(number))::LowCardinality(String), type), number % 6 == 4, CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), type), CAST(range(number % 20 + 1)::Array(UInt64), type)) as res from numbers(1200000);" + $CH_CLIENT -q "insert into test with 'Variant(String, UInt64, LowCardinality(String), Tuple(a UInt32, b UInt32), Array(UInt64))' as type select number, multiIf(number % 6 == 0, CAST(NULL, type), number % 6 == 1, CAST(('str_' || toString(number))::Variant(String), type), number % 6 == 2, CAST(number, type), number % 6 == 3, CAST(('lc_str_' || toString(number))::LowCardinality(String), type), number % 6 == 4, CAST(tuple(number, number + 1)::Tuple(a UInt32, b UInt32), type), CAST(range(number % 20 + 1)::Array(UInt64), type)) as res from numbers(1200000);" } function test6_select() diff --git a/tests/queries/0_stateless/02942_variant_cast.reference b/tests/queries/0_stateless/02942_variant_cast.reference index f3fd7a9ba33b..d300ca655a60 100644 --- a/tests/queries/0_stateless/02942_variant_cast.reference +++ b/tests/queries/0_stateless/02942_variant_cast.reference @@ -6,7 +6,7 @@ \N Hello Hello -NULL +\N Hello Hello \N diff --git a/tests/queries/0_stateless/03032_string_to_variant_cast.reference b/tests/queries/0_stateless/03032_string_to_variant_cast.reference new file mode 100644 index 000000000000..6531e2206a57 --- /dev/null +++ b/tests/queries/0_stateless/03032_string_to_variant_cast.reference @@ -0,0 +1,13 @@ +42 UInt64 +abc String +\N None +[1,2,3] Array(UInt64) +[1, 2, 3 String +\N None +42 UInt64 +42 UInt64 +42 UInt64 +\N None +42 UInt64 +\N None +\N None diff --git a/tests/queries/0_stateless/03032_string_to_variant_cast.sql b/tests/queries/0_stateless/03032_string_to_variant_cast.sql new file mode 100644 index 000000000000..67a501b96283 --- /dev/null +++ b/tests/queries/0_stateless/03032_string_to_variant_cast.sql @@ -0,0 +1,17 
@@ +set allow_experimental_variant_type=1; +select CAST('42', 'Variant(String, UInt64)') as v, variantType(v); +select CAST('abc', 'Variant(String, UInt64)') as v, variantType(v); +select CAST('null', 'Variant(String, UInt64)') as v, variantType(v); +select CAST('[1, 2, 3]', 'Variant(String, Array(UInt64))') as v, variantType(v); +select CAST('[1, 2, 3', 'Variant(String, Array(UInt64))') as v, variantType(v); +select CAST('42', 'Variant(Date)') as v, variantType(v); -- {serverError INCORRECT_DATA} +select accurateCastOrNull('42', 'Variant(Date)') as v, variantType(v); + +select CAST('42'::FixedString(2), 'Variant(String, UInt64)') as v, variantType(v); +select CAST('42'::LowCardinality(String), 'Variant(String, UInt64)') as v, variantType(v); +select CAST('42'::Nullable(String), 'Variant(String, UInt64)') as v, variantType(v); +select CAST(NULL::Nullable(String), 'Variant(String, UInt64)') as v, variantType(v); +select CAST('42'::LowCardinality(Nullable(String)), 'Variant(String, UInt64)') as v, variantType(v); +select CAST(NULL::LowCardinality(Nullable(String)), 'Variant(String, UInt64)') as v, variantType(v); +select CAST(NULL::LowCardinality(Nullable(FixedString(2))), 'Variant(String, UInt64)') as v, variantType(v); + From ab327e69c525eb766688f67c7aae7f04bb14de81 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 27 Mar 2024 20:53:52 +0000 Subject: [PATCH 042/470] Add docs about converting from string --- docs/en/sql-reference/data-types/variant.md | 57 ++++++++++++++++++--- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/data-types/variant.md b/docs/en/sql-reference/data-types/variant.md index 7d10d4b0e977..24923e4d30fb 100644 --- a/docs/en/sql-reference/data-types/variant.md +++ b/docs/en/sql-reference/data-types/variant.md @@ -192,20 +192,65 @@ SELECT toTypeName(variantType(v)) FROM test LIMIT 1; ## Conversion between Variant column and other columns -There are 3 possible conversions that can be performed with Variant column. +There are 4 possible conversions that can be performed with Variant column. 
+ +### Converting String column to a Variant column + +Conversion from `String` to `Variant` is performed by parsing a value of `Variant` type from the string value: + +```sql +SELECT '42'::Variant(String, UInt64) as variant, variantType(variant) as variant_type +``` + +```text +┌─variant─┬─variant_type─┐ +│ 42 │ UInt64 │ +└─────────┴──────────────┘ +``` + +```sql +SELECT '[1, 2, 3]'::Variant(String, Array(UInt64)) as variant, variantType(variant) as variant_type +``` + +```text +┌─variant─┬─variant_type──┐ +│ [1,2,3] │ Array(UInt64) │ +└─────────┴───────────────┘ +``` + +```sql +SELECT CAST(map('key1', '42', 'key2', 'true', 'key3', '2020-01-01'), 'Map(String, Variant(UInt64, Bool, Date))') as map_of_variants, mapApply((k, v) -> (k, variantType(v)), map_of_variants) as map_of_variant_types``` +``` + +```text +┌─map_of_variants─────────────────────────────┬─map_of_variant_types──────────────────────────┐ +│ {'key1':42,'key2':true,'key3':'2020-01-01'} │ {'key1':'UInt64','key2':'Bool','key3':'Date'} │ +└─────────────────────────────────────────────┴───────────────────────────────────────────────┘ +``` ### Converting an ordinary column to a Variant column It is possible to convert ordinary column with type `T` to a `Variant` column containing this type: ```sql -SELECT toTypeName(variant) as type_name, 'Hello, World!'::Variant(UInt64, String, Array(UInt64)) as variant; -``` +SELECT toTypeName(variant) as type_name, [1,2,3]::Array(UInt64)::Variant(UInt64, String, Array(UInt64)) as variant, variantType(variant) as variant_name + ``` ```text -┌─type_name──────────────────────────────┬─variant───────┐ -│ Variant(Array(UInt64), String, UInt64) │ Hello, World! │ -└────────────────────────────────────────┴───────────────┘ +┌─type_name──────────────────────────────┬─variant─┬─variant_name──┐ +│ Variant(Array(UInt64), String, UInt64) │ [1,2,3] │ Array(UInt64) │ +└────────────────────────────────────────┴─────────┴───────────────┘ +``` + +Note: converting from `String` type is always performed through parsing, if you need to convert `String` column to `String` variant of a `Variant` without parsing, you can do the following: +```sql +SELECT '[1, 2, 3]'::Variant(String)::Variant(String, Array(UInt64), UInt64) as variant, variantType(variant) as variant_type +``` + +```sql +┌─variant───┬─variant_type─┐ +│ [1, 2, 3] │ String │ +└───────────┴──────────────┘ ``` ### Converting a Variant column to an ordinary column From d2e375ce33778d8fe66290f46353ac8b8a1f8187 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 27 Mar 2024 20:55:56 +0000 Subject: [PATCH 043/470] Better docs --- docs/en/sql-reference/data-types/variant.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/data-types/variant.md b/docs/en/sql-reference/data-types/variant.md index 24923e4d30fb..e41f3c3119f8 100644 --- a/docs/en/sql-reference/data-types/variant.md +++ b/docs/en/sql-reference/data-types/variant.md @@ -190,11 +190,11 @@ SELECT toTypeName(variantType(v)) FROM test LIMIT 1; └─────────────────────────────────────────────────────────────────────┘ ``` -## Conversion between Variant column and other columns +## Conversion between a Variant column and other columns -There are 4 possible conversions that can be performed with Variant column. +There are 4 possible conversions that can be performed with a column of type `Variant`. 
-### Converting String column to a Variant column +### Converting a String column to a Variant column Conversion from `String` to `Variant` is performed by parsing a value of `Variant` type from the string value: @@ -230,7 +230,7 @@ SELECT CAST(map('key1', '42', 'key2', 'true', 'key3', '2020-01-01'), 'Map(String ### Converting an ordinary column to a Variant column -It is possible to convert ordinary column with type `T` to a `Variant` column containing this type: +It is possible to convert an ordinary column with type `T` to a `Variant` column containing this type: ```sql SELECT toTypeName(variant) as type_name, [1,2,3]::Array(UInt64)::Variant(UInt64, String, Array(UInt64)) as variant, variantType(variant) as variant_name From 90144f7ae4d7c4238b1578a365cd2f6b1b44cb8f Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 27 Mar 2024 21:31:15 +0000 Subject: [PATCH 044/470] Fix style --- src/Functions/FunctionsConversion.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 6a1a2d5e888b..448a5fd8fc6e 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -89,7 +89,6 @@ namespace ErrorCodes extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int NOT_IMPLEMENTED; extern const int CANNOT_INSERT_NULL_IN_ORDINARY_COLUMN; - extern const int CANNOT_PARSE_BOOL; extern const int VALUE_IS_OUT_OF_RANGE_OF_DATA_TYPE; } From c39cd75dff6b31c05dc179636a7d6d06e1903bf4 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 27 Mar 2024 22:15:29 +0000 Subject: [PATCH 045/470] Support Variant in JSONExtract functions --- src/Functions/FunctionsJSON.h | 44 ++++++++++++++++++- .../03034_json_extract_variant.reference | 5 +++ .../03034_json_extract_variant.sql | 6 +++ 3 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03034_json_extract_variant.reference create mode 100644 tests/queries/0_stateless/03034_json_extract_variant.sql diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h index 8a193785f875..af7cdeccba4e 100644 --- a/src/Functions/FunctionsJSON.h +++ b/src/Functions/FunctionsJSON.h @@ -10,7 +10,7 @@ #include #include -#include +//#include #include #include @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -35,6 +36,8 @@ #include #include #include +#include +#include #include #include @@ -1234,6 +1237,35 @@ struct JSONExtractTree std::unique_ptr value; }; + class VariantNode : public Node + { + public: + VariantNode(std::vector> variant_nodes_, std::vector order_) : variant_nodes(std::move(variant_nodes_)), order(std::move(order_)) { } + + bool insertResultToColumn(IColumn & dest, const Element & element) override + { + auto & column_variant = assert_cast(dest); + for (size_t i : order) + { + auto & variant = column_variant.getVariantByGlobalDiscriminator(i); + if (variant_nodes[i]->insertResultToColumn(variant, element)) + { + column_variant.getLocalDiscriminators().push_back(column_variant.localDiscriminatorByGlobal(i)); + column_variant.getOffsets().push_back(variant.size() - 1); + return true; + } + } + + return false; + } + + private: + std::vector> variant_nodes; + /// Order in which we should try variants nodes. + /// For example, String should be always the last one. 
+ std::vector order; + }; + static std::unique_ptr build(const char * function_name, const DataTypePtr & type) { switch (type->getTypeId()) @@ -1310,6 +1342,16 @@ struct JSONExtractTree const auto & value_type = map_type.getValueType(); return std::make_unique(build(function_name, key_type), build(function_name, value_type)); } + case TypeIndex::Variant: + { + const auto & variant_type = static_cast(*type); + const auto & variants = variant_type.getVariants(); + std::vector> variant_nodes; + variant_nodes.reserve(variants.size()); + for (const auto & variant : variants) + variant_nodes.push_back(build(function_name, variant)); + return std::make_unique(std::move(variant_nodes), SerializationVariant::getVariantsDeserializeTextOrder(variants)); + } default: throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} doesn't support the return type schema: {}", diff --git a/tests/queries/0_stateless/03034_json_extract_variant.reference b/tests/queries/0_stateless/03034_json_extract_variant.reference new file mode 100644 index 000000000000..8d78d639fd26 --- /dev/null +++ b/tests/queries/0_stateless/03034_json_extract_variant.reference @@ -0,0 +1,5 @@ +42 UInt32 +Hello String +[1,2,3] Array(UInt32) +{'a':42,'b':'Hello','c':[1,2,3]} +[('a',42),('b','Hello'),('c',[1,2,3])] Array(Tuple(String, Variant(Array(UInt32), String, UInt32))) diff --git a/tests/queries/0_stateless/03034_json_extract_variant.sql b/tests/queries/0_stateless/03034_json_extract_variant.sql new file mode 100644 index 000000000000..54d5bed9582c --- /dev/null +++ b/tests/queries/0_stateless/03034_json_extract_variant.sql @@ -0,0 +1,6 @@ +select JSONExtract('{"a" : 42}', 'a', 'Variant(String, UInt32)') as v, variantType(v); +select JSONExtract('{"a" : "Hello"}', 'a', 'Variant(String, UInt32)') as v, variantType(v); +select JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Variant(String, Array(UInt32))') as v, variantType(v); +select JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))'); +select JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') as v, toTypeName(v); + From 0772536a4c6addf790fca729611feeb430a0d63a Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 27 Mar 2024 22:26:44 +0000 Subject: [PATCH 046/470] Add examples in docs --- docs/en/sql-reference/data-types/variant.md | 34 +++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/docs/en/sql-reference/data-types/variant.md b/docs/en/sql-reference/data-types/variant.md index 7d10d4b0e977..668a1b260f40 100644 --- a/docs/en/sql-reference/data-types/variant.md +++ b/docs/en/sql-reference/data-types/variant.md @@ -395,3 +395,37 @@ SELECT v, variantType(v) FROM test ORDER by v; │ 100 │ UInt32 │ └─────┴────────────────┘ ``` + +## JSONExtract functions with Variant + +All `JSONExtract*` functions support `Variant` type: + +```sql +SELECT JSONExtract('{"a" : [1, 2, 3]}', 'a', 'Variant(UInt32, String, Array(UInt32))') AS variant, variantType(variant) AS variant_type; +``` + +```text +┌─variant─┬─variant_type──┐ +│ [1,2,3] │ Array(UInt32) │ +└─────────┴───────────────┘ +``` + +```sql +SELECT JSONExtract('{"obj" : {"a" : 42, "b" : "Hello", "c" : [1,2,3]}}', 'obj', 'Map(String, Variant(UInt32, String, Array(UInt32)))') AS map_of_variants, mapApply((k, v) -> (k, variantType(v)), map_of_variants) AS map_of_variant_types +``` + +```text +┌─map_of_variants──────────────────┬─map_of_variant_types────────────────────────────┐ +│ 
{'a':42,'b':'Hello','c':[1,2,3]} │ {'a':'UInt32','b':'String','c':'Array(UInt32)'} │ +└──────────────────────────────────┴─────────────────────────────────────────────────┘ +``` + +```sql +SELECT JSONExtractKeysAndValues('{"a" : 42, "b" : "Hello", "c" : [1,2,3]}', 'Variant(UInt32, String, Array(UInt32))') AS variants, arrayMap(x -> (x.1, variantType(x.2)), variants) AS variant_types +``` + +```text +┌─variants───────────────────────────────┬─variant_types─────────────────────────────────────────┐ +│ [('a',42),('b','Hello'),('c',[1,2,3])] │ [('a','UInt32'),('b','String'),('c','Array(UInt32)')] │ +└────────────────────────────────────────┴───────────────────────────────────────────────────────┘ +``` From 75be74874e15edf718b205f5ee2836ce7c5e54b6 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 27 Mar 2024 22:27:27 +0000 Subject: [PATCH 047/470] Fix include --- src/Functions/FunctionsJSON.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h index af7cdeccba4e..33dd40aed90f 100644 --- a/src/Functions/FunctionsJSON.h +++ b/src/Functions/FunctionsJSON.h @@ -10,7 +10,7 @@ #include #include -//#include +#include #include #include From afbb3f8a506b29ef085b67d936cb2dc1321fa20f Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 28 Mar 2024 09:26:00 +0100 Subject: [PATCH 048/470] Added wait before commit blocks --- src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index 9432cdf9fef4..fe64415191c5 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -150,6 +150,7 @@ void WriteBufferFromAzureBlobStorage::finalizeImpl() if (!block_ids.empty()) { + task_tracker->waitAll(); auto block_blob_client = blob_container_client->GetBlockBlobClient(blob_path); execWithRetry([&](){ block_blob_client.CommitBlockList(block_ids); }, max_unexpected_write_error_retries); LOG_TRACE(log, "Committed {} blocks for blob `{}`", block_ids.size(), blob_path); From e32497345013da3ccaf04467d5e52318c82d837f Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 28 Mar 2024 12:36:11 +0100 Subject: [PATCH 049/470] Standardize function formatting for MultiSearchAllPositionsXYZ and MultiSearchAnyXYZ functions --- .../functions/string-search-functions.md | 221 +++++++++++++++++- 1 file changed, 219 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 22f879c62aee..c4b5fb7c1f5b 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -178,11 +178,106 @@ Result: │ [0,13,0] │ └───────────────────────────────────────────────────────────────────┘ ``` +## multiSearchAllPositionsCaseInsensitive + +Like [multiSearchAllPositions](#multisearchallpositions) but ignores case. + +**Syntax** + +```sql +multiSearchAllPositionsCaseInsensitive(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- Array of the starting position in bytes and counting from 1 (if the substring was found). +- 0 if the substring was not found. 
+ +**Example** + +Query: + +```sql +SELECT multiSearchAllPositionsCaseInsensitive('ClickHouse',['c','h']); +``` + +```response +["1","6"] +``` ## multiSearchAllPositionsUTF8 Like [multiSearchAllPositions](#multiSearchAllPositions) but assumes `haystack` and the `needle`-s are UTF-8 encoded strings. +**Syntax** + +```sql +multiSearchAllPositionsUTF8(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- Array of the starting position in bytes and counting from 1 (if the substring was found). +- 0 if the substring was not found. + +**Example** + +Given `ClickHouse` as a UTF-8 string, find the positions of `C` ('\x43') and `H` ('\x48'). + +Query: + +```sql +SELECT multiSearchAllPositionsUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x43','\x48']); +``` + +```response +["1","6"] +``` + +## multiSearchAllPositionsCaseInsensitiveUTF8 + +Like [multiSearchAllPositionsUTF8](#multisearchallpositionsutf8) but ignores case. + +**Syntax** + +```sql +multiSearchAllPositionsCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- Array of the starting position in bytes and counting from 1 (if the substring was found). +- 0 if the substring was not found. + +**Example** + +Given `ClickHouse` as a UTF-8 string, find the positions of `c` (`\x63`) and `h` (`\x68`). + +Query: + +```sql +SELECT multiSearchAllPositionsCaseInsensitiveUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x63','\x68']); +``` + +```response +["1","6"] +``` + ## multiSearchFirstPosition Like `position` but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings. @@ -211,12 +306,134 @@ multiSearchFirstIndex(haystack, \[needle1, needle2, …, n Returns 1, if at least one string needlei matches the string `haystack` and 0 otherwise. -Functions `multiSearchAnyCaseInsensitive`, `multiSearchAnyUTF8` and `multiSearchAnyCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function. +Functions [`multiSearchAnyCaseInsensitive`](#multiSearchAnyCaseInsensitive), [`multiSearchAnyUTF8`](#multiSearchAnyUTF8) and []`multiSearchAnyCaseInsensitiveUTF8`](#multiSearchAnyCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function. **Syntax** ```sql -multiSearchAny(haystack, \[needle1, needle2, …, needlen\]) +multiSearchAny(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- 1, if there was at least one match. +- 0, if there was not at least one match. + +**Example** + +Query: + +```sql +SELECT multiSearchAny('ClickHouse',['C','H']); +``` + +```response +1 +``` + +## multiSearchAnyCaseInsensitive {#multiSearchAnyCaseInsensitive} + +Like [multiSearchAny](#multisearchany) but ignores case. 
+ +**Syntax** + +```sql +multiSearchAnyCaseInsensitive(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- 1, if there was at least one case-insensitive match. +- 0, if there was not at least one case-insensitive match. + +**Example** + +Query: + +```sql +SELECT multiSearchAnyCaseInsensitive('ClickHouse',['c','h']); +``` + +```response +1 +``` + +## multiSearchAnyUTF8 {#multiSearchAnyUTF8} + +Like [multiSearchAny](#multisearchany) but assumes `haystack` and the `needle`-s are UTF-8 encoded strings. + +*Syntax** + +```sql +multiSearchAnyUTF8(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- 1, if there was at least one match. +- 0, if there was not at least one match. + +**Example** + +Given `ClickHouse` as a UTF-8 string, check if there are any `C` ('\x43') or `H` ('\x48') letters in the word. + +Query: + +```sql +SELECT multiSearchAnyUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x43','\x48']); +``` + +```response +1 +``` + +## multiSearchAnyCaseInsensitiveUTF8 {#multiSearchAnyCaseInsensitiveUTF8} + +Like [multiSearchAnyUTF8](#multiSearchAnyUTF8) but ignores case. + +*Syntax** + +```sql +multiSearchAnyCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- 1, if there was at least one case-insensitive match. +- 0, if there was not at least one case-insensitive match. + +**Example** + +Given `ClickHouse` as a UTF-8 string, check if there is any letter `h`(`\x68`) in the word, ignoring case. 
+ +Query: + +```sql +SELECT multiSearchAnyCaseInsensitiveUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x68']); +``` + +```response +1 ``` ## match {#match} From f1ae99b1136218a1ff8cd281293868b7fdabf9fe Mon Sep 17 00:00:00 2001 From: zhongyuankai <872237106@qq.com> Date: Wed, 27 Mar 2024 12:55:04 +0800 Subject: [PATCH 050/470] Support ALTER MODIFY SETTING for Memory tables --- .../engines/table-engines/special/memory.md | 5 ++ src/Storages/MemorySettings.cpp | 19 ++++- src/Storages/MemorySettings.h | 3 + src/Storages/StorageMemory.cpp | 50 ++++++++++---- src/Storages/StorageMemory.h | 12 ++-- ...2_storage_memory_modify_settings.reference | 16 +++++ .../03032_storage_memory_modify_settings.sql | 69 +++++++++++++++++++ 7 files changed, 151 insertions(+), 23 deletions(-) create mode 100644 tests/queries/0_stateless/03032_storage_memory_modify_settings.reference create mode 100644 tests/queries/0_stateless/03032_storage_memory_modify_settings.sql diff --git a/docs/en/engines/table-engines/special/memory.md b/docs/en/engines/table-engines/special/memory.md index 19b5c798a76b..4f5d8a1d5e08 100644 --- a/docs/en/engines/table-engines/special/memory.md +++ b/docs/en/engines/table-engines/special/memory.md @@ -37,6 +37,11 @@ Upper and lower bounds can be specified to limit Memory engine table size, effec - `max_rows_to_keep` — Maximum rows to keep within memory table where oldest rows are deleted on each insertion (i.e circular buffer). Max rows can exceed the stated limit if the oldest batch of rows to remove falls under the `min_rows_to_keep` limit when adding a large block. - Default value: `0` +**Modify settings** +```sql +ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000; +``` + ## Usage {#usage} diff --git a/src/Storages/MemorySettings.cpp b/src/Storages/MemorySettings.cpp index f5e182b3484e..4968f2b3b16c 100644 --- a/src/Storages/MemorySettings.cpp +++ b/src/Storages/MemorySettings.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -11,6 +10,7 @@ namespace DB namespace ErrorCodes { extern const int UNKNOWN_SETTING; + extern const int SETTING_CONSTRAINT_VIOLATION; } IMPLEMENT_SETTINGS_TRAITS(memorySettingsTraits, MEMORY_SETTINGS) @@ -32,5 +32,22 @@ void MemorySettings::loadFromQuery(ASTStorage & storage_def) } } +ASTPtr MemorySettings::getSettingsChangesQuery() +{ + auto settings_ast = std::make_shared(); + settings_ast->is_standalone = false; + for (const auto & change : changes()) + settings_ast->changes.push_back(change); + + return settings_ast; +} + +void MemorySettings::sanityCheck() const +{ + if (min_bytes_to_keep > max_bytes_to_keep + || min_rows_to_keep > max_rows_to_keep) + throw Exception(ErrorCodes::SETTING_CONSTRAINT_VIOLATION, "Min. 
bytes / rows must be set with a max."); +} + } diff --git a/src/Storages/MemorySettings.h b/src/Storages/MemorySettings.h index ac6cdf73329c..f650746c4b28 100644 --- a/src/Storages/MemorySettings.h +++ b/src/Storages/MemorySettings.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -24,6 +25,8 @@ DECLARE_SETTINGS_TRAITS(memorySettingsTraits, MEMORY_SETTINGS) struct MemorySettings : public BaseSettings { void loadFromQuery(ASTStorage & storage_def); + ASTPtr getSettingsChangesQuery(); + void sanityCheck() const; }; } diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index c6222d2124ea..2de2878002ae 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -76,7 +76,7 @@ class MemorySink : public SinkToStorage convertDynamicColumnsToTuples(block, storage_snapshot); } - if (storage.compress) + if (storage.getMemorySettingsRef().compress) { Block compressed_block; for (const auto & elem : block) @@ -106,15 +106,16 @@ class MemorySink : public SinkToStorage auto new_data = std::make_unique(*(storage.data.get())); UInt64 new_total_rows = storage.total_size_rows.load(std::memory_order_relaxed) + inserted_rows; UInt64 new_total_bytes = storage.total_size_bytes.load(std::memory_order_relaxed) + inserted_bytes; + auto & memory_settings = storage.getMemorySettingsRef(); while (!new_data->empty() - && ((storage.max_bytes_to_keep && new_total_bytes > storage.max_bytes_to_keep) - || (storage.max_rows_to_keep && new_total_rows > storage.max_rows_to_keep))) + && ((memory_settings.max_bytes_to_keep && new_total_bytes > memory_settings.max_bytes_to_keep) + || (memory_settings.max_rows_to_keep && new_total_rows > memory_settings.max_rows_to_keep))) { Block oldest_block = new_data->front(); UInt64 rows_to_remove = oldest_block.rows(); UInt64 bytes_to_remove = oldest_block.allocatedBytes(); - if (new_total_bytes - bytes_to_remove < storage.min_bytes_to_keep - || new_total_rows - rows_to_remove < storage.min_rows_to_keep) + if (new_total_bytes - bytes_to_remove < memory_settings.min_bytes_to_keep + || new_total_rows - rows_to_remove < memory_settings.min_rows_to_keep) { break; // stop - removing next block will put us under min_bytes / min_rows threshold } @@ -145,15 +146,16 @@ StorageMemory::StorageMemory( ColumnsDescription columns_description_, ConstraintsDescription constraints_, const String & comment, - const MemorySettings & settings) - : IStorage(table_id_), data(std::make_unique()), compress(settings.compress), - min_rows_to_keep(settings.min_rows_to_keep), max_rows_to_keep(settings.max_rows_to_keep), - min_bytes_to_keep(settings.min_bytes_to_keep), max_bytes_to_keep(settings.max_bytes_to_keep) + const MemorySettings & memory_settings_) + : IStorage(table_id_) + , data(std::make_unique()) + , memory_settings(memory_settings_) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(std::move(columns_description_)); storage_metadata.setConstraints(std::move(constraints_)); storage_metadata.setComment(comment); + storage_metadata.setSettingsChanges(memory_settings.getSettingsChangesQuery()); setInMemoryMetadata(storage_metadata); } @@ -239,7 +241,7 @@ void StorageMemory::mutate(const MutationCommands & commands, ContextPtr context Block block; while (executor.pull(block)) { - if (compress) + if (memory_settings.compress) for (auto & elem : block) elem.column = elem.column->compress(); @@ -294,6 +296,25 @@ void StorageMemory::truncate( total_size_rows.store(0, std::memory_order_relaxed); } +void StorageMemory::alter(const 
DB::AlterCommands & params, DB::ContextPtr context, DB::IStorage::AlterLockHolder & /*alter_lock_holder*/) +{ + auto table_id = getStorageID(); + StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + params.apply(new_metadata, context); + + if (params.isSettingsAlter()) + { + auto & settings_changes = new_metadata.settings_changes->as(); + auto copy = memory_settings; + copy.applyChanges(settings_changes.changes); + copy.sanityCheck(); + memory_settings = std::move(copy); + } + + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); + setInMemoryMetadata(new_metadata); +} + namespace { @@ -499,7 +520,7 @@ void StorageMemory::restoreDataImpl(const BackupPtr & backup, const String & dat while (auto block = block_in.read()) { - if (compress) + if (memory_settings.compress) { Block compressed_block; for (const auto & elem : block) @@ -534,7 +555,8 @@ void StorageMemory::checkAlterIsPossible(const AlterCommands & commands, Context { if (command.type != AlterCommand::Type::ADD_COLUMN && command.type != AlterCommand::Type::MODIFY_COLUMN && command.type != AlterCommand::Type::DROP_COLUMN && command.type != AlterCommand::Type::COMMENT_COLUMN - && command.type != AlterCommand::Type::COMMENT_TABLE && command.type != AlterCommand::Type::RENAME_COLUMN) + && command.type != AlterCommand::Type::COMMENT_TABLE && command.type != AlterCommand::Type::RENAME_COLUMN + && command.type != AlterCommand::Type::MODIFY_SETTING) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Alter of type '{}' is not supported by storage {}", command.type, getName()); } @@ -566,9 +588,7 @@ void registerStorageMemory(StorageFactory & factory) if (has_settings) settings.loadFromQuery(*args.storage_def); - if (settings.min_bytes_to_keep > settings.max_bytes_to_keep - || settings.min_rows_to_keep > settings.max_rows_to_keep) - throw Exception(ErrorCodes::SETTING_CONSTRAINT_VIOLATION, "Min. 
bytes / rows must be set with a max."); + settings.sanityCheck(); return std::make_shared(args.table_id, args.columns, args.constraints, args.comment, settings); }, diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 13f1c971d823..50581aa0d61a 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -31,7 +31,7 @@ friend class MemorySink; ColumnsDescription columns_description_, ConstraintsDescription constraints_, const String & comment, - const MemorySettings & settings = MemorySettings()); + const MemorySettings & memory_settings_ = MemorySettings()); String getName() const override { return "Memory"; } @@ -46,6 +46,8 @@ friend class MemorySink; StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const override; + const MemorySettings & getMemorySettingsRef() const { return memory_settings; } + void read( QueryPlan & query_plan, const Names & column_names, @@ -78,6 +80,7 @@ friend class MemorySink; void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; void checkAlterIsPossible(const AlterCommands & commands, ContextPtr local_context) const override; + void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & alter_lock_holder) override; std::optional totalRows(const Settings &) const override; std::optional totalBytes(const Settings &) const override; @@ -134,12 +137,7 @@ friend class MemorySink; std::atomic total_size_bytes = 0; std::atomic total_size_rows = 0; - bool compress; - UInt64 min_rows_to_keep; - UInt64 max_rows_to_keep; - UInt64 min_bytes_to_keep; - UInt64 max_bytes_to_keep; - + MemorySettings memory_settings; friend class ReadFromMemoryStorageStep; }; diff --git a/tests/queries/0_stateless/03032_storage_memory_modify_settings.reference b/tests/queries/0_stateless/03032_storage_memory_modify_settings.reference new file mode 100644 index 000000000000..20dda4fa15af --- /dev/null +++ b/tests/queries/0_stateless/03032_storage_memory_modify_settings.reference @@ -0,0 +1,16 @@ +TESTING BYTES +8192 +9216 +9216 +65536 +TESTING ROWS +50 +1000 +1020 +1100 +TESTING NO CIRCULAR-BUFFER +8192 +9216 +17408 +82944 +TESTING INVALID SETTINGS diff --git a/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql b/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql new file mode 100644 index 000000000000..34be327175ea --- /dev/null +++ b/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql @@ -0,0 +1,69 @@ +SET max_block_size = 65409; -- Default value + +DROP TABLE IF EXISTS memory; +CREATE TABLE memory (i UInt32) ENGINE = Memory; + +ALTER TABLE memory MODIFY SETTING min_bytes_to_keep = 4096, max_bytes_to_keep = 16384; + +SELECT 'TESTING BYTES'; +/* 1. testing oldest block doesn't get deleted because of min-threshold */ +INSERT INTO memory SELECT * FROM numbers(0, 1600); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +/* 2. adding block that doesn't get deleted */ +INSERT INTO memory SELECT * FROM numbers(1000, 100); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +/* 3. 
testing oldest block gets deleted - 9216 bytes - 1100 */ +INSERT INTO memory SELECT * FROM numbers(9000, 1000); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +/* 4.check large block over-writes all bytes / rows */ +INSERT INTO memory SELECT * FROM numbers(9000, 10000); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +DROP TABLE IF EXISTS memory; +CREATE TABLE memory (i UInt32) ENGINE = Memory; + +ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000; + +SELECT 'TESTING ROWS'; +/* 1. add normal number of rows */ +INSERT INTO memory SELECT * FROM numbers(0, 50); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +/* 2. table should have 1000 */ +INSERT INTO memory SELECT * FROM numbers(50, 950); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +/* 3. table should have 1020 - removed first 50 */ +INSERT INTO memory SELECT * FROM numbers(2000, 70); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +/* 4. check large block over-writes all rows */ +INSERT INTO memory SELECT * FROM numbers(3000, 1100); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +SELECT 'TESTING NO CIRCULAR-BUFFER'; +DROP TABLE IF EXISTS memory; +CREATE TABLE memory (i UInt32) ENGINE = Memory; + +INSERT INTO memory SELECT * FROM numbers(0, 1600); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +INSERT INTO memory SELECT * FROM numbers(1000, 100); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +INSERT INTO memory SELECT * FROM numbers(9000, 1000); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +INSERT INTO memory SELECT * FROM numbers(9000, 10000); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + +SELECT 'TESTING INVALID SETTINGS'; +DROP TABLE IF EXISTS memory; +CREATE TABLE memory (i UInt32) ENGINE = Memory; +ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100; -- { serverError 452 } +ALTER TABLE memory MODIFY SETTING min_bytes_to_keep = 100; -- { serverError 452 } + +DROP TABLE memory; \ No newline at end of file From 7d20467f59b629a1d690a3347b7cd551c742496b Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 28 Mar 2024 14:35:43 +0100 Subject: [PATCH 051/470] Fix test reference --- .../0_stateless/03013_ignore_drop_queries_probability.reference | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03013_ignore_drop_queries_probability.reference b/tests/queries/0_stateless/03013_ignore_drop_queries_probability.reference index d81cc0710eb6..daaac9e30302 100644 --- a/tests/queries/0_stateless/03013_ignore_drop_queries_probability.reference +++ b/tests/queries/0_stateless/03013_ignore_drop_queries_probability.reference @@ -1 +1,2 @@ 42 +42 From 19b90d8348aec4dc4fc038c89155a169ba4d6224 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 28 Mar 2024 16:28:59 +0100 Subject: [PATCH 052/470] Resolve conflicts after merge, add system.filesystem_cache_settings --- src/Interpreters/Cache/EvictionCandidates.cpp | 23 +++++- src/Interpreters/Cache/EvictionCandidates.h | 14 ++++ src/Interpreters/Cache/FileCache.cpp | 21 ++++-- .../Cache/LRUFileCachePriority.cpp | 71 
+++++++++--------- src/Interpreters/Cache/Metadata.cpp | 2 +- .../Cache/SLRUFileCachePriority.cpp | 2 +- .../StorageSystemFilesystemCacheSettings.cpp | 72 +++++++++++++++++++ .../StorageSystemFilesystemCacheSettings.h | 22 ++++++ src/Storages/System/attachSystemTables.cpp | 2 + ...resize_filesystem_cache_hardcore.reference | 7 ++ ...ically_resize_filesystem_cache_hardcore.sh | 46 ++++++++++++ 11 files changed, 236 insertions(+), 46 deletions(-) create mode 100644 src/Storages/System/StorageSystemFilesystemCacheSettings.cpp create mode 100644 src/Storages/System/StorageSystemFilesystemCacheSettings.h create mode 100644 tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_hardcore.reference create mode 100755 tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_hardcore.sh diff --git a/src/Interpreters/Cache/EvictionCandidates.cpp b/src/Interpreters/Cache/EvictionCandidates.cpp index cb15af727049..080ef6759181 100644 --- a/src/Interpreters/Cache/EvictionCandidates.cpp +++ b/src/Interpreters/Cache/EvictionCandidates.cpp @@ -57,13 +57,25 @@ void EvictionCandidates::add( ++candidates_size; } +void EvictionCandidates::removeQueueEntries(const CachePriorityGuard::Lock & lock) +{ + for (const auto & [key, key_candidates] : candidates) + { + for (const auto & candidate : key_candidates.candidates) + candidate->getQueueIterator()->remove(lock); + } + invalidated_queue_entries = true; +} + void EvictionCandidates::evict() { if (candidates.empty()) return; auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::FilesystemCacheEvictMicroseconds); - queue_entries_to_invalidate.reserve(candidates_size); + + if (!invalidated_queue_entries) + queue_entries_to_invalidate.reserve(candidates_size); for (auto & [key, key_candidates] : candidates) { @@ -111,7 +123,9 @@ void EvictionCandidates::evict() /// it was freed in favour of some reserver, so we can make it visibly /// free only for that particular reserver. 
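+        /// When removeQueueEntries() has already dropped the queue entries, there is nothing left to invalidate here.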
- queue_entries_to_invalidate.push_back(iterator); + if (!invalidated_queue_entries) + queue_entries_to_invalidate.push_back(iterator); + key_candidates.candidates.pop_back(); } } @@ -165,4 +179,9 @@ void EvictionCandidates::setSpaceHolder( hold_space = std::make_unique(size, elements, priority, lock); } +void EvictionCandidates::insert(EvictionCandidates && other, const CachePriorityGuard::Lock &) +{ + candidates.insert(make_move_iterator(other.candidates.begin()), make_move_iterator(other.candidates.end())); +} + } diff --git a/src/Interpreters/Cache/EvictionCandidates.h b/src/Interpreters/Cache/EvictionCandidates.h index 2745d508a5d0..571010a14bce 100644 --- a/src/Interpreters/Cache/EvictionCandidates.h +++ b/src/Interpreters/Cache/EvictionCandidates.h @@ -9,6 +9,13 @@ class EvictionCandidates public: using FinalizeEvictionFunc = std::function; + EvictionCandidates() = default; + EvictionCandidates(EvictionCandidates && other) noexcept + { + candidates = std::move(other.candidates); + candidates_size = std::move(other.candidates_size); + queue_entries_to_invalidate = std::move(other.queue_entries_to_invalidate); + } ~EvictionCandidates(); void add( @@ -16,8 +23,12 @@ class EvictionCandidates LockedKey & locked_key, const CachePriorityGuard::Lock &); + void insert(EvictionCandidates && other, const CachePriorityGuard::Lock &); + void evict(); + void removeQueueEntries(const CachePriorityGuard::Lock &); + void onFinalize(FinalizeEvictionFunc && func) { on_finalize.emplace_back(std::move(func)); } void finalize( @@ -47,7 +58,10 @@ class EvictionCandidates size_t candidates_size = 0; std::vector on_finalize; + std::vector queue_entries_to_invalidate; + bool invalidated_queue_entries = false; + IFileCachePriority::HoldSpacePtr hold_space; }; diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index e845559e8a7e..75e199c544b2 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1383,7 +1383,7 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, if (new_settings.max_size != actual_settings.max_size || new_settings.max_elements != actual_settings.max_elements) { - std::vector evicted_paths; + std::optional eviction_candidates; { cache_is_being_resized.store(true, std::memory_order_relaxed); SCOPE_EXIT({ @@ -1391,18 +1391,27 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, }); auto cache_lock = lockCache(); + FileCacheReserveStat stat; - auto eviction_candidates = main_priority->collectCandidatesForEviction( - new_settings.max_size, new_settings.max_elements, 0/* max_candidates_to_evict */, stat, cache_lock); + eviction_candidates.emplace(main_priority->collectCandidatesForEviction( + new_settings.max_size, new_settings.max_elements, 0/* max_candidates_to_evict */, stat, cache_lock)); - evicted_paths = eviction_candidates.evictFromMemory(nullptr, cache_lock); + eviction_candidates->removeQueueEntries(cache_lock); main_priority->modifySizeLimits( new_settings.max_size, new_settings.max_elements, new_settings.slru_size_ratio, cache_lock); } - for (const auto & path : evicted_paths) - fs::remove(path); + try + { + eviction_candidates->evict(); + } + catch (...) 
+ { + auto cache_lock = lockCache(); + eviction_candidates->finalize(nullptr, cache_lock); + throw; + } LOG_INFO(log, "Changed max_size from {} to {}, max_elements from {} to {}", actual_settings.max_size, new_settings.max_size, diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index 4a80bce7658d..d1c46691c375 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -279,10 +279,42 @@ bool LRUFileCachePriority::collectCandidatesForEviction( auto can_fit = [&] { - return canFit(size, 1, stat.stat.releasable_size, stat.stat.releasable_count, lock); + return canFit(size, 1, stat.total_stat.releasable_size, stat.total_stat.releasable_count, lock); }; + iterateForEviction(res, stat, can_fit, lock); - return can_fit(); + if (can_fit()) + { + /// As eviction is done without a cache priority lock, + /// then if some space was partially available and some needed + /// to be freed via eviction, we need to make sure that this + /// partially available space is still available + /// after we finish with eviction for non-available space. + /// So we create a space holder for the currently available part + /// of the required space for the duration of eviction of the other + /// currently non-available part of the space. + + const size_t hold_size = size > stat.total_stat.releasable_size + ? size - stat.total_stat.releasable_size + : 0; + + const size_t hold_elements = elements > stat.total_stat.releasable_count + ? elements - stat.total_stat.releasable_count + : 0; + + if (hold_size || hold_elements) + res.setSpaceHolder(hold_size, hold_elements, *this, lock); + + // LOG_TEST(log, "Collected {} candidates for eviction (total size: {}). " + // "Took hold of size {} and elements {}", + // res.size(), stat.total_stat.releasable_size, hold_size, hold_elements); + + return true; + } + else + { + return false; + } } EvictionCandidates LRUFileCachePriority::collectCandidatesForEviction( @@ -295,7 +327,7 @@ EvictionCandidates LRUFileCachePriority::collectCandidatesForEviction( EvictionCandidates res; auto stop_condition = [&, this]() { - return canFit(0, 0, stat.stat.releasable_size, stat.stat.releasable_count, + return canFit(0, 0, stat.total_stat.releasable_size, stat.total_stat.releasable_count, lock, &desired_size, &desired_elements_count) || (max_candidates_to_evict && res.size() >= max_candidates_to_evict); }; @@ -334,39 +366,6 @@ void LRUFileCachePriority::iterateForEviction( { return stop_condition() ? IterationResult::BREAK : iterate_func(locked_key, segment_metadata); }, lock); - - if (can_fit()) - { - /// As eviction is done without a cache priority lock, - /// then if some space was partially available and some needed - /// to be freed via eviction, we need to make sure that this - /// partially available space is still available - /// after we finish with eviction for non-available space. - /// So we create a space holder for the currently available part - /// of the required space for the duration of eviction of the other - /// currently non-available part of the space. - - const size_t hold_size = size > stat.total_stat.releasable_size - ? size - stat.total_stat.releasable_size - : 0; - - const size_t hold_elements = elements > stat.total_stat.releasable_count - ? 
elements - stat.total_stat.releasable_count - : 0; - - if (hold_size || hold_elements) - res.setSpaceHolder(hold_size, hold_elements, *this, lock); - - // LOG_TEST(log, "Collected {} candidates for eviction (total size: {}). " - // "Took hold of size {} and elements {}", - // res.size(), stat.total_stat.releasable_size, hold_size, hold_elements); - - return true; - } - else - { - return false; - } } LRUFileCachePriority::LRUIterator LRUFileCachePriority::move( diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 082c33032f27..26611f023797 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -941,7 +941,7 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl( file_segment->detach(segment_lock, *this); - if (!remove_only_metadata) + // if (!remove_only_metadata) { try { diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp index df4d4276e89d..79ca489cea27 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp @@ -263,7 +263,7 @@ EvictionCandidates SLRUFileCachePriority::collectCandidatesForEviction( desired_probationary_size, desired_probationary_elements_num, max_candidates_to_evict, stat, lock); chassert(!max_candidates_to_evict || res.size() <= max_candidates_to_evict); - chassert(res.size() == stat.stat.releasable_count); + chassert(res.size() == stat.total_stat.releasable_count); if (max_candidates_to_evict && res.size() == max_candidates_to_evict) return res; diff --git a/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp b/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp new file mode 100644 index 000000000000..8915032baf70 --- /dev/null +++ b/src/Storages/System/StorageSystemFilesystemCacheSettings.cpp @@ -0,0 +1,72 @@ +#include "StorageSystemFilesystemCacheSettings.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +ColumnsDescription StorageSystemFilesystemCacheSettings::getColumnsDescription() +{ + return ColumnsDescription + { + {"cache_name", std::make_shared(), "Name of the cache object"}, + {"path", std::make_shared(), "Cache directory"}, + {"max_size", std::make_shared(), "Cache size limit by the number of bytes"}, + {"max_elements", std::make_shared(), "Cache size limit by the number of elements"}, + {"current_size", std::make_shared(), "Current cache size by the number of bytes"}, + {"current_elements", std::make_shared(), "Current cache size by the number of elements"}, + {"max_file_segment_size", std::make_shared(), "Maximum allowed file segment size"}, + {"boundary_alignment", std::make_shared(), "Boundary alignment of file segments"}, + {"cache_on_write_operations", std::make_shared(), "Write-through cache enablemenet setting"}, + {"cache_hits_threshold", std::make_shared(), "Cache hits threshold enablemenet setting"}, + {"background_download_threads", std::make_shared(), "Number of background download threads"}, + {"background_download_queue_size_limit", std::make_shared(), "Queue size limit for background download"}, + {"load_metadata_threads", std::make_shared(), "Number of load metadata threads"}, + {"enable_bypass_cache_threshold", std::make_shared(), "Bypass cache threshold limit enablement setting"}, + }; +} + +StorageSystemFilesystemCacheSettings::StorageSystemFilesystemCacheSettings(const StorageID & table_id_) + : IStorageSystemOneBlock(table_id_, getColumnsDescription()) 
+{ +} + +void StorageSystemFilesystemCacheSettings::fillData( + MutableColumns & res_columns, ContextPtr context, const ActionsDAG::Node *, std::vector) const +{ + context->checkAccess(AccessType::SHOW_FILESYSTEM_CACHES); + + auto caches = FileCacheFactory::instance().getAll(); + + for (const auto & [cache_name, cache_data] : caches) + { + const auto & settings = cache_data->getSettings(); + const auto & cache = cache_data->cache; + + size_t i = 0; + res_columns[i++]->insert(cache_name); + res_columns[i++]->insert(settings.base_path); + res_columns[i++]->insert(settings.max_size); + res_columns[i++]->insert(settings.max_elements); + res_columns[i++]->insert(cache->getUsedCacheSize()); + res_columns[i++]->insert(cache->getFileSegmentsNum()); + res_columns[i++]->insert(settings.max_file_segment_size); + res_columns[i++]->insert(settings.boundary_alignment); + res_columns[i++]->insert(settings.cache_on_write_operations); + res_columns[i++]->insert(settings.cache_hits_threshold); + res_columns[i++]->insert(settings.background_download_threads); + res_columns[i++]->insert(settings.background_download_queue_size_limit); + res_columns[i++]->insert(settings.load_metadata_threads); + res_columns[i++]->insert(settings.enable_bypass_cache_with_threshold); + } +} + +} diff --git a/src/Storages/System/StorageSystemFilesystemCacheSettings.h b/src/Storages/System/StorageSystemFilesystemCacheSettings.h new file mode 100644 index 000000000000..59a123c32c12 --- /dev/null +++ b/src/Storages/System/StorageSystemFilesystemCacheSettings.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class StorageSystemFilesystemCacheSettings final : public IStorageSystemOneBlock +{ +public: + explicit StorageSystemFilesystemCacheSettings(const StorageID & table_id_); + + std::string getName() const override { return "SystemFilesystemCacheSettings"; } + + static ColumnsDescription getColumnsDescription(); + +protected: + void fillData(MutableColumns & res_columns, ContextPtr, const ActionsDAG::Node *, std::vector) const override; +}; + +} diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index cd8be60e342c..6ff86b26ca9e 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -78,6 +78,7 @@ #include #include #include +#include #include #include #include @@ -213,6 +214,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "part_moves_between_shards", "Contains information about parts which are currently in a process of moving between shards and their progress."); attach(context, system_database, "asynchronous_inserts", "Contains information about pending asynchronous inserts in queue in server's memory."); attachNoDescription(context, system_database, "filesystem_cache", "Contains information about all entries inside filesystem cache for remote objects."); + attachNoDescription(context, system_database, "filesystem_cache_settings", "Contains information about all filesystem cache settings"); attachNoDescription(context, system_database, "query_cache", "Contains information about all entries inside query cache in server's memory."); attachNoDescription(context, system_database, "remote_data_paths", "Contains a mapping from a filename on local filesystem to a blob name inside object storage."); attach(context, system_database, "certificates", "Contains information about available certificates and their sources."); diff --git 
a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_hardcore.reference b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_hardcore.reference new file mode 100644 index 000000000000..fcb49fa99454 --- /dev/null +++ b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_hardcore.reference @@ -0,0 +1,7 @@ +1 +1 +1 +1 +1 +1 +1 diff --git a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_hardcore.sh b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_hardcore.sh new file mode 100755 index 000000000000..79c43048b890 --- /dev/null +++ b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_hardcore.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-parallel, no-s3-storage, no-random-settings + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +disk_name="s3_cache" + +$CLICKHOUSE_CLIENT -nm --query " +DROP TABLE IF EXISTS test; +CREATE TABLE test (a String) engine=MergeTree() ORDER BY tuple() SETTINGS disk = '$disk_name'; +INSERT INTO test SELECT randomString(1000); +" + +$CLICKHOUSE_CLIENT --query "SELECT * FROM test FORMAT Null" + +prev_max_size=$($CLICKHOUSE_CLIENT --query "SELECT max_size FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name'") +$CLICKHOUSE_CLIENT --query "SELECT current_size > 0 FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name' FORMAT TabSeparated" + +config_path=/etc/clickhouse-server/config.d/storage_conf.xml +config_path_tmp=$config_path.tmp + +new_max_size=$($CLICKHOUSE_CLIENT --query "SELECT divide(max_size, 2) FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name'") +sed -i "s|$prev_max_size<\/max_size>|$new_max_size<\/max_size>|" $config_path + +# echo $prev_max_size +# echo $new_max_size + +$CLICKHOUSE_CLIENT -nm --query " +set send_logs_level='fatal'; +SYSTEM RELOAD CONFIG" + +$CLICKHOUSE_CLIENT --query "SELECT max_size == $new_max_size FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name' FORMAT TabSeparated" +$CLICKHOUSE_CLIENT --query "SELECT current_size > 0 FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name' FORMAT TabSeparated" +$CLICKHOUSE_CLIENT --query "SELECT current_size <= max_size FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name' FORMAT TabSeparated" + +sed -i "s|$new_max_size<\/max_size>|$prev_max_size<\/max_size>|" $config_path + +$CLICKHOUSE_CLIENT -nm --query " +set send_logs_level='fatal'; +SYSTEM RELOAD CONFIG" + +$CLICKHOUSE_CLIENT --query "SELECT max_size == $prev_max_size FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name' FORMAT TabSeparated" +$CLICKHOUSE_CLIENT --query "SELECT current_size > 0 FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name' FORMAT TabSeparated" +$CLICKHOUSE_CLIENT --query "SELECT current_size <= max_size FROM system.filesystem_cache_settings WHERE cache_name = '$disk_name' FORMAT TabSeparated" From 1d453af6ff2f49fb2dc3beabbcdc2e00fe21757f Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 28 Mar 2024 16:47:22 +0100 Subject: [PATCH 053/470] Fix style check --- src/Interpreters/Cache/LRUFileCachePriority.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index d1c46691c375..78ece5a31245 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ 
b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -16,9 +16,6 @@ namespace ProfileEvents { extern const Event FilesystemCacheEvictionSkippedFileSegments; extern const Event FilesystemCacheEvictionTries; - extern const Event FilesystemCacheEvictMicroseconds; - extern const Event FilesystemCacheEvictedBytes; - extern const Event FilesystemCacheEvictedFileSegments; extern const Event FilesystemCacheEvictionSkippedEvictingFileSegments; } From e2b0fde0f077b8961e67a51f2f0c8ff2fe12ab74 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 28 Mar 2024 17:22:14 +0100 Subject: [PATCH 054/470] Remove unused setting --- src/Storages/StorageMemory.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 2de2878002ae..013c9f66c2c4 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -46,7 +46,6 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int CANNOT_RESTORE_TABLE; extern const int NOT_IMPLEMENTED; - extern const int SETTING_CONSTRAINT_VIOLATION; } class MemorySink : public SinkToStorage From 7fb4ace6ef8a39c826e6726675015b6ab3fd2391 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 28 Mar 2024 19:24:10 +0100 Subject: [PATCH 055/470] clang fix --- src/Storages/StorageMemory.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 013c9f66c2c4..055d9ee6fa1a 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -105,7 +105,7 @@ class MemorySink : public SinkToStorage auto new_data = std::make_unique(*(storage.data.get())); UInt64 new_total_rows = storage.total_size_rows.load(std::memory_order_relaxed) + inserted_rows; UInt64 new_total_bytes = storage.total_size_bytes.load(std::memory_order_relaxed) + inserted_bytes; - auto & memory_settings = storage.getMemorySettingsRef(); + const auto & memory_settings = storage.getMemorySettingsRef(); while (!new_data->empty() && ((memory_settings.max_bytes_to_keep && new_total_bytes > memory_settings.max_bytes_to_keep) || (memory_settings.max_rows_to_keep && new_total_rows > memory_settings.max_rows_to_keep))) From ef826ee881b2c260999845f86820389c4378e2ab Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 28 Mar 2024 19:47:50 +0100 Subject: [PATCH 056/470] Standardize format of multiSearchFirstIndexXYZ functions --- .../functions/string-search-functions.md | 125 +++++++++++++++++- 1 file changed, 123 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index c4b5fb7c1f5b..c067f9010fae 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -294,12 +294,133 @@ multiSearchFirstPosition(haystack, \[needle1, needle2, … Returns the index `i` (starting from 1) of the leftmost found needlei in the string `haystack` and 0 otherwise. -Functions `multiSearchFirstIndexCaseInsensitive`, `multiSearchFirstIndexUTF8` and `multiSearchFirstIndexCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function. 
+Functions [`multiSearchFirstIndexCaseInsensitive`](#multiSearchFirstIndexCaseInsensitive), [`multiSearchFirstIndexUTF8`](#multiSearchFirstIndexUTF8) and [`multiSearchFirstIndexCaseInsensitiveUTF8`](#multiSearchFirstIndexCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function. **Syntax** ```sql -multiSearchFirstIndex(haystack, \[needle1, needle2, …, needlen\]) +multiSearchFirstIndex(haystack, [needle1, needle2, ..., needleN]) +``` +**Parameters** + +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- index (starting from 1) of the leftmost found needle. +- 0, if there was no match. + +**Example** + +Query: + +```sql +SELECT multiSearchFirstIndex('Hello World',['World','Hello']); +``` + +```response +1 +``` + +## multiSearchFirstIndexCaseInsensitive {#multiSearchFirstIndexCaseInsensitive} + +Returns the index `i` (starting from 1) of the leftmost found needlei in the string `haystack` and 0 otherwise. Ignores case. + +**Syntax** + +```sql +multiSearchFirstIndexCaseInsensitive(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- index (starting from 1) of the leftmost found needle. +- 0, if there was no match. + +**Example** + +Query: + +```sql +SELECT multiSearchFirstIndexCaseInsensitive('hElLo WoRlD',['World','Hello']); +``` + +```response +1 +``` + +## multiSearchFirstIndexUTF8 {#multiSearchFirstIndexUTF8} + +Returns the index `i` (starting from 1) of the leftmost found needlei in the string `haystack` and 0 otherwise. Assumes `haystack` and `needle` are UTF-8 encoded strings. + +**Syntax** + +```sql +multiSearchFirstIndexUTF8(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- index (starting from 1) of the leftmost found needle. +- 0, if there was no match. + +**Example** + +Given `Hello World` as a UTF-8 string, find the first index of UTF-8 strings `Hello` and `World`. + +Query: + +```sql +SELECT multiSearchFirstIndexUTF8('\x48\x65\x6c\x6c\x6f\x20\x57\x6f\x72\x6c\x64',['\x57\x6f\x72\x6c\x64','\x48\x65\x6c\x6c\x6f']); +``` + +```response +1 +``` + +## multiSearchFirstIndexCaseInsensitiveUTF8 {#multiSearchFirstIndexCaseInsensitiveUTF8} + +Returns the index `i` (starting from 1) of the leftmost found needlei in the string `haystack` and 0 otherwise. Assumes `haystack` and `needle` are UTF-8 encoded strings. Ignores case. + +**Syntax** + +```sql +multiSearchFirstIndexCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- index (starting from 1) of the leftmost found needle. +- 0, if there was no match. 
+ +**Example** + +Given `HELLO WORLD` as a UTF-8 string, find the first index of UTF-8 strings `hello` and `world`. + +Query: + +```sql +SELECT multiSearchFirstIndexCaseInsensitiveUTF8('\x48\x45\x4c\x4c\x4f\x20\x57\x4f\x52\x4c\x44',['\x68\x65\x6c\x6c\x6f','\x77\x6f\x72\x6c\x64']); +``` + +```response +1 ``` ## multiSearchAny {#multisearchany} From a97a3196b45fa5cb86f7d95b4869318864a18562 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 28 Mar 2024 20:52:55 +0100 Subject: [PATCH 057/470] Standardize formatting of multisearchFirstPositionXYZ functions --- .../functions/string-search-functions.md | 128 +++++++++++++++++- 1 file changed, 125 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index c067f9010fae..232397ce5924 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -278,16 +278,138 @@ SELECT multiSearchAllPositionsCaseInsensitiveUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\ ["1","6"] ``` -## multiSearchFirstPosition +## multiSearchFirstPosition {#multiSearchFirstPosition} Like `position` but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings. -Functions `multiSearchFirstPositionCaseInsensitive`, `multiSearchFirstPositionUTF8` and `multiSearchFirstPositionCaseInsensitiveUTF8` provide case-insensitive and/or UTF-8 variants of this function. +Functions [`multiSearchFirstPositionCaseInsensitive`](#multiSearchFirstPositionCaseInsensitive), [`multiSearchFirstPositionUTF8`](#multiSearchFirstPositionUTF8) and [`multiSearchFirstPositionCaseInsensitiveUTF8`](#multiSearchFirstPositionCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function. **Syntax** ```sql -multiSearchFirstPosition(haystack, \[needle1, needle2, …, needlen\]) +multiSearchFirstPosition(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings. +- 0, if there was no match. + +**Example** + +Query: + +```sql +SELECT multiSearchFirstPosition('Hello World',['llo', 'Wor', 'ld']); +``` + +```response +3 +``` + +## multiSearchFirstPositionCaseInsensitive + +Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but ignores case. + +**Syntax** + +```sql +multiSearchFirstPositionCaseInsensitive(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Array of substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings. +- 0, if there was no match. + +**Example** + +Query: + +```sql +SELECT multiSearchFirstPositionCaseInsensitive('HELLO WORLD',['wor', 'ld', 'ello']); +``` + +```response +2 +``` + +## multiSearchFirstPositionUTF8 {#multiSearchFirstPositionUTF8} + +Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings. 
+ +**Syntax** + +```sql +multiSearchFirstPositionUTF8(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings. +- 0, if there was no match. + +**Example** + +Find the leftmost offset in UTF-8 string `hello world` which matches any of the given needles. + +Query: + +```sql +SELECT multiSearchFirstPositionUTF8('\x68\x65\x6c\x6c\x6f\x20\x77\x6f\x72\x6c\x64',['wor', 'ld', 'ello']); +``` + +```response +2 +``` + +## multiSearchFirstPositionCaseInsensitiveUTF8 {#multiSearchFirstPositionCaseInsensitiveUTF8} + +Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings and ignores case. + +**Syntax** + +```sql +multiSearchFirstPositionCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needleN]) +``` + +**Parameters** + +- `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). +- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) + +**Returned value** + +- Leftmost offset in a `haystack` string which matches any of multiple `needle` strings, ignoring case. +- 0, if there was no match. + +**Example** + +Find the leftmost offset in UTF-8 string `HELLO WORLD` which matches any of the given needles. + +Query: + +```sql +SELECT multiSearchFirstPositionCaseInsensitiveUTF8('\x48\x45\x4c\x4c\x4f\x20\x57\x4f\x52\x4c\x44',['wor', 'ld', 'ello']); +``` + +```response +2 ``` ## multiSearchFirstIndex From 89afca881f81142481b31682882fdb2e1482f08c Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 28 Mar 2024 21:06:17 +0100 Subject: [PATCH 058/470] Minor formatting changes to multipleSearchAllPositions --- docs/en/sql-reference/functions/string-search-functions.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 232397ce5924..1e564eb47412 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -159,11 +159,12 @@ multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN]) **Arguments** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. Array +- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). **Returned values** -- Array of the starting position in bytes and counting from 1 (if the substring was found) or 0 (if the substring was not found) +- Array of the starting position in bytes and counting from 1 (if the substring was found). +- 0, if the substring was not found. 
**Example** From cc35db1cef95a57c458d2606f517b0f43b91c8e4 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 28 Mar 2024 21:20:33 +0100 Subject: [PATCH 059/470] Finor fixes --- .../functions/string-search-functions.md | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 1e564eb47412..9430fb0f728e 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -192,7 +192,7 @@ multiSearchAllPositionsCaseInsensitive(haystack, [needle1, needle2, ..., needleN **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). **Returned value** @@ -213,7 +213,7 @@ SELECT multiSearchAllPositionsCaseInsensitive('ClickHouse',['c','h']); ## multiSearchAllPositionsUTF8 -Like [multiSearchAllPositions](#multiSearchAllPositions) but assumes `haystack` and the `needle`-s are UTF-8 encoded strings. +Like [multiSearchAllPositions](#multiSearchAllPositions) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings. **Syntax** @@ -224,7 +224,7 @@ multiSearchAllPositionsUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md). **Returned value** @@ -233,7 +233,7 @@ multiSearchAllPositionsUTF8(haystack, [needle1, needle2, ..., needleN]) **Example** -Given `ClickHouse` as a UTF-8 string, find the positions of `C` ('\x43') and `H` ('\x48'). +Given `ClickHouse` as a UTF-8 string, find the positions of `C` (`\x43`) and `H` (`\x48`). Query: @@ -258,7 +258,7 @@ multiSearchAllPositionsCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., nee **Parameters** - `haystack` — UTF-8 encoded string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — UTF-8 encoded substrings to be searched. [Array](../../sql-reference/data-types/array.md). **Returned value** @@ -279,9 +279,9 @@ SELECT multiSearchAllPositionsCaseInsensitiveUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\ ["1","6"] ``` -## multiSearchFirstPosition {#multiSearchFirstPosition} +## multiSearchFirstPosition -Like `position` but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings. +Like [`position`](#position) but returns the leftmost offset in a `haystack` string which matches any of multiple `needle` strings. Functions [`multiSearchFirstPositionCaseInsensitive`](#multiSearchFirstPositionCaseInsensitive), [`multiSearchFirstPositionUTF8`](#multiSearchFirstPositionUTF8) and [`multiSearchFirstPositionCaseInsensitiveUTF8`](#multiSearchFirstPositionCaseInsensitiveUTF8) provide case-insensitive and/or UTF-8 variants of this function. 
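
A minimal sketch of the difference between the base function and its case-insensitive variant — the expected results assume a stock ClickHouse server with default settings:

```sql
-- 'world' has no case-sensitive match in 'Hello World', so the base function returns 0,
-- while the case-insensitive variant finds 'World' at byte offset 7.
SELECT
    multiSearchFirstPosition('Hello World', ['world']) AS strict,
    multiSearchFirstPositionCaseInsensitive('Hello World', ['world']) AS relaxed;
```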
@@ -294,7 +294,7 @@ multiSearchFirstPosition(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). **Returned value** @@ -326,7 +326,7 @@ multiSearchFirstPositionCaseInsensitive(haystack, [needle1, needle2, ..., needle **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Array of substrings to be searched. [Array](../../sql-reference/data-types/array.md). **Returned value** @@ -345,7 +345,7 @@ SELECT multiSearchFirstPositionCaseInsensitive('HELLO WORLD',['wor', 'ld', 'ello 2 ``` -## multiSearchFirstPositionUTF8 {#multiSearchFirstPositionUTF8} +## multiSearchFirstPositionUTF8 Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings. @@ -358,7 +358,7 @@ multiSearchFirstPositionUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md). **Returned value** @@ -379,7 +379,7 @@ SELECT multiSearchFirstPositionUTF8('\x68\x65\x6c\x6c\x6f\x20\x77\x6f\x72\x6c\x6 2 ``` -## multiSearchFirstPositionCaseInsensitiveUTF8 {#multiSearchFirstPositionCaseInsensitiveUTF8} +## multiSearchFirstPositionCaseInsensitiveUTF8 Like [`multiSearchFirstPosition`](#multiSearchFirstPosition) but assumes `haystack` and `needle` to be UTF-8 strings and ignores case. @@ -427,7 +427,7 @@ multiSearchFirstIndex(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). **Returned value** @@ -446,7 +446,7 @@ SELECT multiSearchFirstIndex('Hello World',['World','Hello']); 1 ``` -## multiSearchFirstIndexCaseInsensitive {#multiSearchFirstIndexCaseInsensitive} +## multiSearchFirstIndexCaseInsensitive Returns the index `i` (starting from 1) of the leftmost found needlei in the string `haystack` and 0 otherwise. Ignores case. @@ -459,7 +459,7 @@ multiSearchFirstIndexCaseInsensitive(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). **Returned value** @@ -478,7 +478,7 @@ SELECT multiSearchFirstIndexCaseInsensitive('hElLo WoRlD',['World','Hello']); 1 ``` -## multiSearchFirstIndexUTF8 {#multiSearchFirstIndexUTF8} +## multiSearchFirstIndexUTF8 Returns the index `i` (starting from 1) of the leftmost found needlei in the string `haystack` and 0 otherwise. 
Assumes `haystack` and `needle` are UTF-8 encoded strings. @@ -512,7 +512,7 @@ SELECT multiSearchFirstIndexUTF8('\x48\x65\x6c\x6c\x6f\x20\x57\x6f\x72\x6c\x64', 1 ``` -## multiSearchFirstIndexCaseInsensitiveUTF8 {#multiSearchFirstIndexCaseInsensitiveUTF8} +## multiSearchFirstIndexCaseInsensitiveUTF8 Returns the index `i` (starting from 1) of the leftmost found needlei in the string `haystack` and 0 otherwise. Assumes `haystack` and `needle` are UTF-8 encoded strings. Ignores case. @@ -525,7 +525,7 @@ multiSearchFirstIndexCaseInsensitiveUTF8(haystack, [needle1, needle2, ..., needl **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Array of UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md). **Returned value** @@ -546,7 +546,7 @@ SELECT multiSearchFirstIndexCaseInsensitiveUTF8('\x48\x45\x4c\x4c\x4f\x20\x57\x4 1 ``` -## multiSearchAny {#multisearchany} +## multiSearchAny Returns 1, if at least one string needlei matches the string `haystack` and 0 otherwise. @@ -561,7 +561,7 @@ multiSearchAny(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — Substrings to be searched. [Array](../../sql-reference/data-types/array.md). **Returned value** @@ -580,7 +580,7 @@ SELECT multiSearchAny('ClickHouse',['C','H']); 1 ``` -## multiSearchAnyCaseInsensitive {#multiSearchAnyCaseInsensitive} +## multiSearchAnyCaseInsensitive Like [multiSearchAny](#multisearchany) but ignores case. @@ -612,9 +612,9 @@ SELECT multiSearchAnyCaseInsensitive('ClickHouse',['c','h']); 1 ``` -## multiSearchAnyUTF8 {#multiSearchAnyUTF8} +## multiSearchAnyUTF8 -Like [multiSearchAny](#multisearchany) but assumes `haystack` and the `needle`-s are UTF-8 encoded strings. +Like [multiSearchAny](#multisearchany) but assumes `haystack` and the `needle` substrings are UTF-8 encoded strings. *Syntax** @@ -625,7 +625,7 @@ multiSearchAnyUTF8(haystack, [needle1, needle2, ..., needleN]) **Parameters** - `haystack` — UTF-8 string in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal). -- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md) +- `needle` — UTF-8 substrings to be searched. [Array](../../sql-reference/data-types/array.md). **Returned value** @@ -646,7 +646,7 @@ SELECT multiSearchAnyUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x43','\x 1 ``` -## multiSearchAnyCaseInsensitiveUTF8 {#multiSearchAnyCaseInsensitiveUTF8} +## multiSearchAnyCaseInsensitiveUTF8 Like [multiSearchAnyUTF8](#multiSearchAnyUTF8) but ignores case. 
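
The multiSearch* families documented above differ only in what they report about the same scan. A quick side-by-side query over identical needles — results assume a default ClickHouse server — makes the distinction concrete:

```sql
-- All three scan 'Hello World' for the same needles:
-- multiSearchAllPositions returns every match position per needle,
-- multiSearchFirstPosition the leftmost matching offset,
-- and multiSearchAny only whether anything matched at all.
SELECT
    multiSearchAllPositions('Hello World', ['or', 'ld'])  AS all_positions,   -- [8, 10]
    multiSearchFirstPosition('Hello World', ['or', 'ld']) AS first_position,  -- 8
    multiSearchAny('Hello World', ['or', 'ld'])           AS any_match;       -- 1
```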
From 7288aeb826ec4ea964bb6e408c7644a2f53cf7c1 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 28 Mar 2024 21:45:36 +0100 Subject: [PATCH 060/470] More consistency edits --- .../functions/string-search-functions.md | 36 ++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 9430fb0f728e..7ffaee53f890 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -126,6 +126,8 @@ Like [position](#position) but assumes `haystack` and `needle` are UTF-8 encoded Function `positionUTF8` correctly counts character `ö` (represented by two points) as a single Unicode codepoint: +Query: + ``` sql SELECT positionUTF8('Motörhead', 'r'); ``` @@ -163,11 +165,13 @@ multiSearchAllPositions(haystack, [needle1, needle2, ..., needleN]) **Returned values** -- Array of the starting position in bytes and counting from 1 (if the substring was found). +- Array of the starting position in bytes and counting from 1, if the substring was found. - 0, if the substring was not found. **Example** +Query: + ``` sql SELECT multiSearchAllPositions('Hello, World!', ['hello', '!', 'world']); ``` @@ -207,6 +211,8 @@ Query: SELECT multiSearchAllPositionsCaseInsensitive('ClickHouse',['c','h']); ``` +Result: + ```response ["1","6"] ``` @@ -241,6 +247,8 @@ Query: SELECT multiSearchAllPositionsUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x43','\x48']); ``` +Result: + ```response ["1","6"] ``` @@ -275,6 +283,8 @@ Query: SELECT multiSearchAllPositionsCaseInsensitiveUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x63','\x68']); ``` +Result: + ```response ["1","6"] ``` @@ -309,6 +319,8 @@ Query: SELECT multiSearchFirstPosition('Hello World',['llo', 'Wor', 'ld']); ``` +Result: + ```response 3 ``` @@ -341,6 +353,8 @@ Query: SELECT multiSearchFirstPositionCaseInsensitive('HELLO WORLD',['wor', 'ld', 'ello']); ``` +Result: + ```response 2 ``` @@ -375,6 +389,8 @@ Query: SELECT multiSearchFirstPositionUTF8('\x68\x65\x6c\x6c\x6f\x20\x77\x6f\x72\x6c\x64',['wor', 'ld', 'ello']); ``` +Result: + ```response 2 ``` @@ -409,6 +425,8 @@ Query: SELECT multiSearchFirstPositionCaseInsensitiveUTF8('\x48\x45\x4c\x4c\x4f\x20\x57\x4f\x52\x4c\x44',['wor', 'ld', 'ello']); ``` +Result: + ```response 2 ``` @@ -442,6 +460,8 @@ Query: SELECT multiSearchFirstIndex('Hello World',['World','Hello']); ``` +Result: + ```response 1 ``` @@ -474,6 +494,8 @@ Query: SELECT multiSearchFirstIndexCaseInsensitive('hElLo WoRlD',['World','Hello']); ``` +Result: + ```response 1 ``` @@ -508,6 +530,8 @@ Query: SELECT multiSearchFirstIndexUTF8('\x48\x65\x6c\x6c\x6f\x20\x57\x6f\x72\x6c\x64',['\x57\x6f\x72\x6c\x64','\x48\x65\x6c\x6c\x6f']); ``` +Result: + ```response 1 ``` @@ -542,6 +566,8 @@ Query: SELECT multiSearchFirstIndexCaseInsensitiveUTF8('\x48\x45\x4c\x4c\x4f\x20\x57\x4f\x52\x4c\x44',['\x68\x65\x6c\x6c\x6f','\x77\x6f\x72\x6c\x64']); ``` +Result: + ```response 1 ``` @@ -576,6 +602,8 @@ Query: SELECT multiSearchAny('ClickHouse',['C','H']); ``` +Result: + ```response 1 ``` @@ -608,6 +636,8 @@ Query: SELECT multiSearchAnyCaseInsensitive('ClickHouse',['c','h']); ``` +Result: + ```response 1 ``` @@ -642,6 +672,8 @@ Query: SELECT multiSearchAnyUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x43','\x48']); ``` +Result: + ```response 1 ``` @@ -676,6 +708,8 @@ Query: SELECT 
multiSearchAnyCaseInsensitiveUTF8('\x43\x6c\x69\x63\x6b\x48\x6f\x75\x73\x65',['\x68']); ``` +Result: + ```response 1 ``` From 1dcba74f66d64bf7d77b7da585cabc16a5d4e29f Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 28 Mar 2024 21:54:26 +0100 Subject: [PATCH 061/470] Small grammar edits to description at top of the page --- .../functions/string-search-functions.md | 43 ++++++++++++++++--- 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 7ffaee53f890..8aff8b7e9303 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -6,14 +6,17 @@ sidebar_label: Searching in Strings # Functions for Searching in Strings -All functions in this section search by default case-sensitively. Case-insensitive search is usually provided by separate function variants. -Note that case-insensitive search follows the lowercase-uppercase rules of the English language. E.g. Uppercased `i` in English language is -`I` whereas in Turkish language it is `İ` - results for languages other than English may be unexpected. +All functions in this section search case-sensitively by default. Case-insensitive search is usually provided by separate function variants. -Functions in this section also assume that the searched string and the search string are single-byte encoded text. If this assumption is +:::note +Case-insensitive search follows the lowercase-uppercase rules of the English language. E.g. Uppercased `i` in the English language is +`I` whereas in the Turkish language it is `İ` - results for languages other than English may be unexpected. +::: + +Functions in this section also assume that the searched string (refered to in this section as `haystack`) and the search string (refered to in this section as `needle`) are single-byte encoded text. If this assumption is violated, no exception is thrown and results are undefined. Search with UTF-8 encoded strings is usually provided by separate function variants. Likewise, if a UTF-8 function variant is used and the input strings are not UTF-8 encoded text, no exception is thrown and the -results are undefined. Note that no automatic Unicode normalization is performed, you can use the +results are undefined. Note that no automatic Unicode normalization is performed, however you can use the [normalizeUTF8*()](https://clickhouse.com/docs/en/sql-reference/functions/string-functions/) functions for that. [General strings functions](string-functions.md) and [functions for replacing in strings](string-replace-functions.md) are described separately. @@ -55,6 +58,8 @@ Type: `Integer`. 
**Examples** +Query: + ``` sql SELECT position('Hello, world!', '!'); ``` @@ -69,12 +74,16 @@ Result: Example with `start_pos` argument: +Query: + ``` sql SELECT position('Hello, world!', 'o', 1), position('Hello, world!', 'o', 7) ``` +Result: + ``` text ┌─position('Hello, world!', 'o', 1)─┬─position('Hello, world!', 'o', 7)─┐ │ 5 │ 9 │ @@ -83,6 +92,8 @@ SELECT Example for `needle IN haystack` syntax: +Query: + ```sql SELECT 6 = position('/' IN s) FROM (SELECT 'Hello/World' AS s); ``` @@ -97,6 +108,8 @@ Result: Examples with empty `needle` substring: +Query: + ``` sql SELECT position('abc', ''), @@ -108,6 +121,8 @@ SELECT position('abc', '', 5) ``` +Result: + ``` text ┌─position('abc', '')─┬─position('abc', '', 0)─┬─position('abc', '', 1)─┬─position('abc', '', 2)─┬─position('abc', '', 3)─┬─position('abc', '', 4)─┬─position('abc', '', 5)─┐ │ 1 │ 1 │ 1 │ 2 │ 3 │ 4 │ 0 │ @@ -116,7 +131,23 @@ SELECT ## positionCaseInsensitive -Like [position](#position) but searches case-insensitively. +A case insensitive invariant of [position](#position). + +**Example** + +Query: + +``` sql +SELECT position('Hello, world!', 'hello'); +``` + +Result: + +``` text +┌─position('Hello, world!', 'hello')─┐ +│ 0 │ +└────────────────────────────────────┘ +``` ## positionUTF8 From c5f88f6eddd7c0222dd3427ac75ebd1ca125718d Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Fri, 29 Mar 2024 15:40:14 +0800 Subject: [PATCH 062/470] Improve hive text read by remove setting --- src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp | 10 +--------- src/Processors/Formats/Impl/HiveTextRowInputFormat.h | 2 -- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp index 9f2a795427c5..225ee8341f66 100644 --- a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp @@ -15,10 +15,10 @@ static FormatSettings updateFormatSettings(const FormatSettings & settings, cons { FormatSettings updated = settings; updated.skip_unknown_fields = true; - updated.with_names_use_header = true; updated.date_time_input_format = FormatSettings::DateTimeInputFormat::BestEffort; updated.defaults_for_omitted_fields = true; updated.csv.delimiter = updated.hive_text.fields_delimiter; + updated.csv.allow_variable_number_of_columns = true; if (settings.hive_text.input_field_names.empty()) updated.hive_text.input_field_names = header.getNames(); return updated; @@ -42,14 +42,6 @@ HiveTextFormatReader::HiveTextFormatReader(PeekableReadBuffer & buf_, const Form { } -std::vector HiveTextFormatReader::readNames() -{ - PeekableReadBufferCheckpoint checkpoint{*buf, true}; - auto values = readHeaderRow(); - input_field_names.resize(values.size()); - return input_field_names; -} - std::vector HiveTextFormatReader::readTypes() { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "HiveTextRowInputFormat::readTypes is not implemented"); diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.h b/src/Processors/Formats/Impl/HiveTextRowInputFormat.h index 313aad0d40d2..71a8e607807d 100644 --- a/src/Processors/Formats/Impl/HiveTextRowInputFormat.h +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.h @@ -27,8 +27,6 @@ class HiveTextFormatReader final : public CSVFormatReader { public: HiveTextFormatReader(PeekableReadBuffer & buf_, const FormatSettings & format_settings_); - - std::vector readNames() override; std::vector readTypes() override; private: From 
14882d9a7d7a7da867d1b8626f756e8e0d7e2a21 Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Fri, 29 Mar 2024 16:06:24 +0800 Subject: [PATCH 063/470] remove readNames code --- src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp | 5 +++++ src/Processors/Formats/Impl/HiveTextRowInputFormat.h | 2 ++ 2 files changed, 7 insertions(+) diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp index 225ee8341f66..f8652a430df4 100644 --- a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp @@ -42,6 +42,11 @@ HiveTextFormatReader::HiveTextFormatReader(PeekableReadBuffer & buf_, const Form { } +std::vector HiveTextFormatReader::readNames() +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "HiveTextRowInputFormat::readNames is not implemented"); +} + std::vector HiveTextFormatReader::readTypes() { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "HiveTextRowInputFormat::readTypes is not implemented"); diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.h b/src/Processors/Formats/Impl/HiveTextRowInputFormat.h index 71a8e607807d..313aad0d40d2 100644 --- a/src/Processors/Formats/Impl/HiveTextRowInputFormat.h +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.h @@ -27,6 +27,8 @@ class HiveTextFormatReader final : public CSVFormatReader { public: HiveTextFormatReader(PeekableReadBuffer & buf_, const FormatSettings & format_settings_); + + std::vector readNames() override; std::vector readTypes() override; private: From b0865cf2e37b3dd5cd636444523d037140e12887 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 30 Mar 2024 00:14:03 +0100 Subject: [PATCH 064/470] Fix restoring parts while a storage is readonly. --- .../MergeTree/ReplicatedMergeTreeSink.cpp | 15 ++++++++------- src/Storages/MergeTree/ReplicatedMergeTreeSink.h | 7 ++++--- src/Storages/StorageReplicatedMergeTree.cpp | 8 ++++++-- 3 files changed, 18 insertions(+), 12 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index b43d47bf5f4f..fd5f3853a6b6 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -128,7 +128,8 @@ ReplicatedMergeTreeSinkImpl::ReplicatedMergeTreeSinkImpl( bool deduplicate_, bool majority_quorum, ContextPtr context_, - bool is_attach_) + bool is_attach_, + bool allow_attach_while_readonly_) : SinkToStorage(metadata_snapshot_->getSampleBlock()) , storage(storage_) , metadata_snapshot(metadata_snapshot_) @@ -136,6 +137,7 @@ ReplicatedMergeTreeSinkImpl::ReplicatedMergeTreeSinkImpl( , quorum_timeout_ms(quorum_timeout_ms_) , max_parts_per_block(max_parts_per_block_) , is_attach(is_attach_) + , allow_attach_while_readonly(allow_attach_while_readonly_) , quorum_parallel(quorum_parallel_) , deduplicate(deduplicate_) , log(getLogger(storage.getLogName() + " (Replicated OutputStream)")) @@ -440,7 +442,7 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF try { - bool deduplicated = commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num, false).second; + bool deduplicated = commitPart(zookeeper, part, partition.block_id, delayed_chunk->replicas_num).second; last_block_is_duplicate = last_block_is_duplicate || deduplicated; @@ -485,7 +487,7 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithFa while (true) { partition.temp_part.finalize(); - auto conflict_block_ids = 
commitPart(zookeeper, partition.temp_part.part, partition.block_id, delayed_chunk->replicas_num, false).first; + auto conflict_block_ids = commitPart(zookeeper, partition.temp_part.part, partition.block_id, delayed_chunk->replicas_num).first; if (conflict_block_ids.empty()) { auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); @@ -556,7 +558,7 @@ bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::Mutabl { part->version.setCreationTID(Tx::PrehistoricTID, nullptr); String block_id = deduplicate ? fmt::format("{}_{}", part->info.partition_id, part->checksums.getTotalChecksumHex()) : ""; - bool deduplicated = commitPart(zookeeper, part, block_id, replicas_num, /* writing_existing_part */ true).second; + bool deduplicated = commitPart(zookeeper, part, block_id, replicas_num).second; /// Set a special error code if the block is duplicate int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; @@ -647,8 +649,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: const ZooKeeperWithFaultInjectionPtr & zookeeper, MergeTreeData::MutableDataPartPtr & part, const BlockIDsType & block_id, - size_t replicas_num, - bool writing_existing_part) + size_t replicas_num) { /// It is possible that we alter a part with different types of source columns. /// In this case, if column was not altered, the result type will be different with what we have in metadata. @@ -800,7 +801,7 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: /// When we attach existing parts it's okay to be in read-only mode /// For example during RESTORE REPLICA. - if (!writing_existing_part) + if (!allow_attach_while_readonly) { retries_ctl.setUserError( Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode: replica_path={}", storage.replica_path)); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 29f3183be646..39623c205840 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -45,7 +45,8 @@ class ReplicatedMergeTreeSinkImpl : public SinkToStorage ContextPtr context_, // special flag to determine the ALTER TABLE ATTACH PART without the query context, // needed to set the special LogEntryType::ATTACH_PART - bool is_attach_ = false); + bool is_attach_ = false, + bool allow_attach_while_readonly_ = false); ~ReplicatedMergeTreeSinkImpl() override; @@ -93,8 +94,7 @@ class ReplicatedMergeTreeSinkImpl : public SinkToStorage const ZooKeeperWithFaultInjectionPtr & zookeeper, MergeTreeData::MutableDataPartPtr & part, const BlockIDsType & block_id, - size_t replicas_num, - bool writing_existing_part); + size_t replicas_num); /// Wait for quorum to be satisfied on path (quorum_path) form part (part_name) @@ -123,6 +123,7 @@ class ReplicatedMergeTreeSinkImpl : public SinkToStorage UInt64 cache_version = 0; bool is_attach = false; + bool allow_attach_while_readonly = false; bool quorum_parallel = false; const bool deduplicate = true; bool last_block_is_duplicate = false; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 951a0facc6b5..7ca508c362d1 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6512,7 +6512,7 @@ PartitionCommandsResultInfo StorageReplicatedMergeTree::attachPartition( /// TODO Allow to use quorum here. 
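    /// Note: ATTACH PARTITION/PART may be executed while the replica is still read-only
    /// (for example during RESTORE REPLICA), which is why the sink below is created with
    /// allow_attach_while_readonly = true.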
ReplicatedMergeTreeSink output(*this, metadata_snapshot, /* quorum */ 0, /* quorum_timeout_ms */ 0, /* max_parts_per_block */ 0, /* quorum_parallel */ false, query_context->getSettingsRef().insert_deduplicate, - /* majority_quorum */ false, query_context, /*is_attach*/true); + /* majority_quorum */ false, query_context, /* is_attach */ true, /* allow_attach_while_readonly */ true); for (size_t i = 0; i < loaded_parts.size(); ++i) { @@ -10500,7 +10500,11 @@ void StorageReplicatedMergeTree::restoreDataFromBackup(RestorerFromBackup & rest void StorageReplicatedMergeTree::attachRestoredParts(MutableDataPartsVector && parts) { auto metadata_snapshot = getInMemoryMetadataPtr(); - auto sink = std::make_shared(*this, metadata_snapshot, 0, 0, 0, false, false, false, getContext(), /*is_attach*/true); + + auto sink = std::make_shared( + *this, metadata_snapshot, /* quorum */ 0, /* quorum_timeout_ms */ 0, /* max_parts_per_block */ 0, /* quorum_parallel */ false, + /* deduplicate */ false, /* majority_quorum */ false, getContext(), /* is_attach */ true, /* allow_attach_while_readonly */ false); + for (auto part : parts) sink->writeExistingPart(part); } From 89cee0a3d6a71e1fbba92ce4546cdb261b196802 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 31 Mar 2024 01:59:36 +0100 Subject: [PATCH 065/470] Move bridges to separate packages --- packages/clickhouse-common-static.yaml | 4 --- packages/clickhouse-library-bridge.yaml | 35 ++++++++++++++++++++++ packages/clickhouse-odbc-bridge.yaml | 35 ++++++++++++++++++++++ programs/library-bridge/CMakeLists.txt | 7 +---- programs/odbc-bridge/CMakeLists.txt | 7 +---- programs/odbc-bridge/ColumnInfoHandler.cpp | 1 + programs/odbc-bridge/ColumnInfoHandler.h | 1 - 7 files changed, 73 insertions(+), 17 deletions(-) create mode 100644 packages/clickhouse-library-bridge.yaml create mode 100644 packages/clickhouse-odbc-bridge.yaml diff --git a/packages/clickhouse-common-static.yaml b/packages/clickhouse-common-static.yaml index 383ad39591cd..db330f808e15 100644 --- a/packages/clickhouse-common-static.yaml +++ b/packages/clickhouse-common-static.yaml @@ -36,10 +36,6 @@ contents: dst: /usr/bin/clickhouse - src: root/usr/bin/clickhouse-extract-from-config dst: /usr/bin/clickhouse-extract-from-config -- src: root/usr/bin/clickhouse-library-bridge - dst: /usr/bin/clickhouse-library-bridge -- src: root/usr/bin/clickhouse-odbc-bridge - dst: /usr/bin/clickhouse-odbc-bridge - src: root/usr/share/bash-completion/completions dst: /usr/share/bash-completion/completions - src: root/usr/share/clickhouse diff --git a/packages/clickhouse-library-bridge.yaml b/packages/clickhouse-library-bridge.yaml new file mode 100644 index 000000000000..95e7d4aaad00 --- /dev/null +++ b/packages/clickhouse-library-bridge.yaml @@ -0,0 +1,35 @@ +# package sources should be placed in ${PWD}/root +# nfpm should run from the same directory with a config +name: "clickhouse-library-bridge" +description: | + ClickHouse Library Bridge - is a separate process for loading libraries for the 'library' dictionary sources and the CatBoost library. + ClickHouse is a column-oriented database management system + that allows generating analytical data reports in real time. + +# Common packages config +arch: "${DEB_ARCH}" # amd64, arm64 +platform: "linux" +version: "${CLICKHOUSE_VERSION_STRING}" +vendor: "ClickHouse Inc." 
+homepage: "https://clickhouse.com" +license: "Apache" +section: "database" +priority: "optional" +maintainer: "ClickHouse Dev Team " +deb: + fields: + Source: clickhouse + +# Package specific content +contents: +- src: root/usr/bin/clickhouse-library-bridge + dst: /usr/bin/clickhouse-library-bridge +# docs +- src: ../AUTHORS + dst: /usr/share/doc/clickhouse-common-static/AUTHORS +- src: ../CHANGELOG.md + dst: /usr/share/doc/clickhouse-common-static/CHANGELOG.md +- src: ../LICENSE + dst: /usr/share/doc/clickhouse-common-static/LICENSE +- src: ../README.md + dst: /usr/share/doc/clickhouse-common-static/README.md diff --git a/packages/clickhouse-odbc-bridge.yaml b/packages/clickhouse-odbc-bridge.yaml new file mode 100644 index 000000000000..2a7edf415499 --- /dev/null +++ b/packages/clickhouse-odbc-bridge.yaml @@ -0,0 +1,35 @@ +# package sources should be placed in ${PWD}/root +# nfpm should run from the same directory with a config +name: "clickhouse-odbc-bridge" +description: | + ClickHouse ODBC Bridge - is a separate process for loading ODBC drivers and interacting with external databases using the ODBC protocol. + ClickHouse is a column-oriented database management system + that allows generating analytical data reports in real time. + +# Common packages config +arch: "${DEB_ARCH}" # amd64, arm64 +platform: "linux" +version: "${CLICKHOUSE_VERSION_STRING}" +vendor: "ClickHouse Inc." +homepage: "https://clickhouse.com" +license: "Apache" +section: "database" +priority: "optional" +maintainer: "ClickHouse Dev Team " +deb: + fields: + Source: clickhouse + +# Package specific content +contents: +- src: root/usr/bin/clickhouse-odbc-bridge + dst: /usr/bin/clickhouse-odbc-bridge +# docs +- src: ../AUTHORS + dst: /usr/share/doc/clickhouse-common-static/AUTHORS +- src: ../CHANGELOG.md + dst: /usr/share/doc/clickhouse-common-static/CHANGELOG.md +- src: ../LICENSE + dst: /usr/share/doc/clickhouse-common-static/LICENSE +- src: ../README.md + dst: /usr/share/doc/clickhouse-common-static/README.md diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index 98d8848502d2..2fca10ce4d77 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -24,9 +24,4 @@ target_link_libraries(clickhouse-library-bridge PRIVATE set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) -if (SPLIT_DEBUG_SYMBOLS) - clickhouse_split_debug_symbols(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-library-bridge) -else() - clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR}) - install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) -endif() +install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index d6cbe8f7215a..83839cc21acd 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -30,12 +30,7 @@ target_link_libraries(clickhouse-odbc-bridge PRIVATE set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) 
target_compile_options (clickhouse-odbc-bridge PRIVATE -Wno-reserved-id-macro -Wno-keyword-macro) -if (SPLIT_DEBUG_SYMBOLS) - clickhouse_split_debug_symbols(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-odbc-bridge) -else() - clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR}) - install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) -endif() +install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) if(ENABLE_TESTS) add_subdirectory(tests) diff --git a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index 4cb15de3b2cb..5ff985b3d121 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/programs/odbc-bridge/ColumnInfoHandler.h b/programs/odbc-bridge/ColumnInfoHandler.h index ca7044fdf328..610fb128c9de 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.h +++ b/programs/odbc-bridge/ColumnInfoHandler.h @@ -5,7 +5,6 @@ #if USE_ODBC #include -#include #include #include From 7fb2b7880e51a3c0a36ae3d6249bd7bc76202e45 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 31 Mar 2024 23:05:25 +0200 Subject: [PATCH 066/470] Remove useless files --- packages/clickhouse-common-static-dbg.yaml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/packages/clickhouse-common-static-dbg.yaml b/packages/clickhouse-common-static-dbg.yaml index 96de4c17d88f..74b7fa8381bc 100644 --- a/packages/clickhouse-common-static-dbg.yaml +++ b/packages/clickhouse-common-static-dbg.yaml @@ -30,10 +30,6 @@ conflicts: contents: - src: root/usr/lib/debug/usr/bin/clickhouse.debug dst: /usr/lib/debug/usr/bin/clickhouse.debug -- src: root/usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug - dst: /usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug -- src: root/usr/lib/debug/usr/bin/clickhouse-library-bridge.debug - dst: /usr/lib/debug/usr/bin/clickhouse-library-bridge.debug # docs - src: ../AUTHORS dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS From ff11d67f85cdf35e06e2fdd6d3a3409339f5e570 Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Mon, 1 Apr 2024 10:50:02 +0800 Subject: [PATCH 067/470] add tests --- src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp index f8652a430df4..117d98d23bb1 100644 --- a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp @@ -15,6 +15,7 @@ static FormatSettings updateFormatSettings(const FormatSettings & settings, cons { FormatSettings updated = settings; updated.skip_unknown_fields = true; + updated.with_names_use_header = false; updated.date_time_input_format = FormatSettings::DateTimeInputFormat::BestEffort; updated.defaults_for_omitted_fields = true; updated.csv.delimiter = updated.hive_text.fields_delimiter; @@ -33,7 +34,7 @@ HiveTextRowInputFormat::HiveTextRowInputFormat( HiveTextRowInputFormat::HiveTextRowInputFormat( const Block & header_, std::shared_ptr buf_, const Params & params_, const FormatSettings & format_settings_) : CSVRowInputFormat( - header_, buf_, params_, true, 
false, format_settings_, std::make_unique(*buf_, format_settings_)) + header_, buf_, params_, false, false, format_settings_, std::make_unique(*buf_, format_settings_)) { } From e4473eb969df3296faf57efc9deafb02d19b479e Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Mon, 1 Apr 2024 11:51:14 +0800 Subject: [PATCH 068/470] add tests --- .../03033_hive_text_read_variable_fields.reference | 2 ++ .../03033_hive_text_read_variable_fields.sh | 13 +++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 tests/queries/0_stateless/03033_hive_text_read_variable_fields.reference create mode 100755 tests/queries/0_stateless/03033_hive_text_read_variable_fields.sh diff --git a/tests/queries/0_stateless/03033_hive_text_read_variable_fields.reference b/tests/queries/0_stateless/03033_hive_text_read_variable_fields.reference new file mode 100644 index 000000000000..2e7c474620b8 --- /dev/null +++ b/tests/queries/0_stateless/03033_hive_text_read_variable_fields.reference @@ -0,0 +1,2 @@ +1 3 0 +3 5 9 diff --git a/tests/queries/0_stateless/03033_hive_text_read_variable_fields.sh b/tests/queries/0_stateless/03033_hive_text_read_variable_fields.sh new file mode 100755 index 000000000000..5af0e465cca6 --- /dev/null +++ b/tests/queries/0_stateless/03033_hive_text_read_variable_fields.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +# NOTE: this sh wrapper is required because of shell_config + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "drop table if exists test_tbl" +$CLICKHOUSE_CLIENT -q "create table test_tbl (a UInt16, b UInt32, c UInt32) engine=MergeTree order by a" +$CLICKHOUSE_CLIENT -q "insert into test_tbl from infile '$CURDIR/data_hive/fields_number_variable.txt' SETTINGS input_format_hive_text_fields_delimiter=',' FORMAT HIVETEXT" +$CLICKHOUSE_CLIENT -q "select * from test_tbl" +$CLICKHOUSE_CLIENT -q "drop table test_tbl" \ No newline at end of file From 9b1b81d92120fa6251864f27762918a85c7c705b Mon Sep 17 00:00:00 2001 From: zhongyuankai <872237106@qq.com> Date: Mon, 1 Apr 2024 11:49:25 +0800 Subject: [PATCH 069/470] add tests --- src/Storages/StorageMemory.cpp | 34 +++++++++++ ...2_storage_memory_modify_settings.reference | 24 ++++---- .../03032_storage_memory_modify_settings.sql | 56 ++++++++++--------- 3 files changed, 78 insertions(+), 36 deletions(-) diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 055d9ee6fa1a..c5feb6dad21e 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -307,6 +307,40 @@ void StorageMemory::alter(const DB::AlterCommands & params, DB::ContextPtr conte auto copy = memory_settings; copy.applyChanges(settings_changes.changes); copy.sanityCheck(); + + /// When modifying the values of max_bytes_to_keep and max_rows_to_keep to be smaller than the old values, + /// the old data needs to be removed. 
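+    /// Eviction below walks from the oldest block and stops as soon as removing the next
+    /// block would take the table under min_bytes_to_keep / min_rows_to_keep.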
+ if (!memory_settings.max_bytes_to_keep || memory_settings.max_bytes_to_keep > copy.max_bytes_to_keep + || !memory_settings.max_rows_to_keep || memory_settings.max_rows_to_keep > copy.max_rows_to_keep) + { + std::lock_guard lock(mutex); + + auto new_data = std::make_unique(*(data.get())); + UInt64 new_total_rows = total_size_rows.load(std::memory_order_relaxed); + UInt64 new_total_bytes = total_size_bytes.load(std::memory_order_relaxed); + while (!new_data->empty() + && ((copy.max_bytes_to_keep && new_total_bytes > copy.max_bytes_to_keep) + || (copy.max_rows_to_keep && new_total_rows > copy.max_rows_to_keep))) + { + Block oldest_block = new_data->front(); + UInt64 rows_to_remove = oldest_block.rows(); + UInt64 bytes_to_remove = oldest_block.allocatedBytes(); + if (new_total_bytes - bytes_to_remove < copy.min_bytes_to_keep + || new_total_rows - rows_to_remove < copy.min_rows_to_keep) + { + break; // stop - removing next block will put us under min_bytes / min_rows threshold + } + + // delete old block from current storage table + new_total_rows -= rows_to_remove; + new_total_bytes -= bytes_to_remove; + new_data->erase(new_data->begin()); + } + + data.set(std::move(new_data)); + total_size_rows.store(new_total_rows, std::memory_order_relaxed); + total_size_bytes.store(new_total_bytes, std::memory_order_relaxed); + } memory_settings = std::move(copy); } diff --git a/tests/queries/0_stateless/03032_storage_memory_modify_settings.reference b/tests/queries/0_stateless/03032_storage_memory_modify_settings.reference index 20dda4fa15af..f7d25c40a430 100644 --- a/tests/queries/0_stateless/03032_storage_memory_modify_settings.reference +++ b/tests/queries/0_stateless/03032_storage_memory_modify_settings.reference @@ -1,16 +1,20 @@ -TESTING BYTES -8192 -9216 -9216 +TESTING MODIFY SMALLER BYTES +17408 +16384 65536 -TESTING ROWS +TESTING MODIFY SMALLER ROWS +9216 +8192 +4096 +TESTING ADD SETTINGS +50 +1000 +1070 +1020 +1100 +TESTING ADD SETTINGS 50 1000 1020 1100 -TESTING NO CIRCULAR-BUFFER -8192 -9216 -17408 -82944 TESTING INVALID SETTINGS diff --git a/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql b/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql index 34be327175ea..bfa13ee0ec8b 100644 --- a/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql +++ b/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql @@ -1,64 +1,68 @@ SET max_block_size = 65409; -- Default value +SELECT 'TESTING MODIFY SMALLER BYTES'; DROP TABLE IF EXISTS memory; -CREATE TABLE memory (i UInt32) ENGINE = Memory; +CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_bytes_to_keep = 8192, max_bytes_to_keep = 32768; + +INSERT INTO memory SELECT * FROM numbers(0, 100); +INSERT INTO memory SELECT * FROM numbers(0, 3000); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); ALTER TABLE memory MODIFY SETTING min_bytes_to_keep = 4096, max_bytes_to_keep = 16384; +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -SELECT 'TESTING BYTES'; -/* 1. testing oldest block doesn't get deleted because of min-threshold */ -INSERT INTO memory SELECT * FROM numbers(0, 1600); +INSERT INTO memory SELECT * FROM numbers(3000, 10000); SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -/* 2. 
adding block that doesn't get deleted */ -INSERT INTO memory SELECT * FROM numbers(1000, 100); +SELECT 'TESTING MODIFY SMALLER ROWS'; +DROP TABLE IF EXISTS memory; +CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 200, max_rows_to_keep = 2000; + +INSERT INTO memory SELECT * FROM numbers(0, 100); +INSERT INTO memory SELECT * FROM numbers(100, 1000); SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -/* 3. testing oldest block gets deleted - 9216 bytes - 1100 */ -INSERT INTO memory SELECT * FROM numbers(9000, 1000); +ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000; SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -/* 4.check large block over-writes all bytes / rows */ -INSERT INTO memory SELECT * FROM numbers(9000, 10000); +INSERT INTO memory SELECT * FROM numbers(1000, 500); SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +SELECT 'TESTING ADD SETTINGS'; DROP TABLE IF EXISTS memory; CREATE TABLE memory (i UInt32) ENGINE = Memory; -ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000; - -SELECT 'TESTING ROWS'; -/* 1. add normal number of rows */ INSERT INTO memory SELECT * FROM numbers(0, 50); SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -/* 2. table should have 1000 */ INSERT INTO memory SELECT * FROM numbers(50, 950); SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -/* 3. table should have 1020 - removed first 50 */ INSERT INTO memory SELECT * FROM numbers(2000, 70); SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -/* 4. 
check large block over-writes all rows */ +ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000; +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); + INSERT INTO memory SELECT * FROM numbers(3000, 1100); SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -SELECT 'TESTING NO CIRCULAR-BUFFER'; +SELECT 'TESTING ADD SETTINGS'; DROP TABLE IF EXISTS memory; CREATE TABLE memory (i UInt32) ENGINE = Memory; +ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000; -INSERT INTO memory SELECT * FROM numbers(0, 1600); -SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +INSERT INTO memory SELECT * FROM numbers(0, 50); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -INSERT INTO memory SELECT * FROM numbers(1000, 100); -SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +INSERT INTO memory SELECT * FROM numbers(50, 950); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -INSERT INTO memory SELECT * FROM numbers(9000, 1000); -SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +INSERT INTO memory SELECT * FROM numbers(2000, 70); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -INSERT INTO memory SELECT * FROM numbers(9000, 10000); -SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +INSERT INTO memory SELECT * FROM numbers(3000, 1100); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); SELECT 'TESTING INVALID SETTINGS'; DROP TABLE IF EXISTS memory; From 9a37552af6b0c78c2fcaa9dd35f51e2ef8aeeb22 Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Mon, 1 Apr 2024 12:05:41 +0800 Subject: [PATCH 070/470] add test file --- tests/queries/0_stateless/data_hive/fields_number_variable.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 tests/queries/0_stateless/data_hive/fields_number_variable.txt diff --git a/tests/queries/0_stateless/data_hive/fields_number_variable.txt b/tests/queries/0_stateless/data_hive/fields_number_variable.txt new file mode 100644 index 000000000000..b4e037978b90 --- /dev/null +++ b/tests/queries/0_stateless/data_hive/fields_number_variable.txt @@ -0,0 +1,2 @@ +1,3 +3,5,9 \ No newline at end of file From 724044c15ed7351fc5aaddbd4a96f53142f593dc Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Mon, 1 Apr 2024 18:22:17 +0800 Subject: [PATCH 071/470] ut fix --- src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp index 117d98d23bb1..49495503455d 100644 --- a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp @@ -15,7 +15,7 @@ static FormatSettings updateFormatSettings(const FormatSettings & settings, cons { FormatSettings updated = settings; updated.skip_unknown_fields = true; - updated.with_names_use_header = false; + updated.with_names_use_header = true; updated.date_time_input_format = FormatSettings::DateTimeInputFormat::BestEffort; updated.defaults_for_omitted_fields = true; updated.csv.delimiter = updated.hive_text.fields_delimiter; @@ -34,7 +34,7 @@ 
HiveTextRowInputFormat::HiveTextRowInputFormat( HiveTextRowInputFormat::HiveTextRowInputFormat( const Block & header_, std::shared_ptr buf_, const Params & params_, const FormatSettings & format_settings_) : CSVRowInputFormat( - header_, buf_, params_, false, false, format_settings_, std::make_unique(*buf_, format_settings_)) + header_, buf_, params_, true, false, format_settings_, std::make_unique(*buf_, header_, format_settings_)) { } @@ -45,7 +45,8 @@ HiveTextFormatReader::HiveTextFormatReader(PeekableReadBuffer & buf_, const Form std::vector HiveTextFormatReader::readNames() { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "HiveTextRowInputFormat::readNames is not implemented"); + PeekableReadBufferCheckpoint checkpoint{*buf, true}; + return input_field_names; } std::vector HiveTextFormatReader::readTypes() From 2b4e6439ef6c5746085e39c9224024bf2f643a8d Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Mon, 1 Apr 2024 18:29:32 +0800 Subject: [PATCH 072/470] remove useless code --- src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp index 49495503455d..6ddba781a54a 100644 --- a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp @@ -34,7 +34,7 @@ HiveTextRowInputFormat::HiveTextRowInputFormat( HiveTextRowInputFormat::HiveTextRowInputFormat( const Block & header_, std::shared_ptr buf_, const Params & params_, const FormatSettings & format_settings_) : CSVRowInputFormat( - header_, buf_, params_, true, false, format_settings_, std::make_unique(*buf_, header_, format_settings_)) + header_, buf_, params_, true, false, format_settings_, std::make_unique(*buf_, format_settings_)) { } From 2a183fcbef32152534846bb569013e686284ef76 Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 1 Apr 2024 16:29:39 +0000 Subject: [PATCH 073/470] fix build --- src/Databases/DatabasesCommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index 824e9fd7b4d2..c074bf201bda 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -348,7 +348,7 @@ StoragePtr DatabaseWithOwnTablesBase::getTableUnlocked(const String & table_name backQuote(database_name), backQuote(table_name)); } -std::vector> DatabaseWithOwnTablesBase::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context, bool skip_not_loaded) const +std::vector> DatabaseWithOwnTablesBase::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const { std::vector> res; From aacb65299b21ab07cbf94a9d9b5d5a8c761b8935 Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 1 Apr 2024 16:45:45 +0000 Subject: [PATCH 074/470] fix --- src/Databases/PostgreSQL/DatabasePostgreSQL.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp index b07b203f7862..3f62b9719d24 100644 --- a/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp +++ b/src/Databases/PostgreSQL/DatabasePostgreSQL.cpp @@ -97,7 +97,7 @@ bool DatabasePostgreSQL::empty() const } -DatabaseTablesIteratorPtr DatabasePostgreSQL::getTablesIterator(ContextPtr local_context, const FilterByNameFunction & /* filter_by_table_name */) const +DatabaseTablesIteratorPtr 
DatabasePostgreSQL::getTablesIterator(ContextPtr local_context, const FilterByNameFunction & /* filter_by_table_name */, bool /* skip_not_loaded */) const { std::lock_guard lock(mutex); Tables tables; From 9c98f47f97c6a6c260a2969cd9ccfaafd22d0740 Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 1 Apr 2024 16:53:49 +0000 Subject: [PATCH 075/470] more fixes --- src/Databases/DatabaseHDFS.cpp | 2 +- src/Databases/DatabaseOrdinary.cpp | 2 +- src/Databases/DatabaseReplicated.cpp | 2 +- src/Databases/DatabasesCommon.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Databases/DatabaseHDFS.cpp b/src/Databases/DatabaseHDFS.cpp index 2688ff2443ce..1de7f80f512a 100644 --- a/src/Databases/DatabaseHDFS.cpp +++ b/src/Databases/DatabaseHDFS.cpp @@ -225,7 +225,7 @@ std::vector> DatabaseHDFS::getTablesForBackup(cons * Returns an empty iterator because the database does not have its own tables * But only caches them for quick access */ -DatabaseTablesIteratorPtr DatabaseHDFS::getTablesIterator(ContextPtr, const FilterByNameFunction &) const +DatabaseTablesIteratorPtr DatabaseHDFS::getTablesIterator(ContextPtr, const FilterByNameFunction &, bool) const { return std::make_unique(Tables{}, getDatabaseName()); } diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 3859c2fe0ceb..e9168d68ef7c 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -460,7 +460,7 @@ DatabaseTablesIteratorPtr DatabaseOrdinary::getTablesIterator(ContextPtr local_c return DatabaseWithOwnTablesBase::getTablesIterator(local_context, filter_by_table_name, skip_not_loaded); } -Strings DatabaseOrdinary::getAllTableNames(ContextPtr context) const +Strings DatabaseOrdinary::getAllTableNames(ContextPtr) const { std::set unique_names; { diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 3b1646f19034..7b8f7468e81b 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -873,7 +873,7 @@ void DatabaseReplicated::recoverLostReplica(const ZooKeeperPtr & current_zookeep std::vector replicated_tables_to_rename; size_t total_tables = 0; std::vector replicated_ids; - for (auto existing_tables_it = getTablesIterator(getContext(), {}); existing_tables_it->isValid(); + for (auto existing_tables_it = getTablesIterator(getContext(), {}, /*skip_not_loaded=*/false); existing_tables_it->isValid(); existing_tables_it->next(), ++total_tables) { String name = existing_tables_it->name(); diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index c074bf201bda..57385c98c15c 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -352,7 +352,7 @@ std::vector> DatabaseWithOwnTablesBase::getTablesF { std::vector> res; - for (auto it = getTablesIterator(local_context, filter); it->isValid(); it->next()) + for (auto it = getTablesIterator(local_context, filter, /*skip_not_loaded=*/false); it->isValid(); it->next()) { auto storage = it->table(); if (!storage) From 4415dd86dff1f52a5a8921f3033b444f48549260 Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Tue, 2 Apr 2024 09:33:48 +0800 Subject: [PATCH 076/470] remove code of checkpoint --- src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp index 6ddba781a54a..1399217d9770 100644 --- 
--- a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp
@@ -45,7 +45,6 @@ HiveTextFormatReader::HiveTextFormatReader(PeekableReadBuffer & buf_, const Form
 
 std::vector HiveTextFormatReader::readNames()
 {
-    PeekableReadBufferCheckpoint checkpoint{*buf, true};
     return input_field_names;
 }

From 2f5c52f7dff0e3a5c9bd82d565dfc368fa2ab4f0 Mon Sep 17 00:00:00 2001
From: Duc Canh Le
Date: Tue, 2 Apr 2024 05:14:12 +0000
Subject: [PATCH 077/470] fix 0320_long_values_pretty_are_not_cut_if_single

Signed-off-by: Duc Canh Le

---
 .../03020_long_values_pretty_are_not_cut_if_single.sh | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/queries/0_stateless/03020_long_values_pretty_are_not_cut_if_single.sh b/tests/queries/0_stateless/03020_long_values_pretty_are_not_cut_if_single.sh
index fa9e9f6d3e15..b66951d93f61 100755
--- a/tests/queries/0_stateless/03020_long_values_pretty_are_not_cut_if_single.sh
+++ b/tests/queries/0_stateless/03020_long_values_pretty_are_not_cut_if_single.sh
@@ -9,6 +9,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # But cutting it in the result of SHOW CREATE TABLE will be bad for a user.
 # That's why we control it with the setting `output_format_pretty_max_value_width_apply_for_single_value`.
 
+# Make sure that system.metric_log exists
+${CLICKHOUSE_CLIENT} --query "SELECT 1 FORMAT Null"
+${CLICKHOUSE_CLIENT} --query "SYSTEM FLUSH LOGS"
+
+
 ${CLICKHOUSE_CLIENT} --query "SHOW CREATE TABLE system.metric_log" --format Pretty | grep -P '^COMMENT'
 ${CLICKHOUSE_CLIENT} --query "SHOW CREATE TABLE system.metric_log" --format PrettyCompact | grep -P '^COMMENT'
 ${CLICKHOUSE_CLIENT} --query "SHOW CREATE TABLE system.metric_log" --format PrettySpace | grep -P '^COMMENT'

From 70e8477a4815d8b96772a911e45488456659db3b Mon Sep 17 00:00:00 2001
From: kevinyhzou
Date: Tue, 2 Apr 2024 17:53:23 +0800
Subject: [PATCH 078/470] add no-fasttest tag

---
 .../queries/0_stateless/03033_hive_text_read_variable_fields.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/03033_hive_text_read_variable_fields.sh b/tests/queries/0_stateless/03033_hive_text_read_variable_fields.sh
index 5af0e465cca6..9dba99be7c8a 100755
--- a/tests/queries/0_stateless/03033_hive_text_read_variable_fields.sh
+++ b/tests/queries/0_stateless/03033_hive_text_read_variable_fields.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-
+# Tags: no-fasttest
 # NOTE: this sh wrapper is required because of shell_config
 
 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)

From fb94a954258ebaeb6ca3fcecb6164243e4eb8d33 Mon Sep 17 00:00:00 2001
From: kssenii
Date: Tue, 2 Apr 2024 12:23:22 +0200
Subject: [PATCH 079/470] Add logging

---
 src/Interpreters/Cache/IFileCachePriority.h     |  4 ++--
 src/Interpreters/Cache/LRUFileCachePriority.cpp | 13 +++++++++++--
 src/Interpreters/Cache/LRUFileCachePriority.h   |  1 +
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h
index ff06f17ce36d..8dcc114d9cdf 100644
--- a/src/Interpreters/Cache/IFileCachePriority.h
+++ b/src/Interpreters/Cache/IFileCachePriority.h
@@ -192,8 +192,8 @@ class IFileCachePriority : private boost::noncopyable
 
     virtual void releaseImpl(size_t /* size */, size_t /* elements */) {}
 
-    size_t max_size = 0;
-    size_t max_elements = 0;
+    std::atomic max_size = 0;
+    std::atomic max_elements = 0;
 };
 
 }

diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp
index e65c102f1e38..1d9725352be6 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.cpp
+++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp
@@ -439,12 +439,15 @@ void LRUFileCachePriority::LRUIterator::invalidate()
     assertValid();
 
     const auto & entry = *iterator;
-    LOG_TEST(cache_priority->log,
-             "Invalidating entry in LRU queue entry {}", entry->toString());
 
     chassert(entry->size != 0);
     cache_priority->updateSize(-entry->size);
     cache_priority->updateElementsCount(-1);
+
+    LOG_TEST(cache_priority->log,
+             "Invalidated entry in LRU queue {}: {}",
+             entry->toString(), cache_priority->getApproxStateInfoForLog());
+
     entry->size = 0;
 }
 
@@ -521,6 +524,12 @@ std::string LRUFileCachePriority::getStateInfoForLog(const CachePriorityGuard::L
                        getSize(lock), max_size, getElementsCount(lock), max_elements, description);
 }
 
+std::string LRUFileCachePriority::getApproxStateInfoForLog() const
+{
+    return fmt::format("size: {}/{}, elements: {}/{} (description: {})",
+                       getSizeApprox(), max_size, getElementsCountApprox(), max_elements, description);
+}
+
 void LRUFileCachePriority::holdImpl(
     size_t size,
     size_t elements,

diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h
index 31968d611966..6627fcf1dee9 100644
--- a/src/Interpreters/Cache/LRUFileCachePriority.h
+++ b/src/Interpreters/Cache/LRUFileCachePriority.h
@@ -116,6 +116,7 @@ class LRUFileCachePriority final : public IFileCachePriority
         const CachePriorityGuard::Lock & lock) override;
 
     void releaseImpl(size_t size, size_t elements) override;
+    std::string getApproxStateInfoForLog() const;
 };
 
 class LRUFileCachePriority::LRUIterator : public IFileCachePriority::Iterator

From 0d6230717898adc62ef8872f39da080b4d231c8c Mon Sep 17 00:00:00 2001
From: avogar
Date: Tue, 2 Apr 2024 10:34:52 +0000
Subject: [PATCH 080/470] Update tests

---
 tests/queries/0_stateless/01601_accurate_cast.reference       | 1 -
 tests/queries/0_stateless/01601_accurate_cast.sql             | 2 +-
 .../0_stateless/02303_cast_nullable_to_custom_types.reference | 4 ----
 .../0_stateless/02303_cast_nullable_to_custom_types.sql       | 4 ++--
 4 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/tests/queries/0_stateless/01601_accurate_cast.reference b/tests/queries/0_stateless/01601_accurate_cast.reference
index dbf9666f4cd8..82138e6354a5 100644
--- a/tests/queries/0_stateless/01601_accurate_cast.reference
+++ b/tests/queries/0_stateless/01601_accurate_cast.reference
@@ -10,7 +10,6 @@
 1970-01-01 00:00:19
 2023-05-30
 1970-01-20
-\N
 true
 false
 true

diff --git a/tests/queries/0_stateless/01601_accurate_cast.sql b/tests/queries/0_stateless/01601_accurate_cast.sql
index d2ecede24023..471e4e34a4af 100644
--- a/tests/queries/0_stateless/01601_accurate_cast.sql
+++ b/tests/queries/0_stateless/01601_accurate_cast.sql
@@ -35,7 +35,7 @@ SELECT accurateCast('1xxx', 'Date'); -- { serverError CANNOT_PARSE_DATE }
 SELECT accurateCast('2023-05-30', 'Date');
 SELECT accurateCast(19, 'Date');
 
-select accurateCast('test', 'Nullable(Bool)');
+select accurateCast('test', 'Nullable(Bool)'); -- { serverError CANNOT_PARSE_BOOL }
 select accurateCast('test', 'Bool'); -- { serverError CANNOT_PARSE_BOOL }
 select accurateCast('truex', 'Bool'); -- { serverError CANNOT_PARSE_BOOL }
 select accurateCast('xfalse', 'Bool'); -- { serverError CANNOT_PARSE_BOOL }

diff --git a/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.reference b/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.reference
index
717484d46704..a257755481e7 100644 --- a/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.reference +++ b/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.reference @@ -39,7 +39,3 @@ fuzzer issue \N \N \N -\N -\N -\N -\N diff --git a/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql b/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql index b56ebc2b09dc..570fbcde01fa 100644 --- a/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql +++ b/tests/queries/0_stateless/02303_cast_nullable_to_custom_types.sql @@ -24,7 +24,7 @@ select toIPv6OrNull(number % 2 ? '' : NULL) from numbers(2); select IPv6StringToNum(number % 2 ? '0000:0000:0000:0000:0000:0000:0000:0000' : NULL) from numbers(2); select 'fuzzer issue'; -SELECT CAST(if(number % 2, 'truetrue', NULL), 'Nullable(Bool)') FROM numbers(2); -SELECT CAST(if(number % 2, 'falsefalse', NULL), 'Nullable(Bool)') FROM numbers(2); +SELECT CAST(if(number % 2, 'truetrue', NULL), 'Nullable(Bool)') FROM numbers(2); -- {serverError CANNOT_PARSE_BOOL} +SELECT CAST(if(number % 2, 'falsefalse', NULL), 'Nullable(Bool)') FROM numbers(2); -- {serverError CANNOT_PARSE_BOOL} SELECT accurateCastOrNull(if(number % 2, NULL, 'truex'), 'Bool') FROM numbers(4); SELECT accurateCastOrNull(if(number % 2, 'truex', NULL), 'Bool') FROM numbers(4); From 5f676999ede965c82e3cb14c5e62fe30e370dec6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 2 Apr 2024 10:48:08 +0000 Subject: [PATCH 081/470] Address review comments --- src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 6dadada2e7fd..7d7fd380887c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -236,7 +236,6 @@ void ReplicatedMergeTreeQueue::removeDropReplaceIntent(const MergeTreePartInfo & bool ReplicatedMergeTreeQueue::isIntersectingWithDropReplaceIntent( const LogEntry & entry, const String & part_name, String & out_reason, std::unique_lock & /*state_mutex lock*/) const { - // TODO(antaljanosbenjamin): fill out out_reason const auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); for (const auto & intent : drop_replace_range_intents) { @@ -252,6 +251,7 @@ bool ReplicatedMergeTreeQueue::isIntersectingWithDropReplaceIntent( entry.new_part_name, part_name, intent.getPartNameForLogs()); + return true; } } return false; From 68320590edfa01664787b8aebf06e256af0ff88d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Tue, 2 Apr 2024 10:48:57 +0000 Subject: [PATCH 082/470] Wait for currently executing operations --- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 25 +++++++++++++++++++ .../MergeTree/ReplicatedMergeTreeQueue.h | 3 +++ src/Storages/StorageReplicatedMergeTree.cpp | 13 +++++++--- 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 7d7fd380887c..d2ec68186664 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1210,6 +1210,31 @@ void ReplicatedMergeTreeQueue::removePartProducingOpsInRange( entry->execution_complete.wait(lock, [&entry] { return !entry->currently_executing; }); } +void 
ReplicatedMergeTreeQueue::waitForCurrentlyExecutingOpsInRange(const MergeTreePartInfo & part_info) const +{ + Queue to_wait; + + std::unique_lock lock(state_mutex); + + for (const auto& entry : queue) + { + if (!entry->currently_executing) + continue; + + const auto virtual_part_names = entry->getVirtualPartNames(format_version); + for(const auto& virtual_part_name: virtual_part_names) { + if (!part_info.isDisjoint(MergeTreePartInfo::fromPartName(virtual_part_name, format_version))){ + to_wait.push_back(entry); + break; + } + } + } + + LOG_DEBUG(log, "Waiting for {} entries that are currently executing.", to_wait.size()); + + for (LogEntryPtr & entry : to_wait) + entry->execution_complete.wait(lock, [&entry] { return !entry->currently_executing; }); +} bool ReplicatedMergeTreeQueue::isCoveredByFuturePartsImpl(const LogEntry & entry, const String & new_part_name, String & out_reason, std::unique_lock & /* queue_lock */, diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h index 95016d60ef10..60b1a08912bc 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.h @@ -355,6 +355,9 @@ class ReplicatedMergeTreeQueue const MergeTreePartInfo & part_info, const std::optional & covering_entry); + /// Wait for the execution of currently executing actions with virtual parts intersecting with part_info + void waitForCurrentlyExecutingOpsInRange(const MergeTreePartInfo & part_info) const; + /** In the case where there are not enough parts to perform the merge in part_name * - move actions with merged parts to the end of the queue * (in order to download a already merged part from another replica). diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 52847935a72d..1bcfd13e4917 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7998,23 +7998,28 @@ void StorageReplicatedMergeTree::replacePartitionFrom( replace = false; } - scope_guard intent_guard; if (!replace) { /// It's ATTACH PARTITION FROM, not REPLACE PARTITION. 
We have to reset drop range drop_range = makeDummyDropRangeForMovePartitionOrAttachPartitionFrom(partition_id); + } + + assert(replace == !LogEntry::ReplaceRangeEntry::isMovePartitionOrAttachFrom(drop_range)); + + scope_guard intent_guard; + if (replace) + { queue.addDropReplaceIntent(drop_range); intent_guard = scope_guard{[this, my_drop_range = drop_range]() { queue.removeDropReplaceIntent(my_drop_range); }}; getContext()->getMergeList().cancelInPartition(getStorageID(), drop_range.partition_id, drop_range.max_block); + queue.waitForCurrentlyExecutingOpsInRange(drop_range); { auto pause_checking_parts = part_check_thread.pausePartsCheck(); part_check_thread.cancelRemovedPartsCheck(drop_range); } } - assert(replace == !LogEntry::ReplaceRangeEntry::isMovePartitionOrAttachFrom(drop_range)); - String drop_range_fake_part_name = getPartNamePossiblyFake(format_version, drop_range); std::set replaced_parts; @@ -8249,6 +8254,8 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta scope_guard intent_guard{[this, my_drop_range = drop_range]() { queue.removeDropReplaceIntent(my_drop_range); }}; getContext()->getMergeList().cancelInPartition(getStorageID(), drop_range.partition_id, drop_range.max_block); + + queue.waitForCurrentlyExecutingOpsInRange(drop_range); { auto pause_checking_parts = part_check_thread.pausePartsCheck(); part_check_thread.cancelRemovedPartsCheck(drop_range); From 6018434f8246ea7598c6af4dc4d59b0fdf6bf630 Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Tue, 2 Apr 2024 19:37:23 +0800 Subject: [PATCH 083/470] add config input_format_hive_text_allow_variable_number_of_columns --- src/Core/Settings.h | 1 + src/Formats/FormatFactory.cpp | 1 + src/Formats/FormatSettings.h | 1 + src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp | 2 +- 4 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 84e709294aaf..e66a56e6cea1 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -1009,6 +1009,7 @@ class IColumn; M(Char, input_format_hive_text_fields_delimiter, '\x01', "Delimiter between fields in Hive Text File", 0) \ M(Char, input_format_hive_text_collection_items_delimiter, '\x02', "Delimiter between collection(array or map) items in Hive Text File", 0) \ M(Char, input_format_hive_text_map_keys_delimiter, '\x03', "Delimiter between a pair of map key/values in Hive Text File", 0) \ + M(Bool, input_format_hive_text_allow_variable_number_of_columns, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values", 0) \ M(UInt64, input_format_msgpack_number_of_columns, 0, "The number of columns in inserted MsgPack data. 
Used for automatic schema inference from data.", 0) \ M(MsgPackUUIDRepresentation, output_format_msgpack_uuid_representation, FormatSettings::MsgPackUUIDRepresentation::EXT, "The way how to output UUID in MsgPack format.", 0) \ M(UInt64, input_format_max_rows_to_read_for_schema_inference, 25000, "The maximum rows of data to read for automatic schema inference", 0) \ diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 8cbb1b9e5639..bd41dc12fa7b 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -96,6 +96,7 @@ FormatSettings getFormatSettings(const ContextPtr & context, const Settings & se format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter; format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter; format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter; + format_settings.hive_text.allow_variable_number_of_columns = settings.input_format_hive_text_allow_variable_number_of_columns; format_settings.custom.escaping_rule = settings.format_custom_escaping_rule; format_settings.custom.field_delimiter = settings.format_custom_field_delimiter; format_settings.custom.result_after_delimiter = settings.format_custom_result_after_delimiter; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 5b7995e0da27..a239941469ff 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -176,6 +176,7 @@ struct FormatSettings char fields_delimiter = '\x01'; char collection_items_delimiter = '\x02'; char map_keys_delimiter = '\x03'; + bool allow_variable_number_of_columns = true; Names input_field_names; } hive_text{}; diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp index 1399217d9770..b64318e40930 100644 --- a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp @@ -19,7 +19,7 @@ static FormatSettings updateFormatSettings(const FormatSettings & settings, cons updated.date_time_input_format = FormatSettings::DateTimeInputFormat::BestEffort; updated.defaults_for_omitted_fields = true; updated.csv.delimiter = updated.hive_text.fields_delimiter; - updated.csv.allow_variable_number_of_columns = true; + updated.csv.allow_variable_number_of_columns = settings.hive_text.allow_variable_number_of_columns; if (settings.hive_text.input_field_names.empty()) updated.hive_text.input_field_names = header.getNames(); return updated; From fd58e4d08bba0110335065a018a32d08654c151b Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 2 Apr 2024 13:41:56 +0200 Subject: [PATCH 084/470] Better String to Variant(String) conversion --- src/Functions/FunctionsConversion.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 448a5fd8fc6e..60c069f632ce 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -4119,10 +4119,11 @@ class FunctionCast final : public IFunctionBase }; } - if (isStringOrFixedString(removeNullable(removeLowCardinality(from_type)))) + auto variant_discr_opt = to_variant.tryGetVariantDiscriminator(*removeNullableOrLowCardinalityNullable(from_type)); + /// Cast String to Variant through parsing if it's not Variant(String). 
+ if (isStringOrFixedString(removeNullable(removeLowCardinality(from_type))) && (!variant_discr_opt || to_variant.getVariants().size() > 1)) return createStringToVariantWrapper(); - auto variant_discr_opt = to_variant.tryGetVariantDiscriminator(*removeNullableOrLowCardinalityNullable(from_type)); if (!variant_discr_opt) throw Exception(ErrorCodes::CANNOT_CONVERT_TYPE, "Cannot convert type {} to {}. Conversion to Variant allowed only for types from this Variant", from_type->getName(), to_variant.getName()); From 27d13a400ffde0a4d983a6a4589b3f1fa7dd3649 Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Tue, 2 Apr 2024 20:39:17 +0800 Subject: [PATCH 085/470] add settings to changes history --- src/Core/SettingsChangesHistory.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 170836cb980d..f578e0c8d0af 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -125,6 +125,7 @@ static std::map sett {"azure_max_upload_part_size", 5ull*1024*1024*1024, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."}, {"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."}, {"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor."}, + {"input_format_hive_text_allow_variable_number_of_columns", true, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, }}, {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, From 7285a55f6983f7d6d89e5c0e95da19ccce78e4c8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 2 Apr 2024 17:08:32 +0200 Subject: [PATCH 086/470] One more --- src/Interpreters/Cache/LRUFileCachePriority.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index 1d9725352be6..4b65b1bd8adc 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -125,6 +125,9 @@ void LRUFileCachePriority::updateSize(int64_t size) chassert(size != 0); chassert(size > 0 || state->current_size >= size_t(-size)); + LOG_TEST(log, "Updating size with {}, current is {}", + size, state->current_size); + state->current_size += size; CurrentMetrics::add(CurrentMetrics::FilesystemCacheSize, size); } From 40b9f39c00b4c4fce757540e0ea0058bbe8f8360 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 2 Apr 2024 17:47:00 +0200 Subject: [PATCH 087/470] Analyzer: Fix query parameters --- src/Analyzer/QueryTreeBuilder.cpp | 16 +++--- src/Analyzer/SortNode.cpp | 17 ++++--- src/Interpreters/InterpreterSelectQuery.cpp | 16 +++--- src/Interpreters/TreeOptimizer.cpp | 2 +- src/Parsers/ASTOrderByElement.cpp | 8 +-- src/Parsers/ASTOrderByElement.h | 54 +++++++++++++++++++-- src/Parsers/ExpressionElementParsers.cpp | 13 +++-- 7 files changed, 85 insertions(+), 41 deletions(-) diff --git 
a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index df80f46b3cd1..7f7d7a828854 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -444,8 +444,8 @@ QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_express nulls_sort_direction = order_by_element.nulls_direction == 1 ? SortDirection::ASCENDING : SortDirection::DESCENDING; std::shared_ptr collator; - if (order_by_element.collation) - collator = std::make_shared(order_by_element.collation->as().value.get()); + if (order_by_element.getCollation()) + collator = std::make_shared(order_by_element.getCollation()->as().value.get()); const auto & sort_expression_ast = order_by_element.children.at(0); auto sort_expression = buildExpression(sort_expression_ast, context); @@ -455,12 +455,12 @@ QueryTreeNodePtr QueryTreeBuilder::buildSortList(const ASTPtr & order_by_express std::move(collator), order_by_element.with_fill); - if (order_by_element.fill_from) - sort_node->getFillFrom() = buildExpression(order_by_element.fill_from, context); - if (order_by_element.fill_to) - sort_node->getFillTo() = buildExpression(order_by_element.fill_to, context); - if (order_by_element.fill_step) - sort_node->getFillStep() = buildExpression(order_by_element.fill_step, context); + if (order_by_element.getFillFrom()) + sort_node->getFillFrom() = buildExpression(order_by_element.getFillFrom(), context); + if (order_by_element.getFillTo()) + sort_node->getFillTo() = buildExpression(order_by_element.getFillTo(), context); + if (order_by_element.getFillStep()) + sort_node->getFillStep() = buildExpression(order_by_element.getFillStep(), context); list_node->getNodes().push_back(std::move(sort_node)); } diff --git a/src/Analyzer/SortNode.cpp b/src/Analyzer/SortNode.cpp index 8e9913af442c..b9d93511b84b 100644 --- a/src/Analyzer/SortNode.cpp +++ b/src/Analyzer/SortNode.cpp @@ -120,17 +120,18 @@ ASTPtr SortNode::toASTImpl(const ConvertToASTOptions & options) const result->nulls_direction_was_explicitly_specified = nulls_sort_direction.has_value(); - result->with_fill = with_fill; - result->fill_from = hasFillFrom() ? getFillFrom()->toAST(options) : nullptr; - result->fill_to = hasFillTo() ? getFillTo()->toAST(options) : nullptr; - result->fill_step = hasFillStep() ? 
getFillStep()->toAST(options) : nullptr; result->children.push_back(getExpression()->toAST(options)); if (collator) - { - result->children.push_back(std::make_shared(Field(collator->getLocale()))); - result->collation = result->children.back(); - } + result->setCollation(std::make_shared(Field(collator->getLocale()))); + + result->with_fill = with_fill; + if (hasFillFrom()) + result->setFillFrom(getFillFrom()->toAST(options)); + if (hasFillTo()) + result->setFillTo(getFillTo()->toAST(options)); + if (hasFillStep()) + result->setFillStep(getFillStep()->toAST(options)); return result; } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 6bbf03bb1e07..5864b35799e5 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1165,13 +1165,13 @@ static FillColumnDescription getWithFillDescription(const ASTOrderByElement & or { FillColumnDescription descr; - if (order_by_elem.fill_from) - std::tie(descr.fill_from, descr.fill_from_type) = getWithFillFieldValue(order_by_elem.fill_from, context); - if (order_by_elem.fill_to) - std::tie(descr.fill_to, descr.fill_to_type) = getWithFillFieldValue(order_by_elem.fill_to, context); + if (order_by_elem.getFillFrom()) + std::tie(descr.fill_from, descr.fill_from_type) = getWithFillFieldValue(order_by_elem.getFillFrom(), context); + if (order_by_elem.getFillTo()) + std::tie(descr.fill_to, descr.fill_to_type) = getWithFillFieldValue(order_by_elem.getFillTo(), context); - if (order_by_elem.fill_step) - std::tie(descr.fill_step, descr.step_kind) = getWithFillStep(order_by_elem.fill_step, context); + if (order_by_elem.getFillStep()) + std::tie(descr.fill_step, descr.step_kind) = getWithFillStep(order_by_elem.getFillStep(), context); else descr.fill_step = order_by_elem.direction; @@ -1217,8 +1217,8 @@ SortDescription InterpreterSelectQuery::getSortDescription(const ASTSelectQuery const auto & order_by_elem = elem->as(); std::shared_ptr collator; - if (order_by_elem.collation) - collator = std::make_shared(order_by_elem.collation->as().value.get()); + if (order_by_elem.getCollation()) + collator = std::make_shared(order_by_elem.getCollation()->as().value.get()); if (order_by_elem.with_fill) { diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index b71a8e3681d9..a341dae32fa1 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -277,7 +277,7 @@ void optimizeDuplicatesInOrderBy(const ASTSelectQuery * select_query) const auto & order_by_elem = elem->as(); if (order_by_elem.with_fill /// Always keep elements WITH FILL as they affects other. - || elems_set.emplace(name, order_by_elem.collation ? order_by_elem.collation->getColumnName() : "").second) + || elems_set.emplace(name, order_by_elem.getCollation() ? order_by_elem.getCollation()->getColumnName() : "").second) unique_elems.emplace_back(elem); } diff --git a/src/Parsers/ASTOrderByElement.cpp b/src/Parsers/ASTOrderByElement.cpp index 318849812aa5..be0416359a18 100644 --- a/src/Parsers/ASTOrderByElement.cpp +++ b/src/Parsers/ASTOrderByElement.cpp @@ -31,7 +31,7 @@ void ASTOrderByElement::formatImpl(const FormatSettings & settings, FormatState << (settings.hilite ? hilite_none : ""); } - if (collation) + if (auto collation = getCollation()) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " COLLATE " << (settings.hilite ? 
hilite_none : ""); collation->formatImpl(settings, state, frame); @@ -40,17 +40,17 @@ void ASTOrderByElement::formatImpl(const FormatSettings & settings, FormatState if (with_fill) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " WITH FILL" << (settings.hilite ? hilite_none : ""); - if (fill_from) + if (auto fill_from = getFillFrom()) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " FROM " << (settings.hilite ? hilite_none : ""); fill_from->formatImpl(settings, state, frame); } - if (fill_to) + if (auto fill_to = getFillTo()) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " TO " << (settings.hilite ? hilite_none : ""); fill_to->formatImpl(settings, state, frame); } - if (fill_step) + if (auto fill_step = getFillStep()) { settings.ostr << (settings.hilite ? hilite_keyword : "") << " STEP " << (settings.hilite ? hilite_none : ""); fill_step->formatImpl(settings, state, frame); diff --git a/src/Parsers/ASTOrderByElement.h b/src/Parsers/ASTOrderByElement.h index 4cebc30be31b..6edf84d7bde9 100644 --- a/src/Parsers/ASTOrderByElement.h +++ b/src/Parsers/ASTOrderByElement.h @@ -10,18 +10,34 @@ namespace DB */ class ASTOrderByElement : public IAST { +private: + enum class Child : uint8_t + { + EXPRESSION, + COLLATION, + FILL_FROM, + FILL_TO, + FILL_STEP, + }; + public: int direction = 0; /// 1 for ASC, -1 for DESC int nulls_direction = 0; /// Same as direction for NULLS LAST, opposite for NULLS FIRST. bool nulls_direction_was_explicitly_specified = false; + bool with_fill = false; + /** Collation for locale-specific string comparison. If empty, then sorting done by bytes. */ - ASTPtr collation; + void setCollation(ASTPtr node) { setChild(Child::COLLATION, node); } + void setFillFrom(ASTPtr node) { setChild(Child::FILL_FROM, node); } + void setFillTo(ASTPtr node) { setChild(Child::FILL_TO, node); } + void setFillStep(ASTPtr node) { setChild(Child::FILL_STEP, node); } - bool with_fill = false; - ASTPtr fill_from; - ASTPtr fill_to; - ASTPtr fill_step; + /** Collation for locale-specific string comparison. If empty, then sorting done by bytes. 
*/ + ASTPtr getCollation() const { return getChild(Child::COLLATION); } + ASTPtr getFillFrom() const { return getChild(Child::FILL_FROM); } + ASTPtr getFillTo() const { return getChild(Child::FILL_TO); } + ASTPtr getFillStep() const { return getChild(Child::FILL_STEP); } String getID(char) const override { return "OrderByElement"; } @@ -36,6 +52,34 @@ class ASTOrderByElement : public IAST protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; +private: + + ASTPtr getChild(Child child) const + { + auto it = positions.find(child); + if (it != positions.end()) + return children[it->second]; + return {}; + } + + void setChild(Child child, ASTPtr node) + { + if (node == nullptr) + return; + + auto it = positions.find(child); + if (it != positions.end()) + { + children[it->second] = node; + } + else + { + positions[child] = children.size(); + children.push_back(node); + } + } + + std::unordered_map positions; }; } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 67f4a306292d..d4ad210b3151 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -2120,17 +2120,16 @@ bool ParserOrderByElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expect auto elem = std::make_shared(); + elem->children.push_back(expr_elem); + elem->direction = direction; elem->nulls_direction = nulls_direction; elem->nulls_direction_was_explicitly_specified = nulls_direction_was_explicitly_specified; - elem->collation = locale_node; + elem->setCollation(locale_node); elem->with_fill = has_with_fill; - elem->fill_from = fill_from; - elem->fill_to = fill_to; - elem->fill_step = fill_step; - elem->children.push_back(expr_elem); - if (locale_node) - elem->children.push_back(locale_node); + elem->setFillFrom(fill_from); + elem->setFillTo(fill_to); + elem->setFillStep(fill_step); node = elem; From ed9ee5ab4cfa56cd615024d20f1d7d1c31b88be3 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 15:47:48 +0000 Subject: [PATCH 088/470] First portion --- .../03033_with_fill_interpolate.reference | 0 .../03033_with_fill_interpolate.sql | 27 ++++++++++++ .../03034_normalized_ast.reference | 0 .../0_stateless/03034_normalized_ast.sql | 7 +++ ...035_alias_column_bug_distributed.reference | 0 .../03035_alias_column_bug_distributed.sql | 43 +++++++++++++++++++ .../0_stateless/03036_with_numbers.reference | 20 +++++++++ .../0_stateless/03036_with_numbers.sql | 8 ++++ 8 files changed, 105 insertions(+) create mode 100644 tests/queries/0_stateless/03033_with_fill_interpolate.reference create mode 100644 tests/queries/0_stateless/03033_with_fill_interpolate.sql create mode 100644 tests/queries/0_stateless/03034_normalized_ast.reference create mode 100644 tests/queries/0_stateless/03034_normalized_ast.sql create mode 100644 tests/queries/0_stateless/03035_alias_column_bug_distributed.reference create mode 100644 tests/queries/0_stateless/03035_alias_column_bug_distributed.sql create mode 100644 tests/queries/0_stateless/03036_with_numbers.reference create mode 100644 tests/queries/0_stateless/03036_with_numbers.sql diff --git a/tests/queries/0_stateless/03033_with_fill_interpolate.reference b/tests/queries/0_stateless/03033_with_fill_interpolate.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03033_with_fill_interpolate.sql b/tests/queries/0_stateless/03033_with_fill_interpolate.sql new file mode 100644 index 
000000000000..816633af757b --- /dev/null +++ b/tests/queries/0_stateless/03033_with_fill_interpolate.sql @@ -0,0 +1,27 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/55794 +DROP TABLE IF EXISTS 03033_example_table; + +CREATE TABLE 03033_example_table +( + ColumnA Int64, + ColumnB Int64, + ColumnC Int64 +) +ENGINE = MergeTree() +ORDER BY ColumnA; + +WITH +helper AS ( + SELECT + * + FROM + 03033_example_table + ORDER BY + ColumnA WITH FILL INTERPOLATE ( + ColumnB AS ColumnC, + ColumnC AS ColumnA + ) +) +SELECT ColumnB FROM helper; + +DROP TABLE IF EXISTS 03033_example_table; diff --git a/tests/queries/0_stateless/03034_normalized_ast.reference b/tests/queries/0_stateless/03034_normalized_ast.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03034_normalized_ast.sql b/tests/queries/0_stateless/03034_normalized_ast.sql new file mode 100644 index 000000000000..ff6f8da118cd --- /dev/null +++ b/tests/queries/0_stateless/03034_normalized_ast.sql @@ -0,0 +1,7 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/49472 +SELECT + concat(database, table) AS name, + count() +FROM clusterAllReplicas(default, system.tables) +GROUP BY name +FORMAT Null; diff --git a/tests/queries/0_stateless/03035_alias_column_bug_distributed.reference b/tests/queries/0_stateless/03035_alias_column_bug_distributed.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03035_alias_column_bug_distributed.sql b/tests/queries/0_stateless/03035_alias_column_bug_distributed.sql new file mode 100644 index 000000000000..fb459b3289b0 --- /dev/null +++ b/tests/queries/0_stateless/03035_alias_column_bug_distributed.sql @@ -0,0 +1,43 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/44414 +DROP TABLE IF EXISTS alias_bug; +DROP TABLE IF EXISTS alias_bug_dist; +CREATE TABLE alias_bug +( + `src` String, + `theAlias` String ALIAS trimBoth(src) +) +ENGINE = MergeTree() +ORDER BY src; + +CREATE TABLE alias_bug_dist +AS alias_bug +ENGINE = Distributed('default', currentDatabase(), 'alias_bug', rand()); + +INSERT INTO alias_bug VALUES ('SOURCE1'); + +-- OK +SELECT theAlias,CAST(NULL, 'Nullable(String)') AS src FROM alias_bug LIMIT 1 FORMAT Null; + +-- Not OK +SELECT theAlias,CAST(NULL, 'Nullable(String)') AS src FROM alias_bug_dist LIMIT 1 FORMAT Null; + +DROP TABLE IF EXISTS alias_bug; +DROP TABLE IF EXISTS alias_bug_dist; +CREATE TABLE alias_bug +( + `s` String, + `src` String, + `theAlias` String ALIAS trimBoth(src) +) +ENGINE = MergeTree() +ORDER BY src; + +CREATE TABLE alias_bug_dist +AS alias_bug +ENGINE = Distributed('default', currentDatabase(), 'alias_bug', rand()); + +-- Unknown identifier +SELECT CAST(123, 'String') AS src,theAlias FROM alias_bug_dist LIMIT 1 FORMAT Null; + +DROP TABLE IF EXISTS alias_bug; +DROP TABLE IF EXISTS alias_bug_dist; diff --git a/tests/queries/0_stateless/03036_with_numbers.reference b/tests/queries/0_stateless/03036_with_numbers.reference new file mode 100644 index 000000000000..7b36cc96f5ec --- /dev/null +++ b/tests/queries/0_stateless/03036_with_numbers.reference @@ -0,0 +1,20 @@ +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git a/tests/queries/0_stateless/03036_with_numbers.sql b/tests/queries/0_stateless/03036_with_numbers.sql new file mode 100644 index 000000000000..5e08bb6e0652 --- /dev/null +++ b/tests/queries/0_stateless/03036_with_numbers.sql @@ -0,0 +1,8 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/13843 +WITH 10 AS n +SELECT * +FROM numbers(n); + 
+WITH cast(10, 'UInt64') AS n +SELECT * +FROM numbers(n); From 259da73b17189c25f70e0e15bd4bc47f1362166a Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 2 Apr 2024 17:54:49 +0200 Subject: [PATCH 089/470] Add a test --- .../0_stateless/03033_analyzer_query_parameters.reference | 2 ++ .../0_stateless/03033_analyzer_query_parameters.sh | 8 ++++++++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/03033_analyzer_query_parameters.reference create mode 100755 tests/queries/0_stateless/03033_analyzer_query_parameters.sh diff --git a/tests/queries/0_stateless/03033_analyzer_query_parameters.reference b/tests/queries/0_stateless/03033_analyzer_query_parameters.reference new file mode 100644 index 000000000000..6ed281c757a9 --- /dev/null +++ b/tests/queries/0_stateless/03033_analyzer_query_parameters.reference @@ -0,0 +1,2 @@ +1 +1 diff --git a/tests/queries/0_stateless/03033_analyzer_query_parameters.sh b/tests/queries/0_stateless/03033_analyzer_query_parameters.sh new file mode 100755 index 000000000000..c821791e4378 --- /dev/null +++ b/tests/queries/0_stateless/03033_analyzer_query_parameters.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +clickhouse-local --param_rounding 1 --query "SELECT 1 AS x ORDER BY x WITH FILL STEP {rounding:UInt32} SETTINGS allow_experimental_analyzer = 1" +clickhouse-local --param_rounding 1 --query "SELECT 1 AS x ORDER BY x WITH FILL STEP {rounding:UInt32} SETTINGS allow_experimental_analyzer = 0" From c11aa0122647b39177d381499d08f6102a5e5160 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 2 Apr 2024 17:48:48 +0200 Subject: [PATCH 090/470] Fix race --- src/Interpreters/Cache/EvictionCandidates.cpp | 16 +++++++++++++--- src/Interpreters/Cache/EvictionCandidates.h | 4 ++-- src/Interpreters/Cache/FileSegment.cpp | 5 +++++ src/Interpreters/Cache/FileSegment.h | 1 + src/Interpreters/Cache/LRUFileCachePriority.cpp | 6 ++++++ src/Interpreters/Cache/LRUFileCachePriority.h | 2 +- 6 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/Cache/EvictionCandidates.cpp b/src/Interpreters/Cache/EvictionCandidates.cpp index 080ef6759181..4ca6aeea22ea 100644 --- a/src/Interpreters/Cache/EvictionCandidates.cpp +++ b/src/Interpreters/Cache/EvictionCandidates.cpp @@ -30,6 +30,10 @@ EvictionCandidates::~EvictionCandidates() iterator->invalidate(); } + /// We cannot reset evicting flag if we already removed queue entries. 
+ if (removed_queue_entries) + return; + /// Here `candidates` contain only those file segments /// which failed to be removed during evict() /// because there was some exception before evict() @@ -62,9 +66,15 @@ void EvictionCandidates::removeQueueEntries(const CachePriorityGuard::Lock & loc for (const auto & [key, key_candidates] : candidates) { for (const auto & candidate : key_candidates.candidates) + { + const auto & file_segment = candidate->file_segment; + auto file_segment_lock = file_segment->lock(); + candidate->getQueueIterator()->remove(lock); + file_segment->setQueueIteratorUnlocked(nullptr, file_segment_lock); + } } - invalidated_queue_entries = true; + removed_queue_entries = true; } void EvictionCandidates::evict() @@ -74,7 +84,7 @@ void EvictionCandidates::evict() auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::FilesystemCacheEvictMicroseconds); - if (!invalidated_queue_entries) + if (!removed_queue_entries) queue_entries_to_invalidate.reserve(candidates_size); for (auto & [key, key_candidates] : candidates) @@ -123,7 +133,7 @@ void EvictionCandidates::evict() /// it was freed in favour of some reserver, so we can make it visibly /// free only for that particular reserver. - if (!invalidated_queue_entries) + if (!removed_queue_entries) queue_entries_to_invalidate.push_back(iterator); key_candidates.candidates.pop_back(); diff --git a/src/Interpreters/Cache/EvictionCandidates.h b/src/Interpreters/Cache/EvictionCandidates.h index 571010a14bce..140728ae7044 100644 --- a/src/Interpreters/Cache/EvictionCandidates.h +++ b/src/Interpreters/Cache/EvictionCandidates.h @@ -4,7 +4,7 @@ namespace DB { -class EvictionCandidates +class EvictionCandidates : private boost::noncopyable { public: using FinalizeEvictionFunc = std::function; @@ -60,7 +60,7 @@ class EvictionCandidates std::vector on_finalize; std::vector queue_entries_to_invalidate; - bool invalidated_queue_entries = false; + bool removed_queue_entries = false; IFileCachePriority::HoldSpacePtr hold_space; }; diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 9ec2b090dc77..0d64b602928d 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -165,6 +165,11 @@ FileSegment::Priority::IteratorPtr FileSegment::getQueueIterator() const void FileSegment::setQueueIterator(Priority::IteratorPtr iterator) { auto lock = lockFileSegment(); + setQueueIteratorUnlocked(iterator, lock); +} + +void FileSegment::setQueueIteratorUnlocked(Priority::IteratorPtr iterator, const FileSegmentGuard::Lock &) +{ if (queue_iterator) throw Exception(ErrorCodes::LOGICAL_ERROR, "Queue iterator cannot be set twice"); queue_iterator = iterator; diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index c34ee064345a..12046e59bd61 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -176,6 +176,7 @@ friend class FileCache; /// Because of reserved_size in tryReserve(). 
Priority::IteratorPtr getQueueIterator() const; void setQueueIterator(Priority::IteratorPtr iterator); + void setQueueIteratorUnlocked(Priority::IteratorPtr iterator, const FileSegmentGuard::Lock &); KeyMetadataPtr tryGetKeyMetadata() const; diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index 78ece5a31245..4f2b17ea1049 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -431,6 +431,12 @@ bool LRUFileCachePriority::modifySizeLimits( return true; } +IFileCachePriority::EntryPtr LRUFileCachePriority::LRUIterator::getEntry() const +{ + assertValid(); + return *iterator; +} + void LRUFileCachePriority::LRUIterator::remove(const CachePriorityGuard::Lock & lock) { assertValid(); diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index ed3a455126a5..0d10f22701ee 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -146,7 +146,7 @@ class LRUFileCachePriority::LRUIterator : public IFileCachePriority::Iterator LRUIterator & operator =(const LRUIterator & other); bool operator ==(const LRUIterator & other) const; - EntryPtr getEntry() const override { return *iterator; } + EntryPtr getEntry() const override; size_t increasePriority(const CachePriorityGuard::Lock &) override; From 4441a1b3f3c2f6844e233e379335d4bdf4922ab5 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 15:59:17 +0000 Subject: [PATCH 091/470] Close https://github.com/ClickHouse/ClickHouse/issues/55803 --- .../0_stateless/03037_union_view.reference | 0 .../queries/0_stateless/03037_union_view.sql | 24 +++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 tests/queries/0_stateless/03037_union_view.reference create mode 100644 tests/queries/0_stateless/03037_union_view.sql diff --git a/tests/queries/0_stateless/03037_union_view.reference b/tests/queries/0_stateless/03037_union_view.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03037_union_view.sql b/tests/queries/0_stateless/03037_union_view.sql new file mode 100644 index 000000000000..04f4afab4c48 --- /dev/null +++ b/tests/queries/0_stateless/03037_union_view.sql @@ -0,0 +1,24 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/55803 +CREATE TABLE broken_table +( + start DateTime64(6), + end DateTime64(6), +) +ENGINE = ReplacingMergeTree(start) +ORDER BY (start); + +CREATE VIEW broken_view as +SELECT + t.start as start, + t.end as end, + cast(datediff('second', t.start, t.end) as float) as total_sec +FROM broken_table t FINAL +UNION ALL +SELECT + null as start, + null as end, + null as total_sec; + +SELECT v.start, v.total_sec +FROM broken_view v FINAL +WHERE v.start IS NOT NULL; From d6504764000b762cea48a2c633286ea77cb388c7 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 16:01:55 +0000 Subject: [PATCH 092/470] Better --- tests/queries/0_stateless/03037_union_view.sql | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/queries/0_stateless/03037_union_view.sql b/tests/queries/0_stateless/03037_union_view.sql index 04f4afab4c48..fb8aa7df9542 100644 --- a/tests/queries/0_stateless/03037_union_view.sql +++ b/tests/queries/0_stateless/03037_union_view.sql @@ -1,4 +1,7 @@ -- https://github.com/ClickHouse/ClickHouse/issues/55803 +DROP TABLE IF EXISTS broken_table; +DROP TABLE IF EXISTS broken_view; + CREATE TABLE 
broken_table ( start DateTime64(6), @@ -22,3 +25,6 @@ SELECT SELECT v.start, v.total_sec FROM broken_view v FINAL WHERE v.start IS NOT NULL; + +DROP TABLE IF EXISTS broken_table; +DROP TABLE IF EXISTS broken_view; \ No newline at end of file From f5c514615301659bd9fad8b6dcc13623a034a620 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Apr 2024 18:04:53 +0200 Subject: [PATCH 093/470] [RFC] Try to add global Real and CPU trace collector --- programs/server/Server.cpp | 6 ++- src/Common/QueryProfiler.cpp | 8 ++++ src/Common/QueryProfiler.h | 2 + src/Common/ThreadPool.cpp | 15 ++++--- src/Common/ThreadPool.h | 32 ++++++++++++--- src/Common/ThreadPool_fwd.h | 7 ++-- src/Common/ThreadStatus.h | 1 + src/Core/ServerSettings.h | 2 + src/Interpreters/ThreadStatusExt.cpp | 40 +++++++++++++++++-- .../config.d/serverwide_trace_collector.xml | 4 ++ .../__init__.py | 1 + .../configs/global_profiler.xml | 4 ++ .../test_trace_collector_serverwide/test.py | 38 ++++++++++++++++++ 13 files changed, 142 insertions(+), 18 deletions(-) create mode 100644 tests/config/config.d/serverwide_trace_collector.xml create mode 100644 tests/integration/test_trace_collector_serverwide/__init__.py create mode 100644 tests/integration/test_trace_collector_serverwide/configs/global_profiler.xml create mode 100644 tests/integration/test_trace_collector_serverwide/test.py diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 450e1696c115..a048bebc45b9 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -734,13 +734,17 @@ try LOG_INFO(log, "Available CPU instruction sets: {}", cpu_info); #endif + bool will_have_trace_collector = hasPHDRCache() && config().has("trace_log"); + // Initialize global thread pool. Do it before we fetch configs from zookeeper // nodes (`from_zk`), because ZooKeeper interface uses the pool. We will // ignore `max_thread_pool_size` in configs we fetch from ZK, but oh well. GlobalThreadPool::initialize( server_settings.max_thread_pool_size, server_settings.max_thread_pool_free_size, - server_settings.thread_pool_queue_size); + server_settings.thread_pool_queue_size, + will_have_trace_collector ? server_settings.global_profiler_real_time_period_ns : 0, + will_have_trace_collector ? server_settings.global_profiler_cpu_time_period_ns : 0); /// Wait for all threads to avoid possible use-after-free (for example logging objects can be already destroyed). 
SCOPE_EXIT({ Stopwatch watch; diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 34ffbf6c498f..3b7289167e34 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -198,6 +198,7 @@ void Timer::cleanup() { if (timer_id) { + LOG_INFO(log, "CLEANUP TIMER"); int err = timer_delete(*timer_id); if (err) LOG_ERROR(log, "Failed to delete query profiler timer {}", errnoToString()); @@ -260,6 +261,13 @@ QueryProfilerBase::QueryProfilerBase(UInt64 thread_id, int clock_t #endif } + +template +void QueryProfilerBase::setPeriod(UInt32 period_) +{ + timer.set(period_); +} + template QueryProfilerBase::~QueryProfilerBase() { diff --git a/src/Common/QueryProfiler.h b/src/Common/QueryProfiler.h index 254b11137ccb..ea4cc73bca63 100644 --- a/src/Common/QueryProfiler.h +++ b/src/Common/QueryProfiler.h @@ -57,6 +57,8 @@ class QueryProfilerBase QueryProfilerBase(UInt64 thread_id, int clock_type, UInt32 period, int pause_signal_); ~QueryProfilerBase(); + void setPeriod(UInt32 period_); + private: void cleanup(); diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 3c2e6228421e..eaee070c44f5 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -490,8 +490,9 @@ void ThreadPoolImpl::worker(typename std::list::iterator thread_ template class ThreadPoolImpl; -template class ThreadPoolImpl>; -template class ThreadFromGlobalPoolImpl; +template class ThreadPoolImpl>; +template class ThreadFromGlobalPoolImpl; +template class ThreadFromGlobalPoolImpl; std::unique_ptr GlobalThreadPool::the_instance; @@ -500,7 +501,9 @@ GlobalThreadPool::GlobalThreadPool( size_t max_threads_, size_t max_free_threads_, size_t queue_size_, - const bool shutdown_on_exception_) + const bool shutdown_on_exception_, + UInt64 global_profiler_real_time_period_ns_, + UInt64 global_profiler_cpu_time_period_ns_) : FreeThreadPool( CurrentMetrics::GlobalThread, CurrentMetrics::GlobalThreadActive, @@ -509,10 +512,12 @@ GlobalThreadPool::GlobalThreadPool( max_free_threads_, queue_size_, shutdown_on_exception_) + , global_profiler_real_time_period_ns(global_profiler_real_time_period_ns_) + , global_profiler_cpu_time_period_ns(global_profiler_cpu_time_period_ns_) { } -void GlobalThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size) +void GlobalThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size, UInt64 global_profiler_real_time_period_ns, UInt64 global_profiler_cpu_time_period_ns) { if (the_instance) { @@ -520,7 +525,7 @@ void GlobalThreadPool::initialize(size_t max_threads, size_t max_free_threads, s "The global thread pool is initialized twice"); } - the_instance.reset(new GlobalThreadPool(max_threads, max_free_threads, queue_size, false /*shutdown_on_exception*/)); + the_instance.reset(new GlobalThreadPool(max_threads, max_free_threads, queue_size, false /*shutdown_on_exception*/, global_profiler_real_time_period_ns, global_profiler_cpu_time_period_ns)); } GlobalThreadPool & GlobalThreadPool::instance() diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index 31e4eabf63b8..528f782caf25 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -172,10 +172,21 @@ class GlobalThreadPool : public FreeThreadPool, private boost::noncopyable size_t max_threads_, size_t max_free_threads_, size_t queue_size_, - bool shutdown_on_exception_); + bool shutdown_on_exception_, + UInt64 global_profiler_real_time_period_ns_, + UInt64 global_profiler_cpu_time_period_ns_); public: - static 
void initialize(size_t max_threads = 10000, size_t max_free_threads = 1000, size_t queue_size = 10000); + UInt64 global_profiler_real_time_period_ns; + UInt64 global_profiler_cpu_time_period_ns; + + static void initialize( + size_t max_threads = 10000, + size_t max_free_threads = 1000, + size_t queue_size = 10000, + UInt64 global_profiler_real_time_period_ns_ = 0, + UInt64 global_profiler_cpu_time_period_ns_ = 0); + static GlobalThreadPool & instance(); static void shutdown(); }; @@ -187,7 +198,7 @@ class GlobalThreadPool : public FreeThreadPool, private boost::noncopyable * NOTE: User code should use 'ThreadFromGlobalPool' declared below instead of directly using this class. * */ -template +template class ThreadFromGlobalPoolImpl : boost::noncopyable { public: @@ -197,11 +208,15 @@ class ThreadFromGlobalPoolImpl : boost::noncopyable explicit ThreadFromGlobalPoolImpl(Function && func, Args &&... args) : state(std::make_shared()) { + UInt64 global_profiler_real_time_period = GlobalThreadPool::instance().global_profiler_real_time_period_ns; + UInt64 global_profiler_cpu_time_period = GlobalThreadPool::instance().global_profiler_cpu_time_period_ns; /// NOTE: /// - If this will throw an exception, the destructor won't be called /// - this pointer cannot be passed in the lambda, since after detach() it will not be valid GlobalThreadPool::instance().scheduleOrThrow([ my_state = state, + global_profiler_real_time_period, + global_profiler_cpu_time_period, my_func = std::forward(func), my_args = std::make_tuple(std::forward(args)...)]() mutable /// mutable is needed to destroy capture { @@ -220,6 +235,12 @@ class ThreadFromGlobalPoolImpl : boost::noncopyable /// Thread status holds raw pointer on query context, thus it always must be destroyed /// before sending signal that permits to join this thread. DB::ThreadStatus thread_status; + if constexpr (global_trace_collector_allowed) + { + if (unlikely(global_profiler_real_time_period != 0 || global_profiler_cpu_time_period != 0)) + thread_status.initGlobalProfiler(global_profiler_real_time_period, global_profiler_cpu_time_period); + } + std::apply(function, arguments); }, {}, // default priority @@ -305,11 +326,12 @@ class ThreadFromGlobalPoolImpl : boost::noncopyable /// you need to use class, or you need to use ThreadFromGlobalPool below. /// /// See the comments of ThreadPool below to know how it works. -using ThreadFromGlobalPoolNoTracingContextPropagation = ThreadFromGlobalPoolImpl; +using ThreadFromGlobalPoolNoTracingContextPropagation = ThreadFromGlobalPoolImpl; /// An alias of thread that execute jobs/tasks on global thread pool by implicit passing tracing context on current thread to underlying worker as parent tracing context. /// If jobs/tasks are directly scheduled by using APIs of this class, you need to use this class or you need to use class above. -using ThreadFromGlobalPool = ThreadFromGlobalPoolImpl; +using ThreadFromGlobalPool = ThreadFromGlobalPoolImpl; +using ThreadFromGlobalPoolWithoutTraceCollector = ThreadFromGlobalPoolImpl; /// Recommended thread pool for the case when multiple thread pools are created and destroyed. 
/// diff --git a/src/Common/ThreadPool_fwd.h b/src/Common/ThreadPool_fwd.h index 2782acc9c516..fea4e59f0879 100644 --- a/src/Common/ThreadPool_fwd.h +++ b/src/Common/ThreadPool_fwd.h @@ -3,11 +3,12 @@ template class ThreadPoolImpl; -template +template class ThreadFromGlobalPoolImpl; -using ThreadFromGlobalPoolNoTracingContextPropagation = ThreadFromGlobalPoolImpl; +using ThreadFromGlobalPoolNoTracingContextPropagation = ThreadFromGlobalPoolImpl; -using ThreadFromGlobalPool = ThreadFromGlobalPoolImpl; +using ThreadFromGlobalPool = ThreadFromGlobalPoolImpl; +using ThreadFromGlobalPoolWithoutTraceCollector = ThreadFromGlobalPoolImpl; using ThreadPool = ThreadPoolImpl; diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 48b52f8aa6ef..2d33c0ac0214 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -307,6 +307,7 @@ class ThreadStatus : public boost::noncopyable void flushUntrackedMemory(); + void initGlobalProfiler(UInt64 global_profiler_real_time_period, UInt64 global_profiler_cpu_time_period); private: void applyGlobalSettings(); void applyQuerySettings(); diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 6608a35a5a2c..e05b3cf9e314 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -137,6 +137,8 @@ namespace DB M(UInt64, http_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. The limit applies to the http connections which do not belong to any disk or storage.", 0) \ M(UInt64, http_connections_warn_limit, 1000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the http connections which do not belong to any disk or storage.", 0) \ M(UInt64, http_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the http connections which do not belong to any disk or storage.", 0) \ + M(UInt64, global_profiler_real_time_period_ns, 0, "Period for real clock timer of global profiler (in nanoseconds). Set 0 value to turn off the real clock global profiler. Recommended value is at least 10000000 (100 times a second) for single queries or 1000000000 (once a second) for cluster-wide profiling.", 0) \ + M(UInt64, global_profiler_cpu_time_period_ns, 0, "Period for CPU clock timer of global profiler (in nanoseconds). Set 0 value to turn off the CPU clock global profiler. 
 
     /// If you add a setting which can be updated at runtime, please update 'changeable_settings' map in StorageSystemServerSettings.cpp

diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp
index 1c24c4f85c99..4b9bd069bc6a 100644
--- a/src/Interpreters/ThreadStatusExt.cpp
+++ b/src/Interpreters/ThreadStatusExt.cpp
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include
 
 #if defined(OS_LINUX)
 #    include
@@ -457,6 +458,27 @@ void ThreadStatus::resetPerformanceCountersLastUsage()
         taskstats->reset();
 }
 
+
+void ThreadStatus::initGlobalProfiler(UInt64 global_profiler_real_time_period, UInt64 global_profiler_cpu_time_period)
+{
+
+    try
+    {
+        if (global_profiler_real_time_period > 0)
+            query_profiler_real = std::make_unique<QueryProfilerReal>(thread_id,
+                /* period= */ static_cast<UInt32>(global_profiler_real_time_period));
+
+        if (global_profiler_cpu_time_period > 0)
+            query_profiler_cpu = std::make_unique<QueryProfilerCPU>(thread_id,
+                /* period= */ static_cast<UInt32>(global_profiler_cpu_time_period));
+    }
+    catch (...)
+    {
+        tryLogCurrentException("ThreadStatus", "Cannot initialize GlobalProfiler");
+    }
+
+}
+
 void ThreadStatus::initQueryProfiler()
 {
     if (internal_thread)
@@ -474,12 +496,22 @@ void ThreadStatus::initQueryProfiler()
     try
     {
         if (settings.query_profiler_real_time_period_ns > 0)
-            query_profiler_real = std::make_unique<QueryProfilerReal>(thread_id,
-                /* period= */ static_cast<UInt32>(settings.query_profiler_real_time_period_ns));
+        {
+            if (!query_profiler_real)
+                query_profiler_real = std::make_unique<QueryProfilerReal>(thread_id,
+                    /* period= */ static_cast<UInt32>(settings.query_profiler_real_time_period_ns));
+            else
+                query_profiler_real->setPeriod(static_cast<UInt32>(settings.query_profiler_real_time_period_ns));
+        }
 
         if (settings.query_profiler_cpu_time_period_ns > 0)
-            query_profiler_cpu = std::make_unique<QueryProfilerCPU>(thread_id,
-                /* period= */ static_cast<UInt32>(settings.query_profiler_cpu_time_period_ns));
+        {
+            if (!query_profiler_cpu)
+                query_profiler_cpu = std::make_unique<QueryProfilerCPU>(thread_id,
+                    /* period= */ static_cast<UInt32>(settings.query_profiler_cpu_time_period_ns));
+            else
+                query_profiler_cpu->setPeriod(static_cast<UInt32>(settings.query_profiler_cpu_time_period_ns));
+        }
     }
     catch (...)
     {

diff --git a/tests/config/config.d/serverwide_trace_collector.xml b/tests/config/config.d/serverwide_trace_collector.xml
new file mode 100644
index 000000000000..602e07469f3e
--- /dev/null
+++ b/tests/config/config.d/serverwide_trace_collector.xml
@@ -0,0 +1,4 @@
+<clickhouse>
+    <global_profiler_real_time_period_ns>1000000000</global_profiler_real_time_period_ns>
+    <global_profiler_cpu_time_period_ns>1000000000</global_profiler_cpu_time_period_ns>
+</clickhouse>
diff --git a/tests/integration/test_trace_collector_serverwide/__init__.py b/tests/integration/test_trace_collector_serverwide/__init__.py
new file mode 100644
index 000000000000..e5a0d9b4834e
--- /dev/null
+++ b/tests/integration/test_trace_collector_serverwide/__init__.py
@@ -0,0 +1 @@
+#!/usr/bin/env python3
diff --git a/tests/integration/test_trace_collector_serverwide/configs/global_profiler.xml b/tests/integration/test_trace_collector_serverwide/configs/global_profiler.xml
new file mode 100644
index 000000000000..5112d2671825
--- /dev/null
+++ b/tests/integration/test_trace_collector_serverwide/configs/global_profiler.xml
@@ -0,0 +1,4 @@
+<clickhouse>
+    <global_profiler_real_time_period_ns>10000000</global_profiler_real_time_period_ns>
+    <global_profiler_cpu_time_period_ns>10000000</global_profiler_cpu_time_period_ns>
+</clickhouse>
diff --git a/tests/integration/test_trace_collector_serverwide/test.py b/tests/integration/test_trace_collector_serverwide/test.py
new file mode 100644
index 000000000000..5a7bba15fd71
--- /dev/null
+++ b/tests/integration/test_trace_collector_serverwide/test.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+
+import pytest
+import time
+
+from helpers.cluster import ClickHouseCluster
+from helpers.test_tools import assert_eq_with_retry
+
+cluster = ClickHouseCluster(__file__)
+
+node1 = cluster.add_instance("node1", main_configs=["configs/global_profiler.xml"])
+
+@pytest.fixture(scope="module")
+def start_cluster():
+    try:
+        cluster.start()
+
+        yield cluster
+    finally:
+        cluster.shutdown()
+
+def test_global_thread_profiler(start_cluster):
+    node1.query("CREATE TABLE t (key UInt32, value String) Engine = MergeTree() ORDER BY key")
+
+    node1.query("INSERT INTO t SELECT number, toString(number) from numbers(100)")
+    node1.query("INSERT INTO t SELECT number, toString(number) from numbers(100)")
+    node1.query("INSERT INTO t SELECT number, toString(number) from numbers(100)")
+    node1.query("INSERT INTO t SELECT number, toString(number) from numbers(100)")
+    node1.query("INSERT INTO t SELECT number, toString(number) from numbers(100)")
+    node1.query("INSERT INTO t SELECT number, toString(number) from numbers(100)")
+    node1.query("INSERT INTO t SELECT number, toString(number) from numbers(100)")
+    node1.query("INSERT INTO t SELECT number, toString(number) from numbers(100)")
+
+    time.sleep(5)
+
+    node1.query("SYSTEM FLUSH LOGS")
+
+    assert int(node1.query("SELECT count() FROM system.trace_log where trace_type='Real' and query_id = ''").strip()) > 0

From 3d9a6e9b8e5f8864f9f8a0481439a316c5deaba7 Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov
Date: Tue, 2 Apr 2024 16:06:02 +0000
Subject: [PATCH 094/470] Close https://github.com/ClickHouse/ClickHouse/issues/48308

---
 .../03038_ambiguous_column.reference          |  0
 .../0_stateless/03038_ambiguous_column.sql    | 41 +++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 tests/queries/0_stateless/03038_ambiguous_column.reference
 create mode 100644 tests/queries/0_stateless/03038_ambiguous_column.sql

diff --git a/tests/queries/0_stateless/03038_ambiguous_column.reference b/tests/queries/0_stateless/03038_ambiguous_column.reference
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/queries/0_stateless/03038_ambiguous_column.sql b/tests/queries/0_stateless/03038_ambiguous_column.sql
new file mode 100644
index 000000000000..69c8e52d7341
--- /dev/null
+++
b/tests/queries/0_stateless/03038_ambiguous_column.sql @@ -0,0 +1,41 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/48308 +DROP TABLE IF EXISTS 03038_table; + +CREATE TABLE 03038_table +( + `time` DateTime +) +ENGINE = MergeTree +ORDER BY time; + +SELECT * +FROM +( + SELECT + toUInt64(time) AS time, + toHour(03038_table.time) + FROM 03038_table +) +ORDER BY time ASC; + +WITH subquery AS ( + SELECT + toUInt64(time) AS time, + toHour(03038_table.time) + FROM 03038_table +) +SELECT * +FROM subquery +ORDER BY subquery.time ASC; + +SELECT * +FROM +( + SELECT + toUInt64(time) AS time, + toHour(03038_table.time) AS hour + FROM 03038_table +) +ORDER BY time ASC, hour; + +DROP TABLE IF EXISTS 03038_table; From 723a733c84a241de56fbf66ffc84a332c995c673 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Apr 2024 18:07:02 +0200 Subject: [PATCH 095/470] Missing change --- tests/config/install.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/config/install.sh b/tests/config/install.sh index 652d25a0a35b..06f2f5fe902b 100755 --- a/tests/config/install.sh +++ b/tests/config/install.sh @@ -67,6 +67,7 @@ ln -sf $SRC_PATH/config.d/validate_tcp_client_information.xml $DEST_SERVER_PATH/ ln -sf $SRC_PATH/config.d/zero_copy_destructive_operations.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/block_number.xml $DEST_SERVER_PATH/config.d/ ln -sf $SRC_PATH/config.d/handlers.yaml $DEST_SERVER_PATH/config.d/ +ln -sf $SRC_PATH/config.d/serverwide_trace_collector.xml $DEST_SERVER_PATH/config.d/ # Not supported with fasttest. if [ "${DEST_SERVER_PATH}" = "/etc/clickhouse-server" ] From d11d10050b3315227dbf0019a86bd8fa25d9bf71 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Apr 2024 18:08:04 +0200 Subject: [PATCH 096/470] Remove debug line --- src/Common/QueryProfiler.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index 3b7289167e34..f985ec95e881 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -198,7 +198,6 @@ void Timer::cleanup() { if (timer_id) { - LOG_INFO(log, "CLEANUP TIMER"); int err = timer_delete(*timer_id); if (err) LOG_ERROR(log, "Failed to delete query profiler timer {}", errnoToString()); From a54efe56450ed781e5fb101014cd460b9db6fefb Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 16:11:02 +0000 Subject: [PATCH 097/470] Close https://github.com/ClickHouse/ClickHouse/issues/45535 --- ...known_identifier_window_function.reference | 40 +++++++++++++++++++ ...039_unknown_identifier_window_function.sql | 34 ++++++++++++++++ 2 files changed, 74 insertions(+) create mode 100644 tests/queries/0_stateless/03039_unknown_identifier_window_function.reference create mode 100644 tests/queries/0_stateless/03039_unknown_identifier_window_function.sql diff --git a/tests/queries/0_stateless/03039_unknown_identifier_window_function.reference b/tests/queries/0_stateless/03039_unknown_identifier_window_function.reference new file mode 100644 index 000000000000..405da28a5798 --- /dev/null +++ b/tests/queries/0_stateless/03039_unknown_identifier_window_function.reference @@ -0,0 +1,40 @@ +0 10 +1 10 +2 10 +3 10 +4 10 +5 10 +6 10 +7 10 +8 10 +9 10 +0 10 +1 10 +2 10 +3 10 +4 10 +5 10 +6 10 +7 10 +8 10 +9 10 +0 10 0 +1 10 1 +2 10 2 +3 10 3 +4 10 4 +5 10 5 +6 10 6 +7 10 7 +8 10 8 +9 10 9 +0 10 +1 10 +2 10 +3 10 +4 10 +5 10 +6 10 +7 10 +8 10 +9 10 diff --git a/tests/queries/0_stateless/03039_unknown_identifier_window_function.sql 
b/tests/queries/0_stateless/03039_unknown_identifier_window_function.sql new file mode 100644 index 000000000000..ca3bb521eba2 --- /dev/null +++ b/tests/queries/0_stateless/03039_unknown_identifier_window_function.sql @@ -0,0 +1,34 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/45535 + +SELECT + *, + count() OVER () AS c +FROM numbers(10) +ORDER BY toString(number); + + +WITH + toString(number) as str +SELECT + *, + count() OVER () AS c +FROM numbers(10) +ORDER BY str; + +SELECT + *, + count() OVER () AS c, + toString(number) as str +FROM numbers(10) +ORDER BY str; + + +WITH + test AS ( + SELECT + *, + count() OVER () AS c + FROM numbers(10) + ) +SELECT * FROM test +ORDER BY toString(number); From 44847fbb2faf0d82a05c4b247d4540ca446f2269 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Tue, 2 Apr 2024 18:16:22 +0200 Subject: [PATCH 098/470] Analyzer: cover new analyzer with old analyzer fails --- .../03040_array_sum_and_join.reference | 5 ++++ .../0_stateless/03040_array_sum_and_join.sql | 26 +++++++++++++++++++ 2 files changed, 31 insertions(+) create mode 100644 tests/queries/0_stateless/03040_array_sum_and_join.reference create mode 100644 tests/queries/0_stateless/03040_array_sum_and_join.sql diff --git a/tests/queries/0_stateless/03040_array_sum_and_join.reference b/tests/queries/0_stateless/03040_array_sum_and_join.reference new file mode 100644 index 000000000000..d81fd9a2f69c --- /dev/null +++ b/tests/queries/0_stateless/03040_array_sum_and_join.reference @@ -0,0 +1,5 @@ +79 name1 42.7027027027027 +62 name2 33.513513513513516 +44 name3 23.783783783783786 +[[1,2],[1,2]] +[(3,[1,2]),(4,[1,2])] diff --git a/tests/queries/0_stateless/03040_array_sum_and_join.sql b/tests/queries/0_stateless/03040_array_sum_and_join.sql new file mode 100644 index 000000000000..0084f0e4c7bb --- /dev/null +++ b/tests/queries/0_stateless/03040_array_sum_and_join.sql @@ -0,0 +1,26 @@ + +select t.1 as cnt, + t.2 as name, + t.3 as percent +from ( + select arrayJoin(result) as t + from ( + select [ + (79, 'name1'), + (62, 'name2'), + (44, 'name3') + ] as data, + arraySum(arrayMap(t -> t.1, data)) as total, + arrayMap(t -> + tuple(t.1, t.2, + multiIf(total = 0, 0, t.1 > 0 and t.1 < 10, -1.0, + (toFloat32(t.1) / toFloat32(total)) * 100) + ), + data + ) as result + ) + ); + +SELECT arrayMap(x -> arrayMap(x -> (x.1), [(1, 1), (2, 2)]), [(3, 3), (4, 4)]); + +SELECT arrayMap(x -> (x.1, arrayMap(x -> (x.1), [(1, 1), (2, 2)])), [(3, 3), (4, 4)]); From a3028ed9cd4c88d8ab5bb86ab47a4ec1475df067 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 2 Apr 2024 16:20:09 +0000 Subject: [PATCH 099/470] Automatic style fix --- .../test_trace_collector_serverwide/test.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_trace_collector_serverwide/test.py b/tests/integration/test_trace_collector_serverwide/test.py index 5a7bba15fd71..88d235642b9b 100644 --- a/tests/integration/test_trace_collector_serverwide/test.py +++ b/tests/integration/test_trace_collector_serverwide/test.py @@ -10,6 +10,7 @@ node1 = cluster.add_instance("node1", main_configs=["configs/global_profiler.xml"]) + @pytest.fixture(scope="module") def start_cluster(): try: @@ -19,8 +20,11 @@ def start_cluster(): finally: cluster.shutdown() + def test_global_thread_profiler(start_cluster): - node1.query("CREATE TABLE t (key UInt32, value String) Engine = MergeTree() ORDER BY key") + node1.query( + "CREATE TABLE t (key UInt32, value String) Engine = MergeTree() ORDER BY key" + ) 
node1.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") node1.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") @@ -35,4 +39,11 @@ def test_global_thread_profiler(start_cluster): node1.query("SYSTEM FLUSH LOGS") - assert int(node1.query("SELECT count() FROM system.trace_log where trace_type='Real' and query_id = ''").strip()) > 0 + assert ( + int( + node1.query( + "SELECT count() FROM system.trace_log where trace_type='Real' and query_id = ''" + ).strip() + ) + > 0 + ) From 18402c6191dfa5bc4ec8a9278253f5bc241b60df Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 16:22:04 +0000 Subject: [PATCH 100/470] Close https://github.com/ClickHouse/ClickHouse/issues/44365 --- .../0_stateless/03040_alias_column_join.reference | 0 .../queries/0_stateless/03040_alias_column_join.sql | 13 +++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 tests/queries/0_stateless/03040_alias_column_join.reference create mode 100644 tests/queries/0_stateless/03040_alias_column_join.sql diff --git a/tests/queries/0_stateless/03040_alias_column_join.reference b/tests/queries/0_stateless/03040_alias_column_join.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03040_alias_column_join.sql b/tests/queries/0_stateless/03040_alias_column_join.sql new file mode 100644 index 000000000000..f4ea2e5914df --- /dev/null +++ b/tests/queries/0_stateless/03040_alias_column_join.sql @@ -0,0 +1,13 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/44365 +DROP TABLE IF EXISTS 03040_test; + +CREATE TABLE 03040_test +( + id UInt64, + val String alias 'value: '||toString(id) +) ENGINE = MergeTree +ORDER BY tuple(); + +SELECT val FROM 03040_test t GROUP BY val; + +DROP TABLE IF EXISTS 03040_test; From 170f50e095f9d6076c84ba8825c08310ebd55c8e Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Mon, 1 Apr 2024 09:57:10 +0200 Subject: [PATCH 101/470] More than 255 replicas in ReplicatedTableStatus --- src/Storages/MergeTree/ReplicatedTableStatus.h | 4 ++-- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedTableStatus.h b/src/Storages/MergeTree/ReplicatedTableStatus.h index ce9ad3640f41..786a5fdb44d0 100644 --- a/src/Storages/MergeTree/ReplicatedTableStatus.h +++ b/src/Storages/MergeTree/ReplicatedTableStatus.h @@ -24,8 +24,8 @@ struct ReplicatedTableStatus UInt64 log_max_index; UInt64 log_pointer; UInt64 absolute_delay; - UInt8 total_replicas; - UInt8 active_replicas; + UInt32 total_replicas; + UInt32 active_replicas; UInt64 lost_part_count; String last_queue_update_exception; /// If the error has happened fetching the info from ZooKeeper, this field will be set. diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 8ca061db4ecf..b0f82e85ac4d 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -7003,7 +7003,7 @@ void StorageReplicatedMergeTree::getStatus(ReplicatedTableStatus & res, bool wit } res.log_pointer = log_pointer_str.empty() ? 
0 : parse<UInt64>(log_pointer_str);
-    res.total_replicas = all_replicas.size();
+    res.total_replicas = UInt32(all_replicas.size());
     if (get_result[1].error == Coordination::Error::ZNONODE)
         res.lost_part_count = 0;
     else

From 478cabee22a66008988a0302c8b5111e59f6f70e Mon Sep 17 00:00:00 2001
From: Nikita Fomichev
Date: Tue, 2 Apr 2024 18:33:56 +0200
Subject: [PATCH 102/470] Close: https://github.com/ClickHouse/ClickHouse/issues/15411

---
 .../03041_analyzer_gigachad_join.reference         |  1 +
 .../0_stateless/03041_analyzer_gigachad_join.sql   | 14 ++++++++++++++
 2 files changed, 15 insertions(+)
 create mode 100644 tests/queries/0_stateless/03041_analyzer_gigachad_join.reference
 create mode 100644 tests/queries/0_stateless/03041_analyzer_gigachad_join.sql

diff --git a/tests/queries/0_stateless/03041_analyzer_gigachad_join.reference b/tests/queries/0_stateless/03041_analyzer_gigachad_join.reference
new file mode 100644
index 000000000000..a859a6005123
--- /dev/null
+++ b/tests/queries/0_stateless/03041_analyzer_gigachad_join.reference
@@ -0,0 +1 @@
+123456789 111 222
diff --git a/tests/queries/0_stateless/03041_analyzer_gigachad_join.sql b/tests/queries/0_stateless/03041_analyzer_gigachad_join.sql
new file mode 100644
index 000000000000..462e63b121b6
--- /dev/null
+++ b/tests/queries/0_stateless/03041_analyzer_gigachad_join.sql
@@ -0,0 +1,14 @@
+CREATE TABLE IF NOT EXISTS first engine = MergeTree PARTITION BY (inn, toYYYYMM(received)) ORDER BY (inn, sessionId)
+AS SELECT now() AS received, '123456789' AS inn, '42' AS sessionId;
+
+CREATE TABLE IF NOT EXISTS second engine = MergeTree PARTITION BY (inn, toYYYYMM(received)) ORDER BY (inn, sessionId)
+AS SELECT now() AS received, '123456789' AS inn, '42' AS sessionId, '111' AS serial, '222' AS reg;
+
+SELECT alias_first.inn, arrayFirst(t -> isNotNull(t), regInfo.1), arrayFirst(t -> isNotNull(t), regInfo.2)
+  FROM first AS alias_first
+  INNER JOIN (
+    SELECT alias_second.inn, alias_second.sessionId, groupArray((serial, reg)) AS regInfo
+    FROM second AS alias_second
+    GROUP BY inn, sessionId
+  ) AS resp ON (alias_first.inn = resp.inn) AND (alias_first.sessionId = resp.sessionId)
+WHERE if('123456789' IS NOT NULL, alias_first.inn = '123456789', 1)

From a4a56a9b6c0ce7ce66006be87d8bf0ef8b660aed Mon Sep 17 00:00:00 2001
From: Nikita Mikhaylov
Date: Tue, 2 Apr 2024 16:38:18 +0000
Subject: [PATCH 103/470] Close https://github.com/ClickHouse/ClickHouse/issues/44153

---
 .../03041_select_with_query_result.reference  |  0
 .../03041_select_with_query_result.sql        | 41 +++++++++++++++++++
 2 files changed, 41 insertions(+)
 create mode 100644 tests/queries/0_stateless/03041_select_with_query_result.reference
 create mode 100644 tests/queries/0_stateless/03041_select_with_query_result.sql

diff --git a/tests/queries/0_stateless/03041_select_with_query_result.reference b/tests/queries/0_stateless/03041_select_with_query_result.reference
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/queries/0_stateless/03041_select_with_query_result.sql b/tests/queries/0_stateless/03041_select_with_query_result.sql
new file mode 100644
index 000000000000..3edf51d635e7
--- /dev/null
+++ b/tests/queries/0_stateless/03041_select_with_query_result.sql
@@ -0,0 +1,41 @@
+-- https://github.com/ClickHouse/ClickHouse/issues/44153
+DROP TABLE IF EXISTS parent;
+DROP TABLE IF EXISTS join_table_1;
+DROP TABLE IF EXISTS join_table_2;
+
+CREATE TABLE parent(
+    a_id Int64,
+    b_id Int64,
+    c_id Int64,
+    created_at Int64
+)
+ENGINE=MergeTree()
+ORDER BY (a_id, b_id, c_id, created_at);
+
+CREATE
TABLE join_table_1( + a_id Int64, + b_id Int64 +) +ENGINE=MergeTree() +ORDER BY (a_id, b_id); + +CREATE TABLE join_table_2( + c_id Int64, + created_at Int64 +) +ENGINE=MergeTree() +ORDER BY (c_id, created_at); + +WITH with_table as ( + SELECT p.a_id, p.b_id, p.c_id FROM parent p + LEFT JOIN join_table_1 jt1 ON jt1.a_id = p.a_id AND jt1.b_id = p.b_id + LEFT JOIN join_table_2 jt2 ON jt2.c_id = p.c_id + WHERE + p.a_id = 0 AND (jt2.c_id = 0 OR p.created_at = 0) +) +SELECT p.a_id, p.b_id, COUNT(*) as f_count FROM with_table +GROUP BY p.a_id, p.b_id; + +DROP TABLE IF EXISTS parent; +DROP TABLE IF EXISTS join_table_1; +DROP TABLE IF EXISTS join_table_2; From 0a5747377bb44bee51dcf6223930c16529cb9a83 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Tue, 2 Apr 2024 18:43:12 +0200 Subject: [PATCH 104/470] Close: https://github.com/ClickHouse/ClickHouse/issues/14978 --- .../03042_analyzer_alias_join.reference | 0 .../0_stateless/03042_analyzer_alias_join.sql | 20 +++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 tests/queries/0_stateless/03042_analyzer_alias_join.reference create mode 100644 tests/queries/0_stateless/03042_analyzer_alias_join.sql diff --git a/tests/queries/0_stateless/03042_analyzer_alias_join.reference b/tests/queries/0_stateless/03042_analyzer_alias_join.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03042_analyzer_alias_join.sql b/tests/queries/0_stateless/03042_analyzer_alias_join.sql new file mode 100644 index 000000000000..f3341fd314a8 --- /dev/null +++ b/tests/queries/0_stateless/03042_analyzer_alias_join.sql @@ -0,0 +1,20 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/14978 +CREATE TABLE test1(id UInt64, t1value UInt64) ENGINE=MergeTree ORDER BY tuple(); +CREATE TABLE test2(id UInt64, t2value String) ENGINE=MergeTree ORDER BY tuple(); + +SELECT NULL AS t2value +FROM test1 t1 +LEFT JOIN ( + SELECT id, t2value FROM test2 +) t2 +ON t1.id=t2.id +WHERE t2.t2value='test'; + +-- workaround should work too +SELECT NULL AS _svalue +FROM test1 t1 +LEFT JOIN ( + SELECT id, t2value FROM test2 +) t2 +ON t1.id=t2.id +WHERE t2.t2value='test'; From 1fb23c64f15b70228f7b0911f4e5358c4a077b61 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 16:41:42 +0000 Subject: [PATCH 105/470] Close https://github.com/ClickHouse/ClickHouse/issues/42399 --- .../0_stateless/03042_not_found_column_c1.reference | 0 tests/queries/0_stateless/03042_not_found_column_c1.sql | 8 ++++++++ 2 files changed, 8 insertions(+) create mode 100644 tests/queries/0_stateless/03042_not_found_column_c1.reference create mode 100644 tests/queries/0_stateless/03042_not_found_column_c1.sql diff --git a/tests/queries/0_stateless/03042_not_found_column_c1.reference b/tests/queries/0_stateless/03042_not_found_column_c1.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03042_not_found_column_c1.sql b/tests/queries/0_stateless/03042_not_found_column_c1.sql new file mode 100644 index 000000000000..8ce7dcd9d4f8 --- /dev/null +++ b/tests/queries/0_stateless/03042_not_found_column_c1.sql @@ -0,0 +1,8 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/42399 + +CREATE TABLE IF NOT EXISTS t0 (c0 Int32) ENGINE = Memory() ; +CREATE TABLE t1 (c0 Int32, c1 Int32, c2 Int32) ENGINE = Memory() ; +CREATE TABLE t2 (c0 String, c1 String MATERIALIZED (c2), c2 Int32) ENGINE = Memory() ; +CREATE TABLE t3 (c0 String, c1 String, c2 String) ENGINE = Log() ; +CREATE TABLE IF NOT EXISTS t4 (c0 
Int32) ENGINE = Log() ; +SELECT t3.c1, t3.c2, t1.c1, t1.c0, t2.c2, t0.c0, t1.c2, t2.c1, t4.c0 FROM t3, t0, t1, t2, t4; From 2d8f07318c06ec330c8d6e87facd387bc2b63341 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 16:45:21 +0000 Subject: [PATCH 106/470] Close: https://github.com/ClickHouse/ClickHouse/issues/27115 --- ...3_group_array_result_is_expected.reference | 1 + .../03043_group_array_result_is_expected.sql | 44 +++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 tests/queries/0_stateless/03043_group_array_result_is_expected.reference create mode 100644 tests/queries/0_stateless/03043_group_array_result_is_expected.sql diff --git a/tests/queries/0_stateless/03043_group_array_result_is_expected.reference b/tests/queries/0_stateless/03043_group_array_result_is_expected.reference new file mode 100644 index 000000000000..d43aa556dce1 --- /dev/null +++ b/tests/queries/0_stateless/03043_group_array_result_is_expected.reference @@ -0,0 +1 @@ +['2021-07-01','2021-07-02','2021-07-03','2021-07-04','2021-07-05','2021-07-06','2021-07-07','2021-07-08','2021-07-09','2021-07-10','2021-07-11','2021-07-12','2021-07-13','2021-07-14','2021-07-15','2021-07-16','2021-07-17','2021-07-18','2021-07-19','2021-07-20','2021-07-21','2021-07-22','2021-07-23','2021-07-24','2021-07-25','2021-07-26','2021-07-27','2021-07-28','2021-07-29'] 29 diff --git a/tests/queries/0_stateless/03043_group_array_result_is_expected.sql b/tests/queries/0_stateless/03043_group_array_result_is_expected.sql new file mode 100644 index 000000000000..df77ca666471 --- /dev/null +++ b/tests/queries/0_stateless/03043_group_array_result_is_expected.sql @@ -0,0 +1,44 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/27115 +drop table if exists fill_ex; + +create table fill_ex ( + eventDate Date , + storeId String +) +engine = ReplacingMergeTree() +partition by toYYYYMM(eventDate) +order by (storeId,eventDate); + +insert into fill_ex (eventDate,storeId) values ('2021-07-16','s') ('2021-07-17','ee'); + +select + groupArray(key) as keys, + count() as c +from + ( + select + *, + eventDate as key + from + ( + select + eventDate + from + ( + select + eventDate + from + fill_ex final + where + eventDate >= toDate('2021-07-01') + and eventDate Date: Tue, 2 Apr 2024 17:48:28 +0100 Subject: [PATCH 107/470] Bump From ed0522ae1a2e96e59386a7ed25ba85a774d429ad Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Tue, 2 Apr 2024 18:48:00 +0200 Subject: [PATCH 108/470] Close: https://github.com/ClickHouse/ClickHouse/issues/17319 --- .../03044_analyzer_alias_join.reference | 0 .../0_stateless/03044_analyzer_alias_join.sql | 17 +++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 tests/queries/0_stateless/03044_analyzer_alias_join.reference create mode 100644 tests/queries/0_stateless/03044_analyzer_alias_join.sql diff --git a/tests/queries/0_stateless/03044_analyzer_alias_join.reference b/tests/queries/0_stateless/03044_analyzer_alias_join.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03044_analyzer_alias_join.sql b/tests/queries/0_stateless/03044_analyzer_alias_join.sql new file mode 100644 index 000000000000..5202b57a7b11 --- /dev/null +++ b/tests/queries/0_stateless/03044_analyzer_alias_join.sql @@ -0,0 +1,17 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/17319 +CREATE TEMPORARY TABLE hits (date Date, data Float64) engine=Memory(); + +SELECT + subquery1.period AS period, + if(1=1, 0, subquery1.data1) AS data, + if(1=1, 0, 
subquery2.data) AS other_data +FROM +( + SELECT date AS period, data AS data1 + FROM hits +) AS subquery1 +LEFT JOIN +( + SELECT date AS period, data AS data + FROM hits +) AS subquery2 ON (subquery1.period = subquery2.period) From f1fb042be3d54d0347568abcc6e5c3358665d075 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Tue, 2 Apr 2024 18:55:25 +0200 Subject: [PATCH 109/470] Close: https://github.com/ClickHouse/ClickHouse/issues/13210 --- ...3045_analyzer_alias_join_with_if.reference | 0 .../03045_analyzer_alias_join_with_if.sql | 33 +++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 tests/queries/0_stateless/03045_analyzer_alias_join_with_if.reference create mode 100644 tests/queries/0_stateless/03045_analyzer_alias_join_with_if.sql diff --git a/tests/queries/0_stateless/03045_analyzer_alias_join_with_if.reference b/tests/queries/0_stateless/03045_analyzer_alias_join_with_if.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03045_analyzer_alias_join_with_if.sql b/tests/queries/0_stateless/03045_analyzer_alias_join_with_if.sql new file mode 100644 index 000000000000..a0546f57736f --- /dev/null +++ b/tests/queries/0_stateless/03045_analyzer_alias_join_with_if.sql @@ -0,0 +1,33 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/13210 +CREATE TABLE test_a_table ( + name String, + a_col String +) +Engine = MergeTree() +ORDER BY name; + +CREATE TABLE test_b_table ( + name String, + b_col String, + some_val String +) +Engine = MergeTree() +ORDER BY name; + +SELECT + b.name name, + a.a_col a_col, + b.b_col b_col, + 'N' some_val +from test_a_table a +join test_b_table b on a.name = b.name +where b.some_val = 'Y'; + +SELECT + b.name name, + a.a_col a_col, + b.b_col b_col, + if(1,'N',b.some_val) some_val +from test_a_table a +join test_b_table b on a.name = b.name +where b.some_val = 'Y'; From b19b0890becd3cf9f3d7e23744e0e477ca12e4ef Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 16:56:16 +0000 Subject: [PATCH 110/470] Close: https://github.com/ClickHouse/ClickHouse/issues/11813 --- .../03044_array_join_columns_in_nested_table.reference | 1 + .../0_stateless/03044_array_join_columns_in_nested_table.sql | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 tests/queries/0_stateless/03044_array_join_columns_in_nested_table.reference create mode 100644 tests/queries/0_stateless/03044_array_join_columns_in_nested_table.sql diff --git a/tests/queries/0_stateless/03044_array_join_columns_in_nested_table.reference b/tests/queries/0_stateless/03044_array_join_columns_in_nested_table.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/03044_array_join_columns_in_nested_table.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03044_array_join_columns_in_nested_table.sql b/tests/queries/0_stateless/03044_array_join_columns_in_nested_table.sql new file mode 100644 index 000000000000..f3ec80b8a94c --- /dev/null +++ b/tests/queries/0_stateless/03044_array_join_columns_in_nested_table.sql @@ -0,0 +1,2 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/11813 +select 1 from (select 1 x) l join (select 1 y, [1] a) r on l.x = r.y array join r.a; From 042e612485d7083749342b8658bbb5e580da580a Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 16:59:03 +0000 Subject: [PATCH 111/470] Close: https://github.com/ClickHouse/ClickHouse/issues/23053 --- ...wn_identifier_alias_substitution.reference | 0 
..._unknown_identifier_alias_substitution.sql | 20 +++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 tests/queries/0_stateless/03045_unknown_identifier_alias_substitution.reference create mode 100644 tests/queries/0_stateless/03045_unknown_identifier_alias_substitution.sql diff --git a/tests/queries/0_stateless/03045_unknown_identifier_alias_substitution.reference b/tests/queries/0_stateless/03045_unknown_identifier_alias_substitution.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03045_unknown_identifier_alias_substitution.sql b/tests/queries/0_stateless/03045_unknown_identifier_alias_substitution.sql new file mode 100644 index 000000000000..cadcbdc0ce54 --- /dev/null +++ b/tests/queries/0_stateless/03045_unknown_identifier_alias_substitution.sql @@ -0,0 +1,20 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/23053 +DROP TABLE IF EXISTS repl_tbl; + +CREATE TEMPORARY TABLE repl_tbl +( + `key` UInt32, + `val_1` UInt32, + `val_2` String, + `val_3` String, + `val_4` String, + `val_5` UUID, + `ts` DateTime +) +ENGINE = ReplacingMergeTree(ts) +ORDER BY `key`; +set prefer_column_name_to_alias = 1; +INSERT INTO repl_tbl (key) SELECT number FROM numbers(10); +WITH 10 as k SELECT k as key, * FROM repl_tbl WHERE key = k; + +DROP TABLE IF EXISTS repl_tbl; From 668aa9bafd25b6c27a8aba02dd0b1d53c782fc65 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 17:03:40 +0000 Subject: [PATCH 112/470] Close: https://github.com/ClickHouse/ClickHouse/issues/37729 --- ...03046_column_in_block_array_join.reference | 2 + .../03046_column_in_block_array_join.sql | 37 +++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 tests/queries/0_stateless/03046_column_in_block_array_join.reference create mode 100644 tests/queries/0_stateless/03046_column_in_block_array_join.sql diff --git a/tests/queries/0_stateless/03046_column_in_block_array_join.reference b/tests/queries/0_stateless/03046_column_in_block_array_join.reference new file mode 100644 index 000000000000..f9264f7fbd31 --- /dev/null +++ b/tests/queries/0_stateless/03046_column_in_block_array_join.reference @@ -0,0 +1,2 @@ +Hello +World diff --git a/tests/queries/0_stateless/03046_column_in_block_array_join.sql b/tests/queries/0_stateless/03046_column_in_block_array_join.sql new file mode 100644 index 000000000000..9a2bb19d81e0 --- /dev/null +++ b/tests/queries/0_stateless/03046_column_in_block_array_join.sql @@ -0,0 +1,37 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/37729 + +DROP TABLE IF EXISTS nested_test; +DROP TABLE IF EXISTS join_test; + +CREATE TABLE nested_test +( + s String, + nest Nested + ( + x UInt64, + y UInt64 + ) +) ENGINE = MergeTree +ORDER BY s; + +CREATE TABLE join_test +( + id Int64, + y UInt64 +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO nested_test +VALUES ('Hello', [1,2], [10,20]), ('World', [3,4,5], [30,40,50]), ('Goodbye', [], []); + +INSERT INTO join_test +VALUES (1,1),(2,4),(3,20),(4,40); + +SELECT s +FROM nested_test AS t1 +ARRAY JOIN nest +INNER JOIN join_test AS t2 ON nest.y = t2.y; + +DROP TABLE IF EXISTS nested_test; +DROP TABLE IF EXISTS join_test; From 9b74e246af4eec622ca749aebbbe2985428e51f7 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 2 Apr 2024 19:03:54 +0200 Subject: [PATCH 113/470] Rename a method --- src/Interpreters/Cache/FileSegment.cpp | 132 ++++++++++++------------- src/Interpreters/Cache/FileSegment.h | 6 +- 2 files changed, 69 insertions(+), 69 deletions(-) diff --git 
a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 0d64b602928d..f56f5d3f66c4 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -113,7 +113,7 @@ FileSegment::Range::Range(size_t left_, size_t right_) : left(left_), right(righ FileSegment::State FileSegment::state() const { - auto lock = lockFileSegment(); + auto lk = lock(); return download_state; } @@ -130,7 +130,7 @@ String FileSegment::tryGetPath() const return metadata->getFileSegmentPath(*this); } -FileSegmentGuard::Lock FileSegment::lockFileSegment() const +FileSegmentGuard::Lock FileSegment::lock() const { ProfileEventTimeIncrement watch(ProfileEvents::FileSegmentLockMicroseconds); return segment_guard.lock(); @@ -152,29 +152,30 @@ void FileSegment::setDownloadState(State state, const FileSegmentGuard::Lock & l size_t FileSegment::getReservedSize() const { - auto lock = lockFileSegment(); + auto lk = lock(); return reserved_size; } FileSegment::Priority::IteratorPtr FileSegment::getQueueIterator() const { - auto lock = lockFileSegment(); + auto lk = lock(); return queue_iterator; } void FileSegment::setQueueIterator(Priority::IteratorPtr iterator) { - auto lock = lockFileSegment(); - setQueueIteratorUnlocked(iterator, lock); -} - -void FileSegment::setQueueIteratorUnlocked(Priority::IteratorPtr iterator, const FileSegmentGuard::Lock &) -{ + auto lk = lock(); if (queue_iterator) throw Exception(ErrorCodes::LOGICAL_ERROR, "Queue iterator cannot be set twice"); queue_iterator = iterator; } +void FileSegment::resetQueueIterator() +{ + auto lk = lock(); + queue_iterator.reset(); +} + size_t FileSegment::getCurrentWriteOffset() const { return range().left + downloaded_size; @@ -187,14 +188,14 @@ size_t FileSegment::getDownloadedSize() const void FileSegment::setDownloadedSize(size_t delta) { - auto lock = lockFileSegment(); + auto lk = lock(); downloaded_size += delta; assert(downloaded_size == std::filesystem::file_size(getPath())); } bool FileSegment::isDownloaded() const { - auto lock = lockFileSegment(); + auto lk = lock(); return download_state == State::DOWNLOADED; } @@ -208,8 +209,7 @@ String FileSegment::getCallerId() String FileSegment::getDownloader() const { - auto lock = lockFileSegment(); - return getDownloaderUnlocked(lock); + return getDownloaderUnlocked(lock()); } String FileSegment::getDownloaderUnlocked(const FileSegmentGuard::Lock &) const @@ -219,11 +219,11 @@ String FileSegment::getDownloaderUnlocked(const FileSegmentGuard::Lock &) const String FileSegment::getOrSetDownloader() { - auto lock = lockFileSegment(); + auto lk = lock(); - assertNotDetachedUnlocked(lock); + assertNotDetachedUnlocked(lk); - auto current_downloader = getDownloaderUnlocked(lock); + auto current_downloader = getDownloaderUnlocked(lk); if (current_downloader.empty()) { @@ -233,7 +233,7 @@ String FileSegment::getOrSetDownloader() return "notAllowed:" + stateToString(download_state); current_downloader = downloader_id = caller_id; - setDownloadState(State::DOWNLOADING, lock); + setDownloadState(State::DOWNLOADING, lk); chassert(key_metadata.lock()); } @@ -257,15 +257,15 @@ void FileSegment::resetDownloadingStateUnlocked(const FileSegmentGuard::Lock & l void FileSegment::resetDownloader() { - auto lock = lockFileSegment(); + auto lk = lock(); SCOPE_EXIT({ cv.notify_all(); }); - assertNotDetachedUnlocked(lock); - assertIsDownloaderUnlocked("resetDownloader", lock); + assertNotDetachedUnlocked(lk); + assertIsDownloaderUnlocked("resetDownloader", lk); - 
resetDownloadingStateUnlocked(lock); - resetDownloaderUnlocked(lock); + resetDownloadingStateUnlocked(lk); + resetDownloaderUnlocked(lk); } void FileSegment::resetDownloaderUnlocked(const FileSegmentGuard::Lock &) @@ -294,8 +294,8 @@ void FileSegment::assertIsDownloaderUnlocked(const std::string & operation, cons bool FileSegment::isDownloader() const { - auto lock = lockFileSegment(); - return isDownloaderUnlocked(lock); + auto lk = lock(); + return isDownloaderUnlocked(lk); } bool FileSegment::isDownloaderUnlocked(const FileSegmentGuard::Lock & lock) const @@ -305,21 +305,21 @@ bool FileSegment::isDownloaderUnlocked(const FileSegmentGuard::Lock & lock) cons FileSegment::RemoteFileReaderPtr FileSegment::getRemoteFileReader() { - auto lock = lockFileSegment(); - assertIsDownloaderUnlocked("getRemoteFileReader", lock); + auto lk = lock(); + assertIsDownloaderUnlocked("getRemoteFileReader", lk); return remote_file_reader; } void FileSegment::resetRemoteFileReader() { - auto lock = lockFileSegment(); - assertIsDownloaderUnlocked("resetRemoteFileReader", lock); + auto lk = lock(); + assertIsDownloaderUnlocked("resetRemoteFileReader", lk); remote_file_reader.reset(); } FileSegment::RemoteFileReaderPtr FileSegment::extractRemoteFileReader() { - auto lock = lockFileSegment(); + auto lk = lock(); if (remote_file_reader && (download_state == State::DOWNLOADED || download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION)) { @@ -330,8 +330,8 @@ FileSegment::RemoteFileReaderPtr FileSegment::extractRemoteFileReader() void FileSegment::setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_) { - auto lock = lockFileSegment(); - assertIsDownloaderUnlocked("setRemoteFileReader", lock); + auto lk = lock(); + assertIsDownloaderUnlocked("setRemoteFileReader", lk); if (remote_file_reader) throw Exception(ErrorCodes::LOGICAL_ERROR, "Remote file reader already exists"); @@ -347,9 +347,9 @@ void FileSegment::write(const char * from, size_t size, size_t offset) throw Exception(ErrorCodes::LOGICAL_ERROR, "Writing zero size is not allowed"); { - auto lock = lockFileSegment(); - assertIsDownloaderUnlocked("write", lock); - assertNotDetachedUnlocked(lock); + auto lk = lock(); + assertIsDownloaderUnlocked("write", lk); + assertNotDetachedUnlocked(lk); } const auto file_segment_path = getPath(); @@ -408,10 +408,10 @@ void FileSegment::write(const char * from, size_t size, size_t offset) const int code = e.getErrno(); const bool is_no_space_left_error = code == /* No space left on device */28 || code == /* Quota exceeded */122; - auto lock = lockFileSegment(); + auto lk = lock(); - e.addMessage(fmt::format("{}, current cache state: {}", e.what(), getInfoForLogUnlocked(lock))); - setDownloadFailedUnlocked(lock); + e.addMessage(fmt::format("{}, current cache state: {}", e.what(), getInfoForLogUnlocked(lk))); + setDownloadFailedUnlocked(lk); if (downloaded_size == 0 && fs::exists(file_segment_path)) { @@ -434,9 +434,9 @@ void FileSegment::write(const char * from, size_t size, size_t offset) } catch (Exception & e) { - auto lock = lockFileSegment(); - e.addMessage(fmt::format("{}, current cache state: {}", e.what(), getInfoForLogUnlocked(lock))); - setDownloadFailedUnlocked(lock); + auto lk = lock(); + e.addMessage(fmt::format("{}, current cache state: {}", e.what(), getInfoForLogUnlocked(lk))); + setDownloadFailedUnlocked(lk); throw; } @@ -449,7 +449,7 @@ FileSegment::State FileSegment::wait(size_t offset) span.addAttribute("clickhouse.key", key().toString()); span.addAttribute("clickhouse.offset", offset); - auto 
lock = lockFileSegment(); + auto lk = lock(); if (downloader_id.empty() || offset < getCurrentWriteOffset()) return download_state; @@ -462,10 +462,10 @@ FileSegment::State FileSegment::wait(size_t offset) LOG_TEST(log, "{} waiting on: {}, current downloader: {}", getCallerId(), range().toString(), downloader_id); ProfileEventTimeIncrement watch(ProfileEvents::FileSegmentWaitMicroseconds); - chassert(!getDownloaderUnlocked(lock).empty()); - chassert(!isDownloaderUnlocked(lock)); + chassert(!getDownloaderUnlocked(lk).empty()); + chassert(!isDownloaderUnlocked(lk)); - [[maybe_unused]] const auto ok = cv.wait_for(lock, std::chrono::seconds(60), [&, this]() + [[maybe_unused]] const auto ok = cv.wait_for(lk, std::chrono::seconds(60), [&, this]() { return download_state != State::DOWNLOADING || offset < getCurrentWriteOffset(); }); @@ -511,10 +511,10 @@ bool FileSegment::reserve(size_t size_to_reserve, size_t lock_wait_timeout_milli bool is_file_segment_size_exceeded; { - auto lock = lockFileSegment(); + auto lk = lock(); - assertNotDetachedUnlocked(lock); - assertIsDownloaderUnlocked("reserve", lock); + assertNotDetachedUnlocked(lk); + assertIsDownloaderUnlocked("reserve", lk); expected_downloaded_size = getDownloadedSize(); @@ -557,7 +557,7 @@ bool FileSegment::reserve(size_t size_to_reserve, size_t lock_wait_timeout_milli bool reserved = cache->tryReserve(*this, size_to_reserve, *reserve_stat, getKeyMetadata()->user, lock_wait_timeout_milliseconds); if (!reserved) - setDownloadFailedUnlocked(lockFileSegment()); + setDownloadFailedUnlocked(lock()); return reserved; } @@ -582,8 +582,8 @@ void FileSegment::setDownloadedUnlocked(const FileSegmentGuard::Lock &) void FileSegment::setDownloadFailed() { - auto lock = lockFileSegment(); - setDownloadFailedUnlocked(lock); + auto lk = lock(); + setDownloadFailedUnlocked(lk); } void FileSegment::setDownloadFailedUnlocked(const FileSegmentGuard::Lock & lock) @@ -605,22 +605,22 @@ void FileSegment::setDownloadFailedUnlocked(const FileSegmentGuard::Lock & lock) void FileSegment::completePartAndResetDownloader() { - auto lock = lockFileSegment(); + auto lk = lock(); SCOPE_EXIT({ cv.notify_all(); }); - assertNotDetachedUnlocked(lock); - assertIsDownloaderUnlocked("completePartAndResetDownloader", lock); + assertNotDetachedUnlocked(lk); + assertIsDownloaderUnlocked("completePartAndResetDownloader", lk); chassert(download_state == State::DOWNLOADING || download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION); if (download_state == State::DOWNLOADING) - resetDownloadingStateUnlocked(lock); + resetDownloadingStateUnlocked(lk); - resetDownloaderUnlocked(lock); + resetDownloaderUnlocked(lk); - LOG_TEST(log, "Complete batch. ({})", getInfoForLogUnlocked(lock)); + LOG_TEST(log, "Complete batch. 
({})", getInfoForLogUnlocked(lk)); } void FileSegment::complete() @@ -640,7 +640,7 @@ void FileSegment::complete() throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot complete file segment: {}", getInfoForLog()); } - auto segment_lock = lockFileSegment(); + auto segment_lock = lock(); if (isCompleted(false)) return; @@ -756,8 +756,8 @@ void FileSegment::complete() String FileSegment::getInfoForLog() const { - auto lock = lockFileSegment(); - return getInfoForLogUnlocked(lock); + auto lk = lock(); + return getInfoForLogUnlocked(lk); } String FileSegment::getInfoForLogUnlocked(const FileSegmentGuard::Lock &) const @@ -799,7 +799,7 @@ String FileSegment::stateToString(FileSegment::State state) bool FileSegment::assertCorrectness() const { - return assertCorrectnessUnlocked(lockFileSegment()); + return assertCorrectnessUnlocked(lock()); } bool FileSegment::assertCorrectnessUnlocked(const FileSegmentGuard::Lock & lock) const @@ -869,8 +869,8 @@ bool FileSegment::assertCorrectnessUnlocked(const FileSegmentGuard::Lock & lock) void FileSegment::assertNotDetached() const { - auto lock = lockFileSegment(); - assertNotDetachedUnlocked(lock); + auto lk = lock(); + assertNotDetachedUnlocked(lk); } void FileSegment::assertNotDetachedUnlocked(const FileSegmentGuard::Lock & lock) const @@ -887,7 +887,7 @@ void FileSegment::assertNotDetachedUnlocked(const FileSegmentGuard::Lock & lock) FileSegment::Info FileSegment::getInfo(const FileSegmentPtr & file_segment) { - auto lock = file_segment->lockFileSegment(); + auto lock = file_segment->lock(); auto key_metadata = file_segment->tryGetKeyMetadata(); return Info{ .key = file_segment->key(), @@ -910,7 +910,7 @@ FileSegment::Info FileSegment::getInfo(const FileSegmentPtr & file_segment) bool FileSegment::isDetached() const { - auto lock = lockFileSegment(); + auto lk = lock(); return download_state == State::DETACHED; } @@ -926,7 +926,7 @@ bool FileSegment::isCompleted(bool sync) const if (is_completed_state()) return true; - auto lock = lockFileSegment(); + auto lk = lock(); return is_completed_state(); } diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 12046e59bd61..7793c50d2d5d 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -171,12 +171,13 @@ friend class FileCache; /// Because of reserved_size in tryReserve(). * ========== Methods used by `cache` ======================== */ - FileSegmentGuard::Lock lock() const { return segment_guard.lock(); } + FileSegmentGuard::Lock lock() const; Priority::IteratorPtr getQueueIterator() const; void setQueueIterator(Priority::IteratorPtr iterator); - void setQueueIteratorUnlocked(Priority::IteratorPtr iterator, const FileSegmentGuard::Lock &); + + void resetQueueIterator(); KeyMetadataPtr tryGetKeyMetadata() const; @@ -242,7 +243,6 @@ friend class FileCache; /// Because of reserved_size in tryReserve(). 
bool assertCorrectnessUnlocked(const FileSegmentGuard::Lock &) const; LockedKeyPtr lockKeyMetadata(bool assert_exists = true) const; - FileSegmentGuard::Lock lockFileSegment() const; String tryGetPath() const; From 607a9d761babcae14c9d4ff64c8753900464e2b8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 2 Apr 2024 19:04:02 +0200 Subject: [PATCH 114/470] Fix --- src/Interpreters/Cache/EvictionCandidates.cpp | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/Cache/EvictionCandidates.cpp b/src/Interpreters/Cache/EvictionCandidates.cpp index 4ca6aeea22ea..5fa2b337e648 100644 --- a/src/Interpreters/Cache/EvictionCandidates.cpp +++ b/src/Interpreters/Cache/EvictionCandidates.cpp @@ -67,11 +67,11 @@ void EvictionCandidates::removeQueueEntries(const CachePriorityGuard::Lock & loc { for (const auto & candidate : key_candidates.candidates) { - const auto & file_segment = candidate->file_segment; - auto file_segment_lock = file_segment->lock(); + auto queue_iterator = candidate->getQueueIterator(); + queue_iterator->invalidate(); - candidate->getQueueIterator()->remove(lock); - file_segment->setQueueIteratorUnlocked(nullptr, file_segment_lock); + candidate->file_segment->resetQueueIterator(); + queue_iterator->remove(lock); } } removed_queue_entries = true; @@ -101,10 +101,14 @@ void EvictionCandidates::evict() { auto & candidate = key_candidates.candidates.back(); chassert(candidate->releasable()); - const auto segment = candidate->file_segment; - auto iterator = segment->getQueueIterator(); - chassert(iterator); + + IFileCachePriority::IteratorPtr iterator; + if (!removed_queue_entries) + { + iterator = segment->getQueueIterator(); + chassert(iterator); + } ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedFileSegments); ProfileEvents::increment(ProfileEvents::FilesystemCacheEvictedBytes, segment->range().size()); @@ -133,7 +137,7 @@ void EvictionCandidates::evict() /// it was freed in favour of some reserver, so we can make it visibly /// free only for that particular reserver. 
- if (!removed_queue_entries) + if (iterator) queue_entries_to_invalidate.push_back(iterator); key_candidates.candidates.pop_back(); From 2fd23a15653cef89400a914824e44a27c823beeb Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Tue, 2 Apr 2024 19:05:39 +0200 Subject: [PATCH 115/470] Close: https://github.com/ClickHouse/ClickHouse/issues/9954 --- .../03047_analyzer_alias_join.reference | 2 ++ .../0_stateless/03047_analyzer_alias_join.sql | 30 +++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 tests/queries/0_stateless/03047_analyzer_alias_join.reference create mode 100644 tests/queries/0_stateless/03047_analyzer_alias_join.sql diff --git a/tests/queries/0_stateless/03047_analyzer_alias_join.reference b/tests/queries/0_stateless/03047_analyzer_alias_join.reference new file mode 100644 index 000000000000..3bb920838494 --- /dev/null +++ b/tests/queries/0_stateless/03047_analyzer_alias_join.reference @@ -0,0 +1,2 @@ +1 1 \N +1 2 1 diff --git a/tests/queries/0_stateless/03047_analyzer_alias_join.sql b/tests/queries/0_stateless/03047_analyzer_alias_join.sql new file mode 100644 index 000000000000..a6f81516430c --- /dev/null +++ b/tests/queries/0_stateless/03047_analyzer_alias_join.sql @@ -0,0 +1,30 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/9954 +SELECT + 1 AS value, + * +FROM +( + SELECT 1 AS key +) AS l +LEFT JOIN +( + SELECT + 2 AS key, + 1 AS value +) AS r USING (key) +SETTINGS join_use_nulls = 1; + +SELECT + 1 AS value, + * +FROM +( + SELECT 2 AS key +) AS l +LEFT JOIN +( + SELECT + 2 AS key, + 1 AS value +) AS r USING (key) +SETTINGS join_use_nulls = 1 From a19bce649aef3798972e8fe4129bbc0ac4dbdd84 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 17:07:55 +0000 Subject: [PATCH 116/470] Close: https://github.com/ClickHouse/ClickHouse/issues/32639 --- .../03047_group_by_field_identified_aggregation.reference | 1 + .../03047_group_by_field_identified_aggregation.sql | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 tests/queries/0_stateless/03047_group_by_field_identified_aggregation.reference create mode 100644 tests/queries/0_stateless/03047_group_by_field_identified_aggregation.sql diff --git a/tests/queries/0_stateless/03047_group_by_field_identified_aggregation.reference b/tests/queries/0_stateless/03047_group_by_field_identified_aggregation.reference new file mode 100644 index 000000000000..556d825db42a --- /dev/null +++ b/tests/queries/0_stateless/03047_group_by_field_identified_aggregation.reference @@ -0,0 +1 @@ +2 1 diff --git a/tests/queries/0_stateless/03047_group_by_field_identified_aggregation.sql b/tests/queries/0_stateless/03047_group_by_field_identified_aggregation.sql new file mode 100644 index 000000000000..e1363ea4ddac --- /dev/null +++ b/tests/queries/0_stateless/03047_group_by_field_identified_aggregation.sql @@ -0,0 +1,3 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/32639 + +SELECT 0 AND id ? 
1 : 2 AS a, sum(id) FROM (SELECT 1 AS id) GROUP BY a; From a3f973662289a7ad218f8bf7994b207b4891e64f Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 17:13:13 +0000 Subject: [PATCH 117/470] Close: https://github.com/ClickHouse/ClickHouse/issues/41964 --- ...48_not_found_column_xxx_in_block.reference | 0 .../03048_not_found_column_xxx_in_block.sql | 66 +++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 tests/queries/0_stateless/03048_not_found_column_xxx_in_block.reference create mode 100644 tests/queries/0_stateless/03048_not_found_column_xxx_in_block.sql diff --git a/tests/queries/0_stateless/03048_not_found_column_xxx_in_block.reference b/tests/queries/0_stateless/03048_not_found_column_xxx_in_block.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03048_not_found_column_xxx_in_block.sql b/tests/queries/0_stateless/03048_not_found_column_xxx_in_block.sql new file mode 100644 index 000000000000..25f88050eb12 --- /dev/null +++ b/tests/queries/0_stateless/03048_not_found_column_xxx_in_block.sql @@ -0,0 +1,66 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/41964 + +DROP TABLE IF EXISTS ab_12_aaa; +DROP TABLE IF EXISTS ab_12_bbb; + +CREATE TABLE ab_12_aaa +( + `id` String, + `subid` Int32, + `prodcat` String, + `prodtype` String, + `quality` String, + `m1` Float64, + `m2` Float64, + `r1` Float64, + `r2` Float64, + `d1` Float64, + `d2` Float64, + `pcs` Float64, + `qty` Float64, + `amt` Float64, + `amts` Float64, + `prc` Float64, + `prcs` Float64, + `suqty` Float64, + `suamt` Float64, + `_year` String +) +ENGINE = MergeTree +ORDER BY (_year, prodcat, prodtype, quality, d1, id) +SETTINGS index_granularity = 8192; + +CREATE TABLE ab_12_bbb +( + `id` String, + `sales_type` String, + `date` Date32, + `o1` String, + `o2` String, + `o3` String, + `o4` String, + `o5` String, + `short` String, + `a1` String, + `a2` String, + `a3` String, + `idx` String, + `a4` String, + `ctx` String, + `_year` String, + `theyear` UInt16 MATERIALIZED toYear(`date`), + `themonth` UInt8 MATERIALIZED toMonth(`date`), + `theweek` UInt8 MATERIALIZED toISOWeek(`date`) +) +ENGINE = MergeTree +ORDER BY (theyear, themonth, _year, id, sales_type, date) +SETTINGS index_granularity = 8192; + +SELECT * +FROM ab_12_aaa aa +LEFT JOIN ab_12_bbb bb +ON bb.id = aa.id AND bb.`_year` = aa.`_year` +WHERE bb.theyear >= 2019; + +DROP TABLE IF EXISTS ab_12_aaa; +DROP TABLE IF EXISTS ab_12_bbb; From 14820032799b252091910a3fd999866078976761 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Tue, 2 Apr 2024 19:16:49 +0200 Subject: [PATCH 118/470] Close: https://github.com/ClickHouse/ClickHouse/issues/7520 --- .../03049_analyzer_group_by_alias.reference | 7 ++++++ .../03049_analyzer_group_by_alias.sql | 22 +++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 tests/queries/0_stateless/03049_analyzer_group_by_alias.reference create mode 100644 tests/queries/0_stateless/03049_analyzer_group_by_alias.sql diff --git a/tests/queries/0_stateless/03049_analyzer_group_by_alias.reference b/tests/queries/0_stateless/03049_analyzer_group_by_alias.reference new file mode 100644 index 000000000000..dac5487d4455 --- /dev/null +++ b/tests/queries/0_stateless/03049_analyzer_group_by_alias.reference @@ -0,0 +1,7 @@ +1 5 +2 4 +1 +2 +1 +1 +2 diff --git a/tests/queries/0_stateless/03049_analyzer_group_by_alias.sql b/tests/queries/0_stateless/03049_analyzer_group_by_alias.sql new file mode 100644 index 000000000000..67df48e0cad6 --- /dev/null +++ 
b/tests/queries/0_stateless/03049_analyzer_group_by_alias.sql @@ -0,0 +1,22 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/7520 +CREATE TABLE test (`a` UInt32, `b` UInt32) ENGINE = Memory; + +INSERT INTO test VALUES (1,2), (1,3), (2,4); + +-- 1 5 +-- 2 4 + +WITH + a as key +SELECT + a as k1, + sum(b) as k2 +FROM + test +GROUP BY + key +ORDER BY k1, k2; + +WITH a as key SELECT key as k1 FROM test GROUP BY key ORDER BY key; + +WITH a as key SELECT key as k1 FROM test ORDER BY key; From 167c993503f62ea2e9b91816a17e25ae48aaf98d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 17:17:10 +0000 Subject: [PATCH 119/470] Close: https://github.com/ClickHouse/ClickHouse/issues/54317 --- ...9_unknown_identifier_materialized_column.sql | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql diff --git a/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql b/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql new file mode 100644 index 000000000000..2f12799addb3 --- /dev/null +++ b/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql @@ -0,0 +1,17 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/54317 +DROP DATABASE IF EXISTS 03049_database; +DROP TABLE IF EXISTS 03049_database.l; +DROP TABLE IF EXISTS 03049_database.r; + +CREATE DATABASE 03049_database; +USE 03049_database; + +CREATE TABLE l (y String) Engine Memory; +CREATE TABLE r (d Date, y String, ty UInt16 MATERIALIZED toYear(d)) Engine Memory; +select * from l L left join r R on L.y = R.y where R.ty >= 2019; +select * from l left join r on l.y = r.y where r.ty >= 2019; +select * from 03049_database.l left join 03049_database.r on l.y = r.y where r.ty >= 2019; + +DROP DATABASE IF EXISTS 03049_database; +DROP TABLE IF EXISTS 03049_database.l; +DROP TABLE IF EXISTS 03049_database.r; From e7e20acc5b13d4a84754d42a38356f3b009531c0 Mon Sep 17 00:00:00 2001 From: justindeguzman Date: Tue, 2 Apr 2024 10:22:57 -0700 Subject: [PATCH 120/470] Bump From 51b2a8bce250867a9c54408e2d4f8e72887d68ab Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 17:23:20 +0000 Subject: [PATCH 121/470] Close: 111 --- tests/queries/0_stateless/03050_select_one_one_one.reference | 2 ++ tests/queries/0_stateless/03050_select_one_one_one.sql | 3 +++ 2 files changed, 5 insertions(+) create mode 100644 tests/queries/0_stateless/03050_select_one_one_one.reference create mode 100644 tests/queries/0_stateless/03050_select_one_one_one.sql diff --git a/tests/queries/0_stateless/03050_select_one_one_one.reference b/tests/queries/0_stateless/03050_select_one_one_one.reference new file mode 100644 index 000000000000..85e6138dc5d6 --- /dev/null +++ b/tests/queries/0_stateless/03050_select_one_one_one.reference @@ -0,0 +1,2 @@ +1 1 1 +1 1 1 diff --git a/tests/queries/0_stateless/03050_select_one_one_one.sql b/tests/queries/0_stateless/03050_select_one_one_one.sql new file mode 100644 index 000000000000..eee973fe9365 --- /dev/null +++ b/tests/queries/0_stateless/03050_select_one_one_one.sql @@ -0,0 +1,3 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/36973 +SELECT 1, 1, 1; +SELECT * FROM (SELECT 1, 1, 1); From d9048766933a11b283e0e2345d6bc6c9d0a57699 Mon Sep 17 00:00:00 2001 From: justindeguzman Date: Tue, 2 Apr 2024 10:23:50 -0700 Subject: [PATCH 122/470] Bump From acf1fb3a6cf3e79899515641848e3214db0c36f6 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 
17:25:13 +0000 Subject: [PATCH 123/470] Forgotten file --- .../03049_unknown_identifier_materialized_column.reference | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/queries/0_stateless/03049_unknown_identifier_materialized_column.reference diff --git a/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.reference b/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.reference new file mode 100644 index 000000000000..e69de29bb2d1 From 9ffe6a4640e6197d653c1c5073a98cc659d3f8f6 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 17:26:18 +0000 Subject: [PATCH 124/470] Close: https://github.com/ClickHouse/ClickHouse/issues/40955 --- tests/queries/0_stateless/03051_many_ctes.reference | 4 ++++ tests/queries/0_stateless/03051_many_ctes.sql | 5 +++++ 2 files changed, 9 insertions(+) create mode 100644 tests/queries/0_stateless/03051_many_ctes.reference create mode 100644 tests/queries/0_stateless/03051_many_ctes.sql diff --git a/tests/queries/0_stateless/03051_many_ctes.reference b/tests/queries/0_stateless/03051_many_ctes.reference new file mode 100644 index 000000000000..487b1165348b --- /dev/null +++ b/tests/queries/0_stateless/03051_many_ctes.reference @@ -0,0 +1,4 @@ +2 +2 +2 +2 diff --git a/tests/queries/0_stateless/03051_many_ctes.sql b/tests/queries/0_stateless/03051_many_ctes.sql new file mode 100644 index 000000000000..412a1e6b544c --- /dev/null +++ b/tests/queries/0_stateless/03051_many_ctes.sql @@ -0,0 +1,5 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/40955 +WITH toInt64(2) AS new_x SELECT new_x AS x FROM (SELECT 1 AS x) t; +WITH toInt64(2) AS new_x SELECT * replace(new_x as x) FROM (SELECT 1 AS x) t; +SELECT 2 AS x FROM (SELECT 1 AS x) t; +SELECT * replace(2 as x) FROM (SELECT 1 AS x) t; From 9673e0a9fcf3006f791da2c0c28cfa4d7b5ac9c7 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 17:29:01 +0000 Subject: [PATCH 125/470] Just a test --- ...3052_query_hash_includes_aliases.reference | 2 ++ .../03052_query_hash_includes_aliases.sql | 31 +++++++++++++++++++ 2 files changed, 33 insertions(+) create mode 100644 tests/queries/0_stateless/03052_query_hash_includes_aliases.reference create mode 100644 tests/queries/0_stateless/03052_query_hash_includes_aliases.sql diff --git a/tests/queries/0_stateless/03052_query_hash_includes_aliases.reference b/tests/queries/0_stateless/03052_query_hash_includes_aliases.reference new file mode 100644 index 000000000000..570d7be9c4bf --- /dev/null +++ b/tests/queries/0_stateless/03052_query_hash_includes_aliases.reference @@ -0,0 +1,2 @@ +(1,1) (1,0) +(3,4) (3,11) diff --git a/tests/queries/0_stateless/03052_query_hash_includes_aliases.sql b/tests/queries/0_stateless/03052_query_hash_includes_aliases.sql new file mode 100644 index 000000000000..e76108c78429 --- /dev/null +++ b/tests/queries/0_stateless/03052_query_hash_includes_aliases.sql @@ -0,0 +1,31 @@ +-- https://github.com/ClickHouse/ClickHouse/pull/40065 + +SELECT +( + SELECT + 1 AS number, + number + FROM numbers(1) +) AS s, +( + SELECT + 1, + number + FROM numbers(1) +) AS s2; + +SELECT +( + SELECT + 1 + 2 AS number, + 1 + number AS b + FROM system.numbers + LIMIT 10, 1 +), +( + SELECT + 1 + 2 AS number2, + 1 + number AS b + FROM system.numbers + LIMIT 10, 1 +); From 593abc4037ecc420cd713f62bc166738071ec57b Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 19:36:34 +0200 Subject: [PATCH 126/470] Update 03047_analyzer_alias_join.sql --- 
tests/queries/0_stateless/03047_analyzer_alias_join.sql | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/queries/0_stateless/03047_analyzer_alias_join.sql b/tests/queries/0_stateless/03047_analyzer_alias_join.sql index a6f81516430c..ef8c067bb72d 100644 --- a/tests/queries/0_stateless/03047_analyzer_alias_join.sql +++ b/tests/queries/0_stateless/03047_analyzer_alias_join.sql @@ -1,4 +1,3 @@ --- https://github.com/ClickHouse/ClickHouse/issues/9954 SELECT 1 AS value, * From 4fc09fb746ef7902d01d54cf9d2f538d168ec9b9 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 20:03:56 +0200 Subject: [PATCH 127/470] Update 03034_normalized_ast.sql --- tests/queries/0_stateless/03034_normalized_ast.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03034_normalized_ast.sql b/tests/queries/0_stateless/03034_normalized_ast.sql index ff6f8da118cd..fe801cd8cf4d 100644 --- a/tests/queries/0_stateless/03034_normalized_ast.sql +++ b/tests/queries/0_stateless/03034_normalized_ast.sql @@ -3,5 +3,6 @@ SELECT concat(database, table) AS name, count() FROM clusterAllReplicas(default, system.tables) +WHERE database=currentDatabase() GROUP BY name FORMAT Null; From 69b393cf3eef9d22d180af3e36debf5128b4c27c Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Apr 2024 20:35:50 +0200 Subject: [PATCH 128/470] Fxi standalone build --- src/Common/ThreadStatus.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 2d33c0ac0214..550cb76e7366 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -1,5 +1,6 @@ #pragma once +#include "config.h" #include #include #include @@ -307,7 +308,12 @@ class ThreadStatus : public boost::noncopyable void flushUntrackedMemory(); +#ifdef CLICKHOUSE_KEEPER_STANDALONE_BUILD + void initGlobalProfiler(UInt64, UInt64) {} +#else void initGlobalProfiler(UInt64 global_profiler_real_time_period, UInt64 global_profiler_cpu_time_period); +#endif + private: void applyGlobalSettings(); void applyQuerySettings(); From c2995b13e21b49bf490ef603d237361b639400a9 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 2 Apr 2024 20:44:52 +0200 Subject: [PATCH 129/470] Use ClickHouse threads in NuRaft --- contrib/NuRaft | 2 +- contrib/nuraft-cmake/CMakeLists.txt | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/contrib/NuRaft b/contrib/NuRaft index 4a12f99dfc9d..717657cd94da 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 4a12f99dfc9d47c687ff7700b927cc76856225d1 +Subproject commit 717657cd94da01e86733e58f8d3f0ca0d8748712 diff --git a/contrib/nuraft-cmake/CMakeLists.txt b/contrib/nuraft-cmake/CMakeLists.txt index eaca00566d61..e5d0d6ccb4e2 100644 --- a/contrib/nuraft-cmake/CMakeLists.txt +++ b/contrib/nuraft-cmake/CMakeLists.txt @@ -50,6 +50,10 @@ else() target_compile_definitions(_nuraft PRIVATE USE_BOOST_ASIO=1 BOOST_ASIO_STANDALONE=1) endif() +target_link_libraries (_nuraft PRIVATE clickhouse_common_io) +target_compile_definitions(_nuraft PRIVATE USE_CLICKHOUSE_THREADS=1) +MESSAGE(STATUS "Will use clickhouse threads for NuRaft") + target_include_directories (_nuraft SYSTEM PRIVATE "${LIBRARY_DIR}/include/libnuraft") # for some reason include "asio.h" directly without "boost/" prefix. 
target_include_directories (_nuraft SYSTEM PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/boost/boost") From fb2163ed7c2159b3444a2c6ceec459ee77a08322 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 21:24:13 +0200 Subject: [PATCH 130/470] Apply suggestions from code review --- tests/queries/0_stateless/03034_normalized_ast.sql | 2 +- .../queries/0_stateless/03035_alias_column_bug_distributed.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03034_normalized_ast.sql b/tests/queries/0_stateless/03034_normalized_ast.sql index fe801cd8cf4d..dd109eb5113e 100644 --- a/tests/queries/0_stateless/03034_normalized_ast.sql +++ b/tests/queries/0_stateless/03034_normalized_ast.sql @@ -2,7 +2,7 @@ SELECT concat(database, table) AS name, count() -FROM clusterAllReplicas(default, system.tables) +FROM clusterAllReplicas(test_shard_localhost, system.tables) WHERE database=currentDatabase() GROUP BY name FORMAT Null; diff --git a/tests/queries/0_stateless/03035_alias_column_bug_distributed.sql b/tests/queries/0_stateless/03035_alias_column_bug_distributed.sql index fb459b3289b0..5fd1f3974255 100644 --- a/tests/queries/0_stateless/03035_alias_column_bug_distributed.sql +++ b/tests/queries/0_stateless/03035_alias_column_bug_distributed.sql @@ -11,7 +11,7 @@ ORDER BY src; CREATE TABLE alias_bug_dist AS alias_bug -ENGINE = Distributed('default', currentDatabase(), 'alias_bug', rand()); +ENGINE = Distributed('test_shard_localhost', currentDatabase(), 'alias_bug', rand()); INSERT INTO alias_bug VALUES ('SOURCE1'); From 81f210be9328d617e1083283cbd3b654856120fc Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 21:53:03 +0200 Subject: [PATCH 131/470] Update tests/queries/0_stateless/03035_alias_column_bug_distributed.sql --- .../queries/0_stateless/03035_alias_column_bug_distributed.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03035_alias_column_bug_distributed.sql b/tests/queries/0_stateless/03035_alias_column_bug_distributed.sql index 5fd1f3974255..3a7b4890bf01 100644 --- a/tests/queries/0_stateless/03035_alias_column_bug_distributed.sql +++ b/tests/queries/0_stateless/03035_alias_column_bug_distributed.sql @@ -34,7 +34,7 @@ ORDER BY src; CREATE TABLE alias_bug_dist AS alias_bug -ENGINE = Distributed('default', currentDatabase(), 'alias_bug', rand()); +ENGINE = Distributed('test_shard_localhost', currentDatabase(), 'alias_bug', rand()); -- Unknown identifier SELECT CAST(123, 'String') AS src,theAlias FROM alias_bug_dist LIMIT 1 FORMAT Null; From 606058c1ca489f8fcc77ade96d5d1e39573a0628 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 2 Apr 2024 13:29:07 +0000 Subject: [PATCH 132/470] Consolidate SQL compat alias docs into data type docs + improve sidebar order --- .../data-types/aggregatefunction.md | 2 +- docs/en/sql-reference/data-types/array.md | 2 +- docs/en/sql-reference/data-types/boolean.md | 2 +- docs/en/sql-reference/data-types/date.md | 2 +- docs/en/sql-reference/data-types/date32.md | 2 +- docs/en/sql-reference/data-types/datetime.md | 2 +- .../en/sql-reference/data-types/datetime64.md | 2 +- docs/en/sql-reference/data-types/decimal.md | 2 +- docs/en/sql-reference/data-types/enum.md | 2 +- .../sql-reference/data-types/fixedstring.md | 4 +-- docs/en/sql-reference/data-types/float.md | 2 +- docs/en/sql-reference/data-types/geo.md | 4 +-- docs/en/sql-reference/data-types/index.md | 4 +-- docs/en/sql-reference/data-types/int-uint.md | 2 +- 
docs/en/sql-reference/data-types/ipv4.md | 2 +- docs/en/sql-reference/data-types/ipv6.md | 2 +- docs/en/sql-reference/data-types/json.md | 2 +- .../data-types/lowcardinality.md | 6 ++-- docs/en/sql-reference/data-types/map.md | 8 +++--- .../data-types/multiword-types.md | 27 ------------------ docs/en/sql-reference/data-types/nullable.md | 4 +-- .../data-types/simpleaggregatefunction.md | 2 ++ docs/en/sql-reference/data-types/string.md | 4 +-- docs/en/sql-reference/data-types/tuple.md | 4 +-- docs/en/sql-reference/data-types/uuid.md | 2 +- docs/en/sql-reference/data-types/variant.md | 6 ++-- .../data-types/multiword-types.md | 28 ------------------- .../data-types/multiword-types.mdx | 10 ------- 28 files changed, 39 insertions(+), 102 deletions(-) delete mode 100644 docs/en/sql-reference/data-types/multiword-types.md delete mode 100644 docs/ru/sql-reference/data-types/multiword-types.md delete mode 100644 docs/zh/sql-reference/data-types/multiword-types.mdx diff --git a/docs/en/sql-reference/data-types/aggregatefunction.md b/docs/en/sql-reference/data-types/aggregatefunction.md index fe6d7ebe0dc4..87511a505dc9 100644 --- a/docs/en/sql-reference/data-types/aggregatefunction.md +++ b/docs/en/sql-reference/data-types/aggregatefunction.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/aggregatefunction -sidebar_position: 53 +sidebar_position: 46 sidebar_label: AggregateFunction --- diff --git a/docs/en/sql-reference/data-types/array.md b/docs/en/sql-reference/data-types/array.md index 0ee7c8de93ce..e5a8ce5d18b1 100644 --- a/docs/en/sql-reference/data-types/array.md +++ b/docs/en/sql-reference/data-types/array.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/array -sidebar_position: 52 +sidebar_position: 32 sidebar_label: Array(T) --- diff --git a/docs/en/sql-reference/data-types/boolean.md b/docs/en/sql-reference/data-types/boolean.md index 70abf767a41b..4c59bd947ded 100644 --- a/docs/en/sql-reference/data-types/boolean.md +++ b/docs/en/sql-reference/data-types/boolean.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/boolean -sidebar_position: 43 +sidebar_position: 22 sidebar_label: Boolean --- diff --git a/docs/en/sql-reference/data-types/date.md b/docs/en/sql-reference/data-types/date.md index 26e4610aec76..7adee3bbf3cf 100644 --- a/docs/en/sql-reference/data-types/date.md +++ b/docs/en/sql-reference/data-types/date.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/date -sidebar_position: 47 +sidebar_position: 12 sidebar_label: Date --- diff --git a/docs/en/sql-reference/data-types/date32.md b/docs/en/sql-reference/data-types/date32.md index 38a07cd817da..a08c931b7fc3 100644 --- a/docs/en/sql-reference/data-types/date32.md +++ b/docs/en/sql-reference/data-types/date32.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/date32 -sidebar_position: 48 +sidebar_position: 14 sidebar_label: Date32 --- diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index 1adff18f598e..889bc682d915 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/datetime -sidebar_position: 48 +sidebar_position: 16 sidebar_label: DateTime --- diff --git a/docs/en/sql-reference/data-types/datetime64.md b/docs/en/sql-reference/data-types/datetime64.md index 504d0e2b0a6d..ef452a723e6b 100644 --- a/docs/en/sql-reference/data-types/datetime64.md +++ b/docs/en/sql-reference/data-types/datetime64.md @@ -1,6 +1,6 @@ --- slug: 
/en/sql-reference/data-types/datetime64 -sidebar_position: 49 +sidebar_position: 18 sidebar_label: DateTime64 --- diff --git a/docs/en/sql-reference/data-types/decimal.md b/docs/en/sql-reference/data-types/decimal.md index 2b32e72a28f9..dfdefdff5a5e 100644 --- a/docs/en/sql-reference/data-types/decimal.md +++ b/docs/en/sql-reference/data-types/decimal.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/decimal -sidebar_position: 42 +sidebar_position: 6 sidebar_label: Decimal --- diff --git a/docs/en/sql-reference/data-types/enum.md b/docs/en/sql-reference/data-types/enum.md index 02e73a0360ea..ccfeb7f3416a 100644 --- a/docs/en/sql-reference/data-types/enum.md +++ b/docs/en/sql-reference/data-types/enum.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/enum -sidebar_position: 50 +sidebar_position: 20 sidebar_label: Enum --- diff --git a/docs/en/sql-reference/data-types/fixedstring.md b/docs/en/sql-reference/data-types/fixedstring.md index a56b3fccbc17..0316df7fe348 100644 --- a/docs/en/sql-reference/data-types/fixedstring.md +++ b/docs/en/sql-reference/data-types/fixedstring.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/data-types/fixedstring -sidebar_position: 45 +sidebar_position: 10 sidebar_label: FixedString(N) --- -# FixedString +# FixedString(N) A fixed-length string of `N` bytes (neither characters nor code points). diff --git a/docs/en/sql-reference/data-types/float.md b/docs/en/sql-reference/data-types/float.md index be7b2a7fcd87..23131d5b4fe3 100644 --- a/docs/en/sql-reference/data-types/float.md +++ b/docs/en/sql-reference/data-types/float.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/float -sidebar_position: 41 +sidebar_position: 4 sidebar_label: Float32, Float64 --- diff --git a/docs/en/sql-reference/data-types/geo.md b/docs/en/sql-reference/data-types/geo.md index 1d37b829dd56..7e3c32b34517 100644 --- a/docs/en/sql-reference/data-types/geo.md +++ b/docs/en/sql-reference/data-types/geo.md @@ -1,8 +1,8 @@ --- slug: /en/sql-reference/data-types/geo -sidebar_position: 62 +sidebar_position: 54 sidebar_label: Geo -title: "Geo Data Types" +title: "Geometric" --- ClickHouse supports data types for representing geographical objects — locations, lands, etc. diff --git a/docs/en/sql-reference/data-types/index.md b/docs/en/sql-reference/data-types/index.md index ffd063590fa8..fcb0b60d0226 100644 --- a/docs/en/sql-reference/data-types/index.md +++ b/docs/en/sql-reference/data-types/index.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/data-types/ sidebar_label: List of data types -sidebar_position: 37 +sidebar_position: 1 --- -# ClickHouse Data Types +# Data Types in ClickHouse ClickHouse can store various kinds of data in table cells. This section describes the supported data types and special considerations for using and/or implementing them if any. 
diff --git a/docs/en/sql-reference/data-types/int-uint.md b/docs/en/sql-reference/data-types/int-uint.md index 520454a859fb..52d2982de19e 100644 --- a/docs/en/sql-reference/data-types/int-uint.md +++ b/docs/en/sql-reference/data-types/int-uint.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/int-uint -sidebar_position: 40 +sidebar_position: 2 sidebar_label: UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 --- diff --git a/docs/en/sql-reference/data-types/ipv4.md b/docs/en/sql-reference/data-types/ipv4.md index 288806f47b35..637ed543e084 100644 --- a/docs/en/sql-reference/data-types/ipv4.md +++ b/docs/en/sql-reference/data-types/ipv4.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/ipv4 -sidebar_position: 59 +sidebar_position: 28 sidebar_label: IPv4 --- diff --git a/docs/en/sql-reference/data-types/ipv6.md b/docs/en/sql-reference/data-types/ipv6.md index 97959308b58d..642a7db81fc3 100644 --- a/docs/en/sql-reference/data-types/ipv6.md +++ b/docs/en/sql-reference/data-types/ipv6.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/ipv6 -sidebar_position: 60 +sidebar_position: 30 sidebar_label: IPv6 --- diff --git a/docs/en/sql-reference/data-types/json.md b/docs/en/sql-reference/data-types/json.md index fd548a0d5a28..39e37abad82c 100644 --- a/docs/en/sql-reference/data-types/json.md +++ b/docs/en/sql-reference/data-types/json.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/json -sidebar_position: 54 +sidebar_position: 26 sidebar_label: JSON --- diff --git a/docs/en/sql-reference/data-types/lowcardinality.md b/docs/en/sql-reference/data-types/lowcardinality.md index db10103282d4..133ac2bd72eb 100644 --- a/docs/en/sql-reference/data-types/lowcardinality.md +++ b/docs/en/sql-reference/data-types/lowcardinality.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/data-types/lowcardinality -sidebar_position: 51 -sidebar_label: LowCardinality +sidebar_position: 42 +sidebar_label: LowCardinality(T) --- -# LowCardinality +# LowCardinality(T) Changes the internal representation of other data types to be dictionary-encoded. diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md index e0c8b98f9f83..2c734969afcb 100644 --- a/docs/en/sql-reference/data-types/map.md +++ b/docs/en/sql-reference/data-types/map.md @@ -1,12 +1,12 @@ --- slug: /en/sql-reference/data-types/map -sidebar_position: 65 -sidebar_label: Map(key, value) +sidebar_position: 36 +sidebar_label: Map(K, V) --- -# Map(key, value) +# Map(K, V) -`Map(key, value)` data type stores `key:value` pairs. +`Map(K, V)` data type stores `key:value` pairs. **Parameters** diff --git a/docs/en/sql-reference/data-types/multiword-types.md b/docs/en/sql-reference/data-types/multiword-types.md deleted file mode 100644 index ebbe1d845447..000000000000 --- a/docs/en/sql-reference/data-types/multiword-types.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -slug: /en/sql-reference/data-types/multiword-types -sidebar_position: 61 -sidebar_label: Multiword Type Names -title: "Multiword Types" ---- - -When creating tables, you can use data types with a name consisting of several words. This is implemented for better SQL compatibility. 
- -## Multiword Types Support - -| Multiword types | Simple types | -|----------------------------------|--------------------------------------------------------------| -| DOUBLE PRECISION | [Float64](../../sql-reference/data-types/float.md) | -| CHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| CHAR VARYING | [String](../../sql-reference/data-types/string.md) | -| CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) | -| NCHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| NCHAR VARYING | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHAR VARYING | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHARACTER | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHAR | [String](../../sql-reference/data-types/string.md) | -| BINARY LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| BINARY VARYING | [String](../../sql-reference/data-types/string.md) | diff --git a/docs/en/sql-reference/data-types/nullable.md b/docs/en/sql-reference/data-types/nullable.md index 5504765e4a08..abcb87a0c1b1 100644 --- a/docs/en/sql-reference/data-types/nullable.md +++ b/docs/en/sql-reference/data-types/nullable.md @@ -1,7 +1,7 @@ --- slug: /en/sql-reference/data-types/nullable -sidebar_position: 55 -sidebar_label: Nullable +sidebar_position: 44 +sidebar_label: Nullable(T) --- # Nullable(T) diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index 517a28576f03..39f8409c1e1a 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -1,5 +1,7 @@ --- slug: /en/sql-reference/data-types/simpleaggregatefunction +sidebar_position: 48 +sidebar_label: SimpleAggregateFunction --- # SimpleAggregateFunction diff --git a/docs/en/sql-reference/data-types/string.md b/docs/en/sql-reference/data-types/string.md index f891a9303e58..8a4f346fdfc8 100644 --- a/docs/en/sql-reference/data-types/string.md +++ b/docs/en/sql-reference/data-types/string.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/string -sidebar_position: 44 +sidebar_position: 8 sidebar_label: String --- @@ -13,7 +13,7 @@ When creating tables, numeric parameters for string fields can be set (e.g. `VAR Aliases: -- `String` — `LONGTEXT`, `MEDIUMTEXT`, `TINYTEXT`, `TEXT`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `VARCHAR`, `CHAR`. 
+- `String` — `LONGTEXT`, `MEDIUMTEXT`, `TINYTEXT`, `TEXT`, `LONGBLOB`, `MEDIUMBLOB`, `TINYBLOB`, `BLOB`, `VARCHAR`, `CHAR`, `CHAR LARGE OBJECT`, `CHAR VARYING`, `CHARACTER LARGE OBJECT`, `CHARACTER VARYING`, `NCHAR LARGE OBJECT`, `NCHAR VARYING`, `NATIONAL CHARACTER LARGE OBJECT`, `NATIONAL CHARACTER VARYING`, `NATIONAL CHAR VARYING`, `NATIONAL CHARACTER`, `NATIONAL CHAR`, `BINARY LARGE OBJECT`, `BINARY VARYING`, ## Encodings diff --git a/docs/en/sql-reference/data-types/tuple.md b/docs/en/sql-reference/data-types/tuple.md index 8f87eeca075f..0525a3b04766 100644 --- a/docs/en/sql-reference/data-types/tuple.md +++ b/docs/en/sql-reference/data-types/tuple.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/data-types/tuple -sidebar_position: 54 +sidebar_position: 34 sidebar_label: Tuple(T1, T2, ...) --- -# Tuple(T1, T2, …) +# Tuple(T1, T2, ...) A tuple of elements, each having an individual [type](../../sql-reference/data-types/index.md#data_types). Tuple must contain at least one element. diff --git a/docs/en/sql-reference/data-types/uuid.md b/docs/en/sql-reference/data-types/uuid.md index 40f756b95888..75e163f50639 100644 --- a/docs/en/sql-reference/data-types/uuid.md +++ b/docs/en/sql-reference/data-types/uuid.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/data-types/uuid -sidebar_position: 46 +sidebar_position: 24 sidebar_label: UUID --- diff --git a/docs/en/sql-reference/data-types/variant.md b/docs/en/sql-reference/data-types/variant.md index 7d10d4b0e977..1a9f1dde8d38 100644 --- a/docs/en/sql-reference/data-types/variant.md +++ b/docs/en/sql-reference/data-types/variant.md @@ -1,10 +1,10 @@ --- slug: /en/sql-reference/data-types/variant -sidebar_position: 55 -sidebar_label: Variant +sidebar_position: 40 +sidebar_label: Variant(T1, T2, ...) --- -# Variant(T1, T2, T3, ...) +# Variant(T1, T2, ...) This type represents a union of other data types. Type `Variant(T1, T2, ..., TN)` means that each row of this type has a value of either type `T1` or `T2` or ... or `TN` or none of them (`NULL` value). diff --git a/docs/ru/sql-reference/data-types/multiword-types.md b/docs/ru/sql-reference/data-types/multiword-types.md deleted file mode 100644 index cca2d71e480b..000000000000 --- a/docs/ru/sql-reference/data-types/multiword-types.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -slug: /ru/sql-reference/data-types/multiword-types -sidebar_position: 61 -sidebar_label: Составные типы ---- - -# Составные типы {#multiword-types} - -При создании таблиц вы можете использовать типы данных с названием, состоящим из нескольких слов. Такие названия поддерживаются для лучшей совместимости с SQL. 
- -## Поддержка составных типов {#multiword-types-support} - -| Составные типы | Обычные типы | -|-------------------------------------|-----------------------------------------------------------| -| DOUBLE PRECISION | [Float64](../../sql-reference/data-types/float.md) | -| CHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| CHAR VARYING | [String](../../sql-reference/data-types/string.md) | -| CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) | -| NCHAR LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| NCHAR VARYING | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHARACTER LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHARACTER VARYING | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHAR VARYING | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHARACTER | [String](../../sql-reference/data-types/string.md) | -| NATIONAL CHAR | [String](../../sql-reference/data-types/string.md) | -| BINARY LARGE OBJECT | [String](../../sql-reference/data-types/string.md) | -| BINARY VARYING | [String](../../sql-reference/data-types/string.md) | diff --git a/docs/zh/sql-reference/data-types/multiword-types.mdx b/docs/zh/sql-reference/data-types/multiword-types.mdx deleted file mode 100644 index 85431d47efd6..000000000000 --- a/docs/zh/sql-reference/data-types/multiword-types.mdx +++ /dev/null @@ -1,10 +0,0 @@ ---- -slug: /zh/sql-reference/data-types/multiword-types -sidebar_position: 61 -sidebar_label: Multiword Type Names -title: "Multiword Types" ---- - -import Content from '@site/docs/en/sql-reference/data-types/multiword-types.md'; - - From cf8a83bb36a5b4072b100301abf267d74d8a4874 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 2 Apr 2024 21:05:37 +0000 Subject: [PATCH 133/470] Bump From 0ad3ed5ae3deb4e5077a8d18cf9fb16be1495d01 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 2 Apr 2024 18:38:58 -0300 Subject: [PATCH 134/470] Add support for S3 access through aws private link interface --- src/IO/S3/URI.cpp | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 027cb624ed5c..b007d9268b7b 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -33,12 +33,18 @@ namespace S3 URI::URI(const std::string & uri_) { /// Case when bucket name represented in domain name of S3 URL. - /// E.g. (https://bucket-name.s3.Region.amazonaws.com/key) + /// E.g. (https://bucket-name.s3.region.amazonaws.com/key) /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#virtual-hosted-style-access static const RE2 virtual_hosted_style_pattern(R"((.+)\.(s3express[\-a-z0-9]+|s3|cos|obs|oss|eos)([.\-][a-z0-9\-.:]+))"); + /// Case when AWS Private Link Interface is being used + /// E.g. (bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com) + /// https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html +// static const RE2 aws_private_link_style_pattern("bucket\\.vpce\\-([a-z0-9\\-.:]+)\\.vpce.amazonaws.com/([^/]*)/(.*)"); + static const RE2 aws_private_link_style_pattern("bucket\\.vpce\\-([a-z0-9\\-.:]+)\\.vpce.amazonaws.com"); + /// Case when bucket name and key represented in path of S3 URL. - /// E.g. (https://s3.Region.amazonaws.com/bucket-name/key) + /// E.g. 
(https://s3.region.amazonaws.com/bucket-name/key) /// https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html#path-style-access static const RE2 path_style_pattern("^/([^/]*)/(.*)"); @@ -103,7 +109,19 @@ URI::URI(const std::string & uri_) String name; String endpoint_authority_from_uri; - if (re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket, &name, &endpoint_authority_from_uri)) + if (re2::RE2::FullMatch(uri.getAuthority(), aws_private_link_style_pattern)) + { + if (!re2::RE2::PartialMatch(uri.getPath(), path_style_pattern, &bucket, &key)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", + quoteString("ada")); + } + is_virtual_hosted_style = true; + endpoint = uri.getScheme() + "://" + uri.getAuthority(); + validateBucket(bucket, uri); + } + else if (re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket, &name, &endpoint_authority_from_uri)) { is_virtual_hosted_style = true; endpoint = uri.getScheme() + "://" + name + endpoint_authority_from_uri; From c1fd9262a7126fa9a8cbd3e18942eb499ce93965 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Tue, 2 Apr 2024 18:42:57 -0300 Subject: [PATCH 135/470] change exception placeholder --- src/IO/S3/URI.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index b007d9268b7b..69b539cde8be 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -38,9 +38,8 @@ URI::URI(const std::string & uri_) static const RE2 virtual_hosted_style_pattern(R"((.+)\.(s3express[\-a-z0-9]+|s3|cos|obs|oss|eos)([.\-][a-z0-9\-.:]+))"); /// Case when AWS Private Link Interface is being used - /// E.g. (bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com) + /// E.g. (bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com/bucket-name/key) /// https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html -// static const RE2 aws_private_link_style_pattern("bucket\\.vpce\\-([a-z0-9\\-.:]+)\\.vpce.amazonaws.com/([^/]*)/(.*)"); static const RE2 aws_private_link_style_pattern("bucket\\.vpce\\-([a-z0-9\\-.:]+)\\.vpce.amazonaws.com"); /// Case when bucket name and key represented in path of S3 URL. @@ -113,10 +112,10 @@ URI::URI(const std::string & uri_) { if (!re2::RE2::PartialMatch(uri.getPath(), path_style_pattern, &bucket, &key)) { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", - quoteString("ada")); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Could not parse bucket and key from uri {}", uri.toString()); } + + // Default to virtual hosted style is_virtual_hosted_style = true; endpoint = uri.getScheme() + "://" + uri.getAuthority(); validateBucket(bucket, uri); From 44fa62c0a49c1dd847a60b4863c6cfe949e40672 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sat, 30 Mar 2024 01:28:14 +0100 Subject: [PATCH 136/470] Add test. 
--- src/Common/randomDelay.cpp | 39 ++++++ src/Common/randomDelay.h | 8 ++ src/Storages/MergeTree/DataPartsExchange.cpp | 5 + src/Storages/StorageReplicatedMergeTree.cpp | 5 + .../{remote_servers.xml => cluster.xml} | 16 --- .../configs/cluster3.xml | 20 +++ .../configs/slow_replicated_merge_tree.xml | 10 ++ .../test_backup_restore_on_cluster/test.py | 3 +- .../test_slow_rmt.py | 119 ++++++++++++++++++ 9 files changed, 208 insertions(+), 17 deletions(-) create mode 100644 src/Common/randomDelay.cpp create mode 100644 src/Common/randomDelay.h rename tests/integration/test_backup_restore_on_cluster/configs/{remote_servers.xml => cluster.xml} (56%) create mode 100644 tests/integration/test_backup_restore_on_cluster/configs/cluster3.xml create mode 100644 tests/integration/test_backup_restore_on_cluster/configs/slow_replicated_merge_tree.xml create mode 100644 tests/integration/test_backup_restore_on_cluster/test_slow_rmt.py diff --git a/src/Common/randomDelay.cpp b/src/Common/randomDelay.cpp new file mode 100644 index 000000000000..7f6f30849198 --- /dev/null +++ b/src/Common/randomDelay.cpp @@ -0,0 +1,39 @@ +#include + +#include +#include +#include + + +void randomDelayForMaxMilliseconds(uint64_t milliseconds, LoggerPtr log, const char * start_of_message) +{ + if (milliseconds) + { + auto count = randomNumber() % milliseconds; + + if (log) + { + if (start_of_message && !*start_of_message) + start_of_message = nullptr; + + LOG_TEST(log, "{}{}Sleeping for {} milliseconds", + (start_of_message ? start_of_message : ""), + (start_of_message ? ": " : ""), + count); + } + + sleepForMilliseconds(count); + + if (log) + { + LOG_TEST(log, "{}{}Awaking after sleeping", + (start_of_message ? start_of_message : ""), + (start_of_message ? ": " : "")); + } + } +} + +void randomDelayForMaxSeconds(uint64_t seconds, LoggerPtr log, const char * start_of_message) +{ + randomDelayForMaxMilliseconds(seconds * 1000, log, start_of_message); +} diff --git a/src/Common/randomDelay.h b/src/Common/randomDelay.h new file mode 100644 index 000000000000..99f218cc8a17 --- /dev/null +++ b/src/Common/randomDelay.h @@ -0,0 +1,8 @@ +#pragma once + +#include + +/// Sleeps for random duration between 0 and a specified number of milliseconds, optionally outputs a logging message about that. +/// This function can be used to add random delays in tests. 
+void randomDelayForMaxMilliseconds(uint64_t milliseconds, LoggerPtr log = nullptr, const char * start_of_message = nullptr); +void randomDelayForMaxSeconds(uint64_t seconds, LoggerPtr log = nullptr, const char * start_of_message = nullptr); diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 6bb5ff5a4ab4..91444d76a521 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -120,6 +121,10 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write LOG_TRACE(log, "Sending part {}", part_name); + static const auto test_delay = data.getContext()->getConfigRef().getUInt64("test.data_parts_exchange.delay_before_sending_part_ms", 0); + if (test_delay) + randomDelayForMaxMilliseconds(test_delay, log, "DataPartsExchange: Before sending part"); + MergeTreeData::DataPartPtr part; auto report_broken_part = [&]() diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7ca508c362d1..4de53d90a333 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include @@ -3093,6 +3094,10 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo Strings active_parts = get_part_set.getParts(); /// Remove local parts if source replica does not have them, because such parts will never be fetched by other replicas. + static const auto test_delay = getContext()->getConfigRef().getUInt64("test.clone_replica.delay_before_removing_local_parts_ms", 0); + if (test_delay) + randomDelayForMaxMilliseconds(test_delay, log.load(), "cloneReplica: Before removing local parts"); + Strings local_parts_in_zk = zookeeper->getChildren(fs::path(replica_path) / "parts"); Strings parts_to_remove_from_zk; diff --git a/tests/integration/test_backup_restore_on_cluster/configs/remote_servers.xml b/tests/integration/test_backup_restore_on_cluster/configs/cluster.xml similarity index 56% rename from tests/integration/test_backup_restore_on_cluster/configs/remote_servers.xml rename to tests/integration/test_backup_restore_on_cluster/configs/cluster.xml index c3bb226c1f40..1f7cb8155eb3 100644 --- a/tests/integration/test_backup_restore_on_cluster/configs/remote_servers.xml +++ b/tests/integration/test_backup_restore_on_cluster/configs/cluster.xml @@ -20,21 +20,5 @@ - - - - node1 - 9000 - - - node2 - 9000 - - - node3 - 9000 - - - diff --git a/tests/integration/test_backup_restore_on_cluster/configs/cluster3.xml b/tests/integration/test_backup_restore_on_cluster/configs/cluster3.xml new file mode 100644 index 000000000000..a591f22447ec --- /dev/null +++ b/tests/integration/test_backup_restore_on_cluster/configs/cluster3.xml @@ -0,0 +1,20 @@ + + + + + + node1 + 9000 + + + node2 + 9000 + + + node3 + 9000 + + + + + diff --git a/tests/integration/test_backup_restore_on_cluster/configs/slow_replicated_merge_tree.xml b/tests/integration/test_backup_restore_on_cluster/configs/slow_replicated_merge_tree.xml new file mode 100644 index 000000000000..c6bc1e318024 --- /dev/null +++ b/tests/integration/test_backup_restore_on_cluster/configs/slow_replicated_merge_tree.xml @@ -0,0 +1,10 @@ + + + + 250 + + + 250 + + + diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 
c76e9718640b..e8d24934c7f8 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -10,7 +10,8 @@ cluster = ClickHouseCluster(__file__) main_configs = [ - "configs/remote_servers.xml", + "configs/cluster.xml", + "configs/cluster3.xml", "configs/replicated_access_storage.xml", "configs/replicated_user_defined_sql_objects.xml", "configs/backups_disk.xml", diff --git a/tests/integration/test_backup_restore_on_cluster/test_slow_rmt.py b/tests/integration/test_backup_restore_on_cluster/test_slow_rmt.py new file mode 100644 index 000000000000..987f86694885 --- /dev/null +++ b/tests/integration/test_backup_restore_on_cluster/test_slow_rmt.py @@ -0,0 +1,119 @@ +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV, assert_eq_with_retry, exec_query_with_retry + + +cluster = ClickHouseCluster(__file__) + +main_configs = [ + "configs/backups_disk.xml", + "configs/cluster.xml", + "configs/slow_replicated_merge_tree.xml", +] + +user_configs = [ + "configs/allow_database_types.xml", + "configs/zookeeper_retries.xml", +] + +node1 = cluster.add_instance( + "node1", + main_configs=main_configs, + user_configs=user_configs, + external_dirs=["/backups/"], + macros={"replica": "node1", "shard": "shard1"}, + with_zookeeper=True, +) + +node2 = cluster.add_instance( + "node2", + main_configs=main_configs, + user_configs=user_configs, + external_dirs=["/backups/"], + macros={"replica": "node2", "shard": "shard1"}, + with_zookeeper=True, +) + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def drop_after_test(): + try: + yield + finally: + node1.query("DROP DATABASE IF EXISTS mydb ON CLUSTER 'cluster' SYNC") + + +backup_id_counter = 0 + + +def new_backup_name(): + global backup_id_counter + backup_id_counter += 1 + return f"Disk('backups', '{backup_id_counter}')" + + +def test_replicated_database_async(): + node1.query( + "CREATE DATABASE mydb ON CLUSTER 'cluster' ENGINE=Replicated('/clickhouse/path/','{shard}','{replica}')" + ) + + node1.query("CREATE TABLE mydb.tbl(x UInt8) ENGINE=ReplicatedMergeTree ORDER BY x") + + node1.query( + "CREATE TABLE mydb.tbl2(y String) ENGINE=ReplicatedMergeTree ORDER BY y" + ) + + node2.query("SYSTEM SYNC DATABASE REPLICA mydb") + + node1.query("INSERT INTO mydb.tbl VALUES (1)") + node1.query("INSERT INTO mydb.tbl VALUES (22)") + node2.query("INSERT INTO mydb.tbl2 VALUES ('a')") + node2.query("INSERT INTO mydb.tbl2 VALUES ('bb')") + node1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' mydb.tbl") + + backup_name = new_backup_name() + [id, status] = node1.query( + f"BACKUP DATABASE mydb ON CLUSTER 'cluster' TO {backup_name} ASYNC" + ).split("\t") + + assert status == "CREATING_BACKUP\n" or status == "BACKUP_CREATED\n" + + assert_eq_with_retry( + node1, + f"SELECT status, error FROM system.backups WHERE id='{id}'", + TSV([["BACKUP_CREATED", ""]]), + ) + + node1.query("DROP DATABASE mydb ON CLUSTER 'cluster' SYNC") + + [id, status] = node1.query( + f"RESTORE DATABASE mydb ON CLUSTER 'cluster' FROM {backup_name} ASYNC" + ).split("\t") + + assert status == "RESTORING\n" or status == "RESTORED\n" + + assert_eq_with_retry( + node1, + f"SELECT status, error FROM system.backups WHERE id='{id}'", + TSV([["RESTORED", ""]]), + ) + + # exec_query_with_retry() is here because `SYSTEM SYNC REPLICA` can throw `TABLE_IS_READ_ONLY` + # if any of 
these tables didn't start completely yet. + exec_query_with_retry(node1, "SYSTEM SYNC REPLICA ON CLUSTER 'cluster' mydb.tbl") + exec_query_with_retry(node1, "SYSTEM SYNC REPLICA ON CLUSTER 'cluster' mydb.tbl2") + + assert node1.query("SELECT * FROM mydb.tbl ORDER BY x") == TSV([1, 22]) + assert node2.query("SELECT * FROM mydb.tbl2 ORDER BY y") == TSV(["a", "bb"]) + assert node2.query("SELECT * FROM mydb.tbl ORDER BY x") == TSV([1, 22]) + assert node1.query("SELECT * FROM mydb.tbl2 ORDER BY y") == TSV(["a", "bb"]) From cc31b837f2d9fc44334d831a24898e1321b50134 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 2 Apr 2024 15:28:57 +0200 Subject: [PATCH 137/470] User specific S3 endpoint backup/restore ON CLUSTER --- src/Backups/BackupIO_S3.cpp | 10 +++-- src/Backups/BackupIO_S3.h | 21 +++++++++- src/Backups/BackupsWorker.cpp | 1 + src/Backups/registerBackupEngineS3.cpp | 6 ++- src/Storages/StorageS3Settings.cpp | 4 +- src/Storages/StorageS3Settings.h | 2 +- .../configs/remote_servers.xml | 12 ++++++ .../test_backup_restore_s3/test.py | 40 ++++++++++++++++++- 8 files changed, 84 insertions(+), 12 deletions(-) create mode 100644 tests/integration/test_backup_restore_s3/configs/remote_servers.xml diff --git a/src/Backups/BackupIO_S3.cpp b/src/Backups/BackupIO_S3.cpp index 2063af2061cc..4b7e3d1e775a 100644 --- a/src/Backups/BackupIO_S3.cpp +++ b/src/Backups/BackupIO_S3.cpp @@ -124,11 +124,12 @@ BackupReaderS3::BackupReaderS3( bool allow_s3_native_copy, const ReadSettings & read_settings_, const WriteSettings & write_settings_, - const ContextPtr & context_) + const ContextPtr & context_, + bool is_internal_backup) : BackupReaderDefault(read_settings_, write_settings_, getLogger("BackupReaderS3")) , s3_uri(s3_uri_) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false} - , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName())) + , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup)) { auto & request_settings = s3_settings.request_settings; request_settings.updateFromSettings(context_->getSettingsRef()); @@ -214,11 +215,12 @@ BackupWriterS3::BackupWriterS3( const String & storage_class_name, const ReadSettings & read_settings_, const WriteSettings & write_settings_, - const ContextPtr & context_) + const ContextPtr & context_, + bool is_internal_backup) : BackupWriterDefault(read_settings_, write_settings_, getLogger("BackupWriterS3")) , s3_uri(s3_uri_) , data_source_description{DataSourceType::ObjectStorage, ObjectStorageType::S3, MetadataStorageType::None, s3_uri.endpoint, false, false} - , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName())) + , s3_settings(context_->getStorageS3Settings().getSettings(s3_uri.uri.toString(), context_->getUserName(), /*ignore_user=*/is_internal_backup)) { auto & request_settings = s3_settings.request_settings; request_settings.updateFromSettings(context_->getSettingsRef()); diff --git a/src/Backups/BackupIO_S3.h b/src/Backups/BackupIO_S3.h index 57108d122ea5..f81eb975df3f 100644 --- a/src/Backups/BackupIO_S3.h +++ b/src/Backups/BackupIO_S3.h @@ -18,7 +18,15 @@ namespace DB class BackupReaderS3 : public BackupReaderDefault { public: - BackupReaderS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const ReadSettings & 
read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + BackupReaderS3( + const S3::URI & s3_uri_, + const String & access_key_id_, + const String & secret_access_key_, + bool allow_s3_native_copy, + const ReadSettings & read_settings_, + const WriteSettings & write_settings_, + const ContextPtr & context_, + bool is_internal_backup); ~BackupReaderS3() override; bool fileExists(const String & file_name) override; @@ -41,7 +49,16 @@ class BackupReaderS3 : public BackupReaderDefault class BackupWriterS3 : public BackupWriterDefault { public: - BackupWriterS3(const S3::URI & s3_uri_, const String & access_key_id_, const String & secret_access_key_, bool allow_s3_native_copy, const String & storage_class_name, const ReadSettings & read_settings_, const WriteSettings & write_settings_, const ContextPtr & context_); + BackupWriterS3( + const S3::URI & s3_uri_, + const String & access_key_id_, + const String & secret_access_key_, + bool allow_s3_native_copy, + const String & storage_class_name, + const ReadSettings & read_settings_, + const WriteSettings & write_settings_, + const ContextPtr & context_, + bool is_internal_backup); ~BackupWriterS3() override; bool fileExists(const String & file_name) override; diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 96fe770227c8..d345223b3b4f 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -940,6 +940,7 @@ void BackupsWorker::doRestore( backup_open_params.use_same_s3_credentials_for_base_backup = restore_settings.use_same_s3_credentials_for_base_backup; backup_open_params.read_settings = getReadSettingsForRestore(context); backup_open_params.write_settings = getWriteSettingsForRestore(context); + backup_open_params.is_internal_backup = restore_settings.internal; BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params); String current_database = context->getCurrentDatabase(); diff --git a/src/Backups/registerBackupEngineS3.cpp b/src/Backups/registerBackupEngineS3.cpp index fed5c6b4d22c..c34dbe273f5e 100644 --- a/src/Backups/registerBackupEngineS3.cpp +++ b/src/Backups/registerBackupEngineS3.cpp @@ -110,7 +110,8 @@ void registerBackupEngineS3(BackupFactory & factory) params.allow_s3_native_copy, params.read_settings, params.write_settings, - params.context); + params.context, + params.is_internal_backup); return std::make_unique( params.backup_info, @@ -129,7 +130,8 @@ void registerBackupEngineS3(BackupFactory & factory) params.s3_storage_class, params.read_settings, params.write_settings, - params.context); + params.context, + params.is_internal_backup); return std::make_unique( params.backup_info, diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 5887018268b1..04634bcf1b3f 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -292,7 +292,7 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U } } -S3Settings StorageS3Settings::getSettings(const String & endpoint, const String & user) const +S3Settings StorageS3Settings::getSettings(const String & endpoint, const String & user, bool ignore_user) const { std::lock_guard lock(mutex); auto next_prefix_setting = s3_settings.upper_bound(endpoint); @@ -302,7 +302,7 @@ S3Settings StorageS3Settings::getSettings(const String & endpoint, const String { std::advance(possible_prefix_setting, -1); const auto & [endpoint_prefix, settings] = *possible_prefix_setting; - if 
(endpoint.starts_with(endpoint_prefix) && settings.auth_settings.canBeUsedByUser(user)) + if (endpoint.starts_with(endpoint_prefix) && (ignore_user || settings.auth_settings.canBeUsedByUser(user))) return possible_prefix_setting->second; } diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 21b6264717eb..0f972db02b11 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -112,7 +112,7 @@ class StorageS3Settings public: void loadFromConfig(const String & config_elem, const Poco::Util::AbstractConfiguration & config, const Settings & settings); - S3Settings getSettings(const String & endpoint, const String & user) const; + S3Settings getSettings(const String & endpoint, const String & user, bool ignore_user = false) const; private: mutable std::mutex mutex; diff --git a/tests/integration/test_backup_restore_s3/configs/remote_servers.xml b/tests/integration/test_backup_restore_s3/configs/remote_servers.xml new file mode 100644 index 000000000000..9607aac20031 --- /dev/null +++ b/tests/integration/test_backup_restore_s3/configs/remote_servers.xml @@ -0,0 +1,12 @@ + + + + + + node + 9000 + + + + + diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index d65fc1f09d6b..05424887736e 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -1,4 +1,4 @@ -from typing import Dict, Iterable +from typing import Dict import pytest from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV @@ -13,11 +13,13 @@ "configs/named_collection_s3_backups.xml", "configs/s3_settings.xml", "configs/blob_log.xml", + "configs/remote_servers.xml", ], user_configs=[ "configs/zookeeper_retries.xml", ], with_minio=True, + with_zookeeper=True, ) @@ -544,9 +546,45 @@ def create_user(user): "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1.zip', 'RawBLOB')", user="regularuser", ) + node.query( "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup1.zip', 'RawBLOB')", user="superuser1", ) + assert "Access Denied" in node.query_and_get_error( + "BACKUP TABLE specific_auth ON CLUSTER 'cluster' TO S3('http://minio1:9001/root/data/backups/limited/backup3/')", + user="regularuser", + ) + + node.query( + "BACKUP TABLE specific_auth ON CLUSTER 'cluster' TO S3('http://minio1:9001/root/data/backups/limited/backup3/')", + user="superuser1", + ) + + assert "Access Denied" in node.query_and_get_error( + "RESTORE TABLE specific_auth ON CLUSTER 'cluster' FROM S3('http://minio1:9001/root/data/backups/limited/backup3/')", + user="regularuser", + ) + + node.query( + "RESTORE TABLE specific_auth ON CLUSTER 'cluster' FROM S3('http://minio1:9001/root/data/backups/limited/backup3/')", + user="superuser1", + ) + + assert "Access Denied" in node.query_and_get_error( + "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup3/*', 'RawBLOB')", + user="regularuser", + ) + + node.query( + "SELECT * FROM s3('http://minio1:9001/root/data/backups/limited/backup3/*', 'RawBLOB')", + user="superuser1", + ) + + assert "Access Denied" in node.query_and_get_error( + "SELECT * FROM s3Cluster(cluster, 'http://minio1:9001/root/data/backups/limited/backup3/*', 'RawBLOB')", + user="regularuser", + ) + node.query("DROP TABLE IF EXISTS test.specific_auth") From c7a28b137ad9ca75c44bb531fc79ba034e3e311d Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 3 Apr 2024 07:24:20 +0000 Subject: [PATCH 
138/470] Update version_date.tsv and changelogs after v24.3.2.23-lts --- docker/keeper/Dockerfile | 2 +- docker/server/Dockerfile.alpine | 2 +- docker/server/Dockerfile.ubuntu | 2 +- docs/changelogs/v24.3.2.23-lts.md | 29 ++++++++++++++++++++++++++++ utils/list-versions/version_date.tsv | 1 + 5 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 docs/changelogs/v24.3.2.23-lts.md diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 3daa62cb212a..346868e19c46 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.3.1.2672" +ARG VERSION="24.3.2.23" ARG PACKAGES="clickhouse-keeper" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index ace01ae9a9f5..36f09c092f85 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="24.3.1.2672" +ARG VERSION="24.3.2.23" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" ARG DIRECT_DOWNLOAD_URLS="" diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index e92823b686a9..531a50efe969 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="24.3.1.2672" +ARG VERSION="24.3.2.23" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docs/changelogs/v24.3.2.23-lts.md b/docs/changelogs/v24.3.2.23-lts.md new file mode 100644 index 000000000000..4d59a1cedf60 --- /dev/null +++ b/docs/changelogs/v24.3.2.23-lts.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 1 +sidebar_label: 2024 +--- + +# 2024 Changelog + +### ClickHouse release v24.3.2.23-lts (8b7d910960c) FIXME as compared to v24.3.1.2672-lts (2c5c589a882) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix logical error in group_by_use_nulls + grouping set + analyzer + materialize/constant [#61567](https://github.com/ClickHouse/ClickHouse/pull/61567) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix external table cannot parse data type Bool [#62115](https://github.com/ClickHouse/ClickHouse/pull/62115) ([Duc Canh Le](https://github.com/canhld94)). +* Revert "Merge pull request [#61564](https://github.com/ClickHouse/ClickHouse/issues/61564) from liuneng1994/optimize_in_single_value" [#62135](https://github.com/ClickHouse/ClickHouse/pull/62135) ([Raúl Marín](https://github.com/Algunenano)). + +#### CI Fix or Improvement (changelog entry is not required) + +* Backported in [#62030](https://github.com/ClickHouse/ClickHouse/issues/62030):. [#61869](https://github.com/ClickHouse/ClickHouse/pull/61869) ([Nikita Fomichev](https://github.com/fm4v)). +* Backported in [#62057](https://github.com/ClickHouse/ClickHouse/issues/62057): ... [#62044](https://github.com/ClickHouse/ClickHouse/pull/62044) ([Max K.](https://github.com/maxknv)). 
+* Backported in [#62204](https://github.com/ClickHouse/ClickHouse/issues/62204):. [#62190](https://github.com/ClickHouse/ClickHouse/pull/62190) ([Konstantin Bogdanov](https://github.com/thevar1able)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix some crashes with analyzer and group_by_use_nulls. [#61933](https://github.com/ClickHouse/ClickHouse/pull/61933) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix scalars create as select [#61998](https://github.com/ClickHouse/ClickHouse/pull/61998) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Ignore IfChainToMultiIfPass if returned type changed. [#62059](https://github.com/ClickHouse/ClickHouse/pull/62059) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix type for ConvertInToEqualPass [#62066](https://github.com/ClickHouse/ClickHouse/pull/62066) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Revert output Pretty in tty [#62090](https://github.com/ClickHouse/ClickHouse/pull/62090) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index ca1a23a99db4..060a0107c1e3 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v24.3.2.23-lts 2024-04-03 v24.3.1.2672-lts 2024-03-27 v24.2.2.71-stable 2024-03-15 v24.2.1.2248-stable 2024-02-29 From 8f40db2fb2c520a8907914f8f5799026c43ed3f2 Mon Sep 17 00:00:00 2001 From: Mark Needham Date: Wed, 3 Apr 2024 08:26:52 +0100 Subject: [PATCH 139/470]
missing closing / --- docs/en/sql-reference/window-functions/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 19821781d0e0..32ebc6d028f7 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -24,7 +24,7 @@ ClickHouse supports the standard grammar for defining windows and window functio | Calculating aggregate functions over a frame (`sum(value) over (order by time)`) | ✅ (All aggregate functions are supported) | | `rank()`, `dense_rank()`, `row_number()` | ✅ | | `lag/lead(value, offset)` | ❌
You can use one of the following workarounds:
1) `any(value) over (.... rows between <offset> preceding and <offset> preceding)`, or `following` for `lead`
2) `lagInFrame/leadInFrame`, which are analogous, but respect the window frame. To get behavior identical to `lag/lead`, use `rows between unbounded preceding and unbounded following` | -| ntile(buckets) | ✅
Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | +| ntile(buckets) | ✅
Specify window like, (partition by x order by y rows between unbounded preceding and unrounded following). | ## ClickHouse-specific Window Functions From d7827eaf57974caf9229d0bc3dcb80470a3e3fd9 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 09:48:36 +0200 Subject: [PATCH 140/470] Close: https://github.com/ClickHouse/ClickHouse/issues/23104 --- .../03053_analyzer_join_alias.reference | 0 .../0_stateless/03053_analyzer_join_alias.sql | 43 +++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 tests/queries/0_stateless/03053_analyzer_join_alias.reference create mode 100644 tests/queries/0_stateless/03053_analyzer_join_alias.sql diff --git a/tests/queries/0_stateless/03053_analyzer_join_alias.reference b/tests/queries/0_stateless/03053_analyzer_join_alias.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03053_analyzer_join_alias.sql b/tests/queries/0_stateless/03053_analyzer_join_alias.sql new file mode 100644 index 000000000000..7e11cc7c810e --- /dev/null +++ b/tests/queries/0_stateless/03053_analyzer_join_alias.sql @@ -0,0 +1,43 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/23104 +DROP DATABASE IF EXISTS test_03053; +CREATE DATABASE test_03053; + +CREATE TABLE test_03053.base +( +`id` UInt64, +`id2` UInt64, +`d` UInt64, +`value` UInt64 +) +ENGINE=MergeTree() +PARTITION BY d +ORDER BY (id,id2,d); + +CREATE TABLE test_03053.derived1 +( + `id1` UInt64, + `d1` UInt64, + `value1` UInt64 +) +ENGINE = MergeTree() +PARTITION BY d1 +ORDER BY (id1, d1); + +CREATE TABLE test_03053.derived2 +( + `id2` UInt64, + `d2` UInt64, + `value2` UInt64 +) +ENGINE = MergeTree() +PARTITION BY d2 +ORDER BY (id2, d2); + +SELECT + base.id AS `base.id`, + derived2.id2 AS `derived2.id2`, + derived2.value2 AS `derived2.value2`, + derived1.value1 AS `derived1.value1` +FROM test_03053.base AS base +LEFT JOIN test_03053.derived2 AS derived2 ON base.id2 = derived2.id2 +LEFT JOIN test_03053.derived1 AS derived1 ON base.id = derived1.id1; From d78ab39b4bccdd18120408ab0e050515c5fbe465 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 09:52:38 +0200 Subject: [PATCH 141/470] Close: https://github.com/ClickHouse/ClickHouse/issues/21584 --- .../0_stateless/03054_analyzer_join_alias.reference | 1 + .../0_stateless/03054_analyzer_join_alias.sql | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 tests/queries/0_stateless/03054_analyzer_join_alias.reference create mode 100644 tests/queries/0_stateless/03054_analyzer_join_alias.sql diff --git a/tests/queries/0_stateless/03054_analyzer_join_alias.reference b/tests/queries/0_stateless/03054_analyzer_join_alias.reference new file mode 100644 index 000000000000..f599e28b8ab0 --- /dev/null +++ b/tests/queries/0_stateless/03054_analyzer_join_alias.reference @@ -0,0 +1 @@ +10 diff --git a/tests/queries/0_stateless/03054_analyzer_join_alias.sql b/tests/queries/0_stateless/03054_analyzer_join_alias.sql new file mode 100644 index 000000000000..0bf93258aa62 --- /dev/null +++ b/tests/queries/0_stateless/03054_analyzer_join_alias.sql @@ -0,0 +1,12 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/21584 +SELECT count() +FROM +( + SELECT number AS key_1 + FROM numbers(15) +) AS x +ALL INNER JOIN +( + SELECT number AS key_1 + FROM numbers(10) +) AS z ON key_1 = z.key_1; From 34c5fbd4cfef424e6430376adc6168bdd28392bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Wed, 3 Apr 2024 07:53:28 +0000 Subject: [PATCH 142/470] Fix style 
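For reference, the `lag`/`lead` workarounds listed in the window-functions table above can be written out as follows. This is a minimal sketch over illustrative data from `numbers()`, not code taken from this patch set:

```sql
SELECT
    number AS ts,
    -- Workaround 1: any() over a frame of exactly one preceding row emulates lag(value, 1).
    -- (For the first row the frame is empty, so the aggregate returns the type's default value.)
    any(number) OVER (ORDER BY number ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS lag_via_any,
    -- Workaround 2: lagInFrame respects the window frame, so widen the frame to get
    -- behaviour identical to a standard lag(value, 1).
    lagInFrame(number, 1) OVER (ORDER BY number ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS lag_in_frame
FROM numbers(5)
ORDER BY ts;
```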
--- src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index d2ec68186664..0c9d4cfe9efe 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1222,8 +1222,10 @@ void ReplicatedMergeTreeQueue::waitForCurrentlyExecutingOpsInRange(const MergeTr continue; const auto virtual_part_names = entry->getVirtualPartNames(format_version); - for(const auto& virtual_part_name: virtual_part_names) { - if (!part_info.isDisjoint(MergeTreePartInfo::fromPartName(virtual_part_name, format_version))){ + for (const auto & virtual_part_name : virtual_part_names) + { + if (!part_info.isDisjoint(MergeTreePartInfo::fromPartName(virtual_part_name, format_version))) + { to_wait.push_back(entry); break; } From 6805103ce3545d3e3d94d18a2ae26d5976b2c9c6 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 09:58:11 +0200 Subject: [PATCH 143/470] Close: https://github.com/ClickHouse/ClickHouse/issues/23344 --- .../03055_analyzer_subquery_group_array.reference | 1 + .../0_stateless/03055_analyzer_subquery_group_array.sql | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/03055_analyzer_subquery_group_array.reference create mode 100644 tests/queries/0_stateless/03055_analyzer_subquery_group_array.sql diff --git a/tests/queries/0_stateless/03055_analyzer_subquery_group_array.reference b/tests/queries/0_stateless/03055_analyzer_subquery_group_array.reference new file mode 100644 index 000000000000..d05b1f927f4b --- /dev/null +++ b/tests/queries/0_stateless/03055_analyzer_subquery_group_array.reference @@ -0,0 +1 @@ +0 0 diff --git a/tests/queries/0_stateless/03055_analyzer_subquery_group_array.sql b/tests/queries/0_stateless/03055_analyzer_subquery_group_array.sql new file mode 100644 index 000000000000..071d8f8e1c81 --- /dev/null +++ b/tests/queries/0_stateless/03055_analyzer_subquery_group_array.sql @@ -0,0 +1,5 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/23344 +SELECT logTrace(repeat('Hello', 100)), ignore(*) +FROM ( + SELECT ignore((SELECT groupArrayState(([number], [number])) FROM numbers(19000))) +) From 809461ccdae2896e7b91dba36667645864a9aaae Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 10:03:47 +0200 Subject: [PATCH 144/470] Close: https://github.com/ClickHouse/ClickHouse/issues/22627 --- ...3056_analyzer_double_subquery_alias.reference | 1 + .../03056_analyzer_double_subquery_alias.sql | 16 ++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 tests/queries/0_stateless/03056_analyzer_double_subquery_alias.reference create mode 100644 tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql diff --git a/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.reference b/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.reference new file mode 100644 index 000000000000..72749c905a31 --- /dev/null +++ b/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.reference @@ -0,0 +1 @@ +1 1 1 diff --git a/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql b/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql new file mode 100644 index 000000000000..b86ae97c8bfd --- /dev/null +++ b/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql @@ -0,0 +1,16 @@ +-- 
https://github.com/ClickHouse/ClickHouse/issues/22627 +WITH + x AS + ( + SELECT 1 AS a + ), + xx AS + ( + SELECT * + FROM x + , x AS x1 + , x AS x2 + ) +SELECT * +FROM xx +WHERE a = 1; From a401d328fd35a65973306571e67539fcd8ec4909 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 10:06:11 +0200 Subject: [PATCH 145/470] Close: https://github.com/ClickHouse/ClickHouse/issues/10276 --- .../03057_analyzer_subquery_alias_join.reference | 1 + .../03057_analyzer_subquery_alias_join.sql | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 tests/queries/0_stateless/03057_analyzer_subquery_alias_join.reference create mode 100644 tests/queries/0_stateless/03057_analyzer_subquery_alias_join.sql diff --git a/tests/queries/0_stateless/03057_analyzer_subquery_alias_join.reference b/tests/queries/0_stateless/03057_analyzer_subquery_alias_join.reference new file mode 100644 index 000000000000..3e43e6addc00 --- /dev/null +++ b/tests/queries/0_stateless/03057_analyzer_subquery_alias_join.reference @@ -0,0 +1 @@ +1000 100000 diff --git a/tests/queries/0_stateless/03057_analyzer_subquery_alias_join.sql b/tests/queries/0_stateless/03057_analyzer_subquery_alias_join.sql new file mode 100644 index 000000000000..13852471dca5 --- /dev/null +++ b/tests/queries/0_stateless/03057_analyzer_subquery_alias_join.sql @@ -0,0 +1,12 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/10276 +SELECT + sum(x.n) as n, + sum(z.n) as n2 +FROM +( + SELECT 1000 AS n,1 as id +) AS x +join (select 10000 as n,1 as id) as y +on x.id = y.id +left join (select 100000 as n,1 as id) as z +on x.id = z.id; From 651d382695b9399d9f74e6369772975c3b26746a Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 3 Apr 2024 11:17:57 +0200 Subject: [PATCH 146/470] Minor changes to CramersV functions --- .../aggregate-functions/reference/cramersv.md | 11 ++++++++--- .../reference/cramersvbiascorrected.md | 10 ++++------ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/cramersv.md b/docs/en/sql-reference/aggregate-functions/reference/cramersv.md index e9e2c367610d..77b6043dc13b 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/cramersv.md +++ b/docs/en/sql-reference/aggregate-functions/reference/cramersv.md @@ -7,21 +7,26 @@ sidebar_position: 351 [Cramer's V](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V) (sometimes referred to as Cramer's phi) is a measure of association between two columns in a table. The result of the `cramersV` function ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. It may be viewed as the association between two variables as a percentage of their maximum possible variation. +:::note +For a bias corrected version of Cramer's V see: [cramersVBiasCorrected](./cramersvbiascorrected.md) +::: + **Syntax** ``` sql cramersV(column1, column2) ``` -**Arguments** +**Parameters** -- `column1` and `column2` are the columns to be compared +- `column1`: first column to be compared. +- `column2`: second column to be compared. **Returned value** - a value between 0 (corresponding to no association between the columns' values) to 1 (complete association). -**Return type** is always [Float64](../../../sql-reference/data-types/float.md). +Type: always [Float64](../../../sql-reference/data-types/float.md). 
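As a complement to the documented example below: when one column completely determines the other, the result is expected to be 1. A minimal sketch with made-up data (output not reproduced from an actual run):

```sql
SELECT cramersV(a, b)
FROM
(
    -- b is fully determined by a, so complete association (a value of 1) is expected.
    SELECT number % 10 AS a, (number % 10) * 2 AS b
    FROM numbers(150)
);
```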
**Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md index f5ad3a8a937a..53ec5c0a985e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md +++ b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md @@ -5,26 +5,24 @@ sidebar_position: 352 # cramersVBiasCorrected - Cramer's V is a measure of association between two columns in a table. The result of the [`cramersV` function](./cramersv.md) ranges from 0 (corresponding to no association between the variables) to 1 and can reach 1 only when each value is completely determined by the other. The function can be heavily biased, so this version of Cramer's V uses the [bias correction](https://en.wikipedia.org/wiki/Cram%C3%A9r%27s_V#Bias_correction). - - **Syntax** ``` sql cramersVBiasCorrected(column1, column2) ``` -**Arguments** +**Parameters** -- `column1` and `column2` are the columns to be compared +- `column1`: first column to be compared. +- `column2`: second column to be compared. **Returned value** - a value between 0 (corresponding to no association between the columns' values) to 1 (complete association). -**Return type** is always [Float64](../../../sql-reference/data-types/float.md). +Type: always [Float64](../../../sql-reference/data-types/float.md). **Example** From 3e2616d921ec5c128138b57a2f20b7e07bedbf7b Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 3 Apr 2024 11:23:42 +0200 Subject: [PATCH 147/470] Add 'Query:' before query for consistency --- docs/en/sql-reference/aggregate-functions/reference/cramersv.md | 2 ++ .../aggregate-functions/reference/cramersvbiascorrected.md | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docs/en/sql-reference/aggregate-functions/reference/cramersv.md b/docs/en/sql-reference/aggregate-functions/reference/cramersv.md index 77b6043dc13b..2424ff952378 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/cramersv.md +++ b/docs/en/sql-reference/aggregate-functions/reference/cramersv.md @@ -32,6 +32,8 @@ Type: always [Float64](../../../sql-reference/data-types/float.md). The following two columns being compared below have no association with each other, so the result of `cramersV` is 0: +Query: + ``` sql SELECT cramersV(a, b) diff --git a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md index 53ec5c0a985e..939c04e3fdc2 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md +++ b/docs/en/sql-reference/aggregate-functions/reference/cramersvbiascorrected.md @@ -28,6 +28,8 @@ Type: always [Float64](../../../sql-reference/data-types/float.md). The following two columns being compared below have a small association with each other. 
Notice the result of `cramersVBiasCorrected` is smaller than the result of `cramersV`: +Query: + ``` sql SELECT cramersV(a, b), From 2f45d98c970740c2263812fe3044616787f49d96 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Apr 2024 10:03:04 +0000 Subject: [PATCH 148/470] Docs: Improve wording of DROP TABLE docs --- docs/en/sql-reference/statements/drop.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md index 159ab09ab946..833ff7564492 100644 --- a/docs/en/sql-reference/statements/drop.md +++ b/docs/en/sql-reference/statements/drop.md @@ -20,11 +20,10 @@ DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] [SYNC] ## DROP TABLE -Deletes the table. -In case when `IF EMPTY` clause is specified server will check if table is empty only on replica that received initial query. +Deletes one or more tables. :::tip -Also see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md) +To undo the deletion of a table, please see see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md) ::: Syntax: @@ -33,7 +32,9 @@ Syntax: DROP [TEMPORARY] TABLE [IF EXISTS] [IF EMPTY] [db1.]name_1[, [db2.]name_2, ...] [ON CLUSTER cluster] [SYNC] ``` -Note that deleting multiple tables at the same time is a non-atomic deletion. If a table fails to be deleted, subsequent tables will not be deleted. +Limitations: +- If the clause `IF EMPTY` is specified, the server checks the emptiness of the table only on the replica which received the query. +- Deleting multiple tables at once is not an atomic operation, i.e. if the deletion of a table fails, subsequent tables will not be deleted. ## DROP DICTIONARY From ab42b9736dbf5b501064aef13271f226285d859f Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 3 Apr 2024 12:15:19 +0200 Subject: [PATCH 149/470] Added arrayDotProduct --- .../functions/array-functions.md | 53 ++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 67a4c0268518..2ddce6d6f71f 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -774,6 +774,57 @@ Returns the number of elements for which `func(arr1[i], …, arrN[i])` returns s Note that the `arrayCount` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You can pass a lambda function to it as the first argument. +## arrayDotProduct + +**Syntax** + +```sql +arrayDotProduct(vector1, vector2) +``` + +Alias: `scalarProduct` + +**Parameters** + +- `vector1`: First vector. [Array](../data-types/array.md) or [Tuple](../data-types/tuple.md) of numeric values. +- `vector2`: Second vector. [Array](../data-types/array.md) or [Tuple](../data-types/tuple.md) of numeric values. + +Note::: +The sizes of the two vectors must be equal. Arrays and Tuples may also contain mixed element types. +::: + +**Returned value** + +- The dot product of the two vectors. + +Type: numeric - determined by the type of the arguments. If Arrays or Tuples contain mixed element types then the result type is the supertype. 
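To illustrate the note above about equal vector sizes, a sketch of the failure mode; the vectors here are made up and the exact error raised is not shown:

```sql
-- The two vectors have different lengths, so this is expected to be rejected
-- with an error instead of returning a value.
SELECT arrayDotProduct([1, 2, 3], [4, 5]);
```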
+ +**Examples** + +Query: + +```sql +SELECT arrayDotProduct([1, 2, 3], [4, 5, 6]) AS res, toTypeName(res); +``` + +Result: + +```response +32 UInt16 +``` + +Query: + +```sql +SELECT dotProduct((1::UInt16, 2::UInt8, 3::Float32),(4::Int16, 5::Float32, 6::UInt8)) AS res, toTypeName(res); +``` + +Result: + +```response +32 Float64 +``` + ## countEqual(arr, x) Returns the number of elements in the array equal to x. Equivalent to arrayCount (elem -\> elem = x, arr). @@ -1652,7 +1703,7 @@ flatten(array_of_arrays) Alias: `flatten`. -**Arguments** +**Parameters** - `array_of_arrays` — [Array](../../sql-reference/data-types/array.md) of arrays. For example, `[[1,2,3], [4,5]]`. From 9d9ee6ce06447bf35d2591976b43bd1e6beeed68 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 3 Apr 2024 12:16:50 +0200 Subject: [PATCH 150/470] Remove assertion --- src/Interpreters/Cache/FileSegment.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index f56f5d3f66c4..ac42c4d75bb3 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -845,7 +845,6 @@ bool FileSegment::assertCorrectnessUnlocked(const FileSegmentGuard::Lock & lock) chassert(downloaded_size == range().size()); chassert(downloaded_size > 0); chassert(std::filesystem::file_size(getPath()) > 0); - chassert(queue_iterator); check_iterator(queue_iterator); } else From 4b668d09906b2052c2ee2ee043067c5075e77071 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 2 Apr 2024 10:51:41 +0200 Subject: [PATCH 151/470] Unlimited output_format_pretty_max_value_width for --pager Signed-off-by: Azat Khuzhin --- src/Client/ClientBase.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 7a3192d1d9cf..4865ef4ef725 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -712,11 +712,20 @@ void ClientBase::adjustSettings() settings.input_format_values_allow_data_after_semicolon.changed = false; } - /// If pager is specified then output_format_pretty_max_rows is ignored, this should be handled by pager. - if (!pager.empty() && !global_context->getSettingsRef().output_format_pretty_max_rows.changed) + /// Do not limit pretty format output in case of --pager specified. 
+ if (!pager.empty()) { - settings.output_format_pretty_max_rows = std::numeric_limits::max(); - settings.output_format_pretty_max_rows.changed = false; + if (!global_context->getSettingsRef().output_format_pretty_max_rows.changed) + { + settings.output_format_pretty_max_rows = std::numeric_limits::max(); + settings.output_format_pretty_max_rows.changed = false; + } + + if (!global_context->getSettingsRef().output_format_pretty_max_value_width.changed) + { + settings.output_format_pretty_max_value_width = std::numeric_limits::max(); + settings.output_format_pretty_max_value_width.changed = false; + } } global_context->setSettings(settings); From e19c635edd3b511adf1decffaa70829e77f59b6a Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 10:12:55 +0200 Subject: [PATCH 152/470] Close: https://github.com/ClickHouse/ClickHouse/issues/4567 --- ...03058_analyzer_ambiguous_columns.reference | 0 .../03058_analyzer_ambiguous_columns.sql | 25 +++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 tests/queries/0_stateless/03058_analyzer_ambiguous_columns.reference create mode 100644 tests/queries/0_stateless/03058_analyzer_ambiguous_columns.sql diff --git a/tests/queries/0_stateless/03058_analyzer_ambiguous_columns.reference b/tests/queries/0_stateless/03058_analyzer_ambiguous_columns.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03058_analyzer_ambiguous_columns.sql b/tests/queries/0_stateless/03058_analyzer_ambiguous_columns.sql new file mode 100644 index 000000000000..47df6e76a389 --- /dev/null +++ b/tests/queries/0_stateless/03058_analyzer_ambiguous_columns.sql @@ -0,0 +1,25 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/4567 +DROP TABLE IF EXISTS fact; +DROP TABLE IF EXISTS animals; +DROP TABLE IF EXISTS colors; + +create table fact(id Int64, animal_key Int64, color_key Int64) Engine = MergeTree order by tuple(); +insert into fact values (1,1,1),(2,2,2); + +create table animals(animal_key UInt64, animal_name String) Engine = MergeTree order by tuple(); +insert into animals values (0, 'unknown'); + +create table colors(color_key UInt64, color_name String) Engine = MergeTree order by tuple(); +insert into colors values (0, 'unknown'); + + +select id, animal_name, a.animal_key, color_name, color_key +from fact a + left join (select toInt64(animal_key) animal_key, animal_name from animals) b on (a.animal_key = b.animal_key) + left join (select toInt64(color_key) color_key, color_name from colors) c on (a.color_key = c.color_key); -- { serverError AMBIGUOUS_IDENTIFIER } + +select id, animal_name, animal_key, color_name, color_key +from fact a + left join (select toInt64(animal_key) animal_key, animal_name from animals) b on (a.animal_key = b.animal_key) + left join (select toInt64(color_key) color_key, color_name from colors) c on (a.color_key = c.color_key); -- { serverError AMBIGUOUS_IDENTIFIER } + From 5e30984d8d7780182dda35c827b4c4626c06210d Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 10:19:14 +0200 Subject: [PATCH 153/470] Close: https://github.com/ClickHouse/ClickHouse/issues/17710 --- .../03059_analyzer_join_engine_missing_column.reference | 0 .../03059_analyzer_join_engine_missing_column.sql | 8 ++++++++ 2 files changed, 8 insertions(+) create mode 100644 tests/queries/0_stateless/03059_analyzer_join_engine_missing_column.reference create mode 100644 tests/queries/0_stateless/03059_analyzer_join_engine_missing_column.sql diff --git 
a/tests/queries/0_stateless/03059_analyzer_join_engine_missing_column.reference b/tests/queries/0_stateless/03059_analyzer_join_engine_missing_column.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03059_analyzer_join_engine_missing_column.sql b/tests/queries/0_stateless/03059_analyzer_join_engine_missing_column.sql new file mode 100644 index 000000000000..76150335f7ef --- /dev/null +++ b/tests/queries/0_stateless/03059_analyzer_join_engine_missing_column.sql @@ -0,0 +1,8 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/17710 +CREATE TABLE id_val(id UInt32, val UInt32) ENGINE = Memory; +CREATE TABLE id_val_join0(id UInt32, val UInt8) ENGINE = Join(ANY, LEFT, id) SETTINGS join_use_nulls = 0; +CREATE TABLE id_val_join1(id UInt32, val UInt8) ENGINE = Join(ANY, LEFT, id) SETTINGS join_use_nulls = 1; + +SELECT * FROM id_val ANY LEFT JOIN id_val_join0 USING (id) SETTINGS join_use_nulls = 0; + +SELECT * FROM id_val ANY LEFT JOIN id_val_join1 USING (id) SETTINGS join_use_nulls = 1; From 197e6008ea54d158b73cbc55c657f48801f08b48 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 10:24:58 +0200 Subject: [PATCH 154/470] Close: https://github.com/ClickHouse/ClickHouse/issues/11068 --- .../03060_analyzer_regular_view_alias.reference | 0 .../03060_analyzer_regular_view_alias.sql | 15 +++++++++++++++ 2 files changed, 15 insertions(+) create mode 100644 tests/queries/0_stateless/03060_analyzer_regular_view_alias.reference create mode 100644 tests/queries/0_stateless/03060_analyzer_regular_view_alias.sql diff --git a/tests/queries/0_stateless/03060_analyzer_regular_view_alias.reference b/tests/queries/0_stateless/03060_analyzer_regular_view_alias.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03060_analyzer_regular_view_alias.sql b/tests/queries/0_stateless/03060_analyzer_regular_view_alias.sql new file mode 100644 index 000000000000..ba0257d7b3bc --- /dev/null +++ b/tests/queries/0_stateless/03060_analyzer_regular_view_alias.sql @@ -0,0 +1,15 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/11068 +create table vt(datetime_value DateTime, value Float64) Engine=Memory; + +create view computed_datum_hours as +SELECT + toStartOfHour(b.datetime_value) AS datetime_desc, + sum(b.value) AS value +FROM vt AS b +GROUP BY toStartOfHour(b.datetime_value); + +SELECT + toStartOfHour(b.datetime_value) AS datetime_desc, + sum(b.value) AS value +FROM vt AS b +GROUP BY toStartOfHour(b.datetime_value); From bd90cd532fdc1dff06db3a8f10dfd25c76eb4234 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 11:10:37 +0200 Subject: [PATCH 155/470] Close: https://github.com/ClickHouse/ClickHouse/issues/24395 --- ...03061_analyzer_alias_as_right_key_in_join.reference | 10 ++++++++++ .../03061_analyzer_alias_as_right_key_in_join.sql | 7 +++++++ 2 files changed, 17 insertions(+) create mode 100644 tests/queries/0_stateless/03061_analyzer_alias_as_right_key_in_join.reference create mode 100644 tests/queries/0_stateless/03061_analyzer_alias_as_right_key_in_join.sql diff --git a/tests/queries/0_stateless/03061_analyzer_alias_as_right_key_in_join.reference b/tests/queries/0_stateless/03061_analyzer_alias_as_right_key_in_join.reference new file mode 100644 index 000000000000..af98bcd63975 --- /dev/null +++ b/tests/queries/0_stateless/03061_analyzer_alias_as_right_key_in_join.reference @@ -0,0 +1,10 @@ +0 0 +1 1 +2 2 +3 3 +4 4 +5 5 +6 6 +7 7 +8 8 +9 9 diff --git 
a/tests/queries/0_stateless/03061_analyzer_alias_as_right_key_in_join.sql b/tests/queries/0_stateless/03061_analyzer_alias_as_right_key_in_join.sql new file mode 100644 index 000000000000..e223909a5a8e --- /dev/null +++ b/tests/queries/0_stateless/03061_analyzer_alias_as_right_key_in_join.sql @@ -0,0 +1,7 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/24395 +CREATE TABLE xxxx_yyy (key UInt32, key_b ALIAS key) ENGINE=MergeTree() ORDER BY key; +INSERT INTO xxxx_yyy SELECT number FROM numbers(10); + +SELECT * +FROM xxxx_yyy AS a +INNER JOIN xxxx_yyy AS b ON a.key = b.key_b; From b66cfb4c823eae628ed8e5639d494f71768c93ea Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 11:14:18 +0200 Subject: [PATCH 156/470] Close: https://github.com/ClickHouse/ClickHouse/issues/23416 --- ...062_analyzer_join_engine_missing_column.reference | 2 ++ .../03062_analyzer_join_engine_missing_column.sql | 12 ++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 tests/queries/0_stateless/03062_analyzer_join_engine_missing_column.reference create mode 100644 tests/queries/0_stateless/03062_analyzer_join_engine_missing_column.sql diff --git a/tests/queries/0_stateless/03062_analyzer_join_engine_missing_column.reference b/tests/queries/0_stateless/03062_analyzer_join_engine_missing_column.reference new file mode 100644 index 000000000000..d496ccad6b62 --- /dev/null +++ b/tests/queries/0_stateless/03062_analyzer_join_engine_missing_column.reference @@ -0,0 +1,2 @@ +abc 0 0 0 1 +abc 0 0 0 1 diff --git a/tests/queries/0_stateless/03062_analyzer_join_engine_missing_column.sql b/tests/queries/0_stateless/03062_analyzer_join_engine_missing_column.sql new file mode 100644 index 000000000000..6c24ef6f66d0 --- /dev/null +++ b/tests/queries/0_stateless/03062_analyzer_join_engine_missing_column.sql @@ -0,0 +1,12 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/23416 +create table test (TOPIC String, PARTITION UInt64, OFFSET UInt64, ID UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03062', 'r2') ORDER BY (TOPIC, PARTITION, OFFSET); + +create table test_join (TOPIC String, PARTITION UInt64, OFFSET UInt64) ENGINE = Join(ANY, LEFT, `TOPIC`, `PARTITION`) SETTINGS join_any_take_last_row = 1; + +insert into test values('abc',0,0,0); + +insert into test_join values('abc',0,1); + +select *, joinGet('test_join', 'OFFSET', TOPIC, PARTITION) from test; + +select * from test any left join test_join using (TOPIC, PARTITION); From cc6bcb606eac18334823354500a79d2203be8c44 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 11:22:32 +0200 Subject: [PATCH 157/470] Close: https://github.com/ClickHouse/ClickHouse/issues/23162 --- ...er_multi_join_wrong_table_specifier.reference | 0 ...analyzer_multi_join_wrong_table_specifier.sql | 16 ++++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 tests/queries/0_stateless/03063_analyzer_multi_join_wrong_table_specifier.reference create mode 100644 tests/queries/0_stateless/03063_analyzer_multi_join_wrong_table_specifier.sql diff --git a/tests/queries/0_stateless/03063_analyzer_multi_join_wrong_table_specifier.reference b/tests/queries/0_stateless/03063_analyzer_multi_join_wrong_table_specifier.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03063_analyzer_multi_join_wrong_table_specifier.sql b/tests/queries/0_stateless/03063_analyzer_multi_join_wrong_table_specifier.sql new file mode 100644 index 000000000000..c2c29b688cdb --- /dev/null +++ 
b/tests/queries/0_stateless/03063_analyzer_multi_join_wrong_table_specifier.sql @@ -0,0 +1,16 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/23162 +CREATE TABLE t1 ( k Int64, x Int64) ENGINE = Memory; + +CREATE TABLE t2( x Int64 ) ENGINE = Memory; + +create table s (k Int64, d DateTime) Engine=Memory; + +SELECT * FROM t1 +INNER JOIN s ON t1.k = s.k +INNER JOIN t2 ON t2.x = t1.x +WHERE (t1.d >= now()); -- { serverError UNKNOWN_IDENTIFIER } + +SELECT * FROM t1 +INNER JOIN s ON t1.k = s.k +WHERE (t1.d >= now()); -- { serverError UNKNOWN_IDENTIFIER } + From e09187ea2e9473cd5bad59fd5eaf61a4ddc91e30 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 11:27:43 +0200 Subject: [PATCH 158/470] Close: https://github.com/ClickHouse/ClickHouse/issues/25655 --- .../0_stateless/03064_analyzer_named_subqueries.reference | 1 + .../queries/0_stateless/03064_analyzer_named_subqueries.sql | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/03064_analyzer_named_subqueries.reference create mode 100644 tests/queries/0_stateless/03064_analyzer_named_subqueries.sql diff --git a/tests/queries/0_stateless/03064_analyzer_named_subqueries.reference b/tests/queries/0_stateless/03064_analyzer_named_subqueries.reference new file mode 100644 index 000000000000..556d825db42a --- /dev/null +++ b/tests/queries/0_stateless/03064_analyzer_named_subqueries.reference @@ -0,0 +1 @@ +2 1 diff --git a/tests/queries/0_stateless/03064_analyzer_named_subqueries.sql b/tests/queries/0_stateless/03064_analyzer_named_subqueries.sql new file mode 100644 index 000000000000..ef8aca2fefac --- /dev/null +++ b/tests/queries/0_stateless/03064_analyzer_named_subqueries.sql @@ -0,0 +1,5 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/25655 +SELECT + sum(t.b) / 1 a, + sum(t.a) +FROM ( SELECT 1 a, 2 b ) t; From 71d85653e9a44c340520a63374af172610a989db Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 11:29:24 +0200 Subject: [PATCH 159/470] Close: https://github.com/ClickHouse/ClickHouse/issues/11757 --- .../03065_analyzer_cross_join_and_array_join.reference | 2 ++ .../0_stateless/03065_analyzer_cross_join_and_array_join.sql | 2 ++ 2 files changed, 4 insertions(+) create mode 100644 tests/queries/0_stateless/03065_analyzer_cross_join_and_array_join.reference create mode 100644 tests/queries/0_stateless/03065_analyzer_cross_join_and_array_join.sql diff --git a/tests/queries/0_stateless/03065_analyzer_cross_join_and_array_join.reference b/tests/queries/0_stateless/03065_analyzer_cross_join_and_array_join.reference new file mode 100644 index 000000000000..594a6a2deeb9 --- /dev/null +++ b/tests/queries/0_stateless/03065_analyzer_cross_join_and_array_join.reference @@ -0,0 +1,2 @@ +1 3 +2 4 diff --git a/tests/queries/0_stateless/03065_analyzer_cross_join_and_array_join.sql b/tests/queries/0_stateless/03065_analyzer_cross_join_and_array_join.sql new file mode 100644 index 000000000000..c270a0f45041 --- /dev/null +++ b/tests/queries/0_stateless/03065_analyzer_cross_join_and_array_join.sql @@ -0,0 +1,2 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/11757 +select * from (select [1, 2] a) aa cross join (select [3, 4] b) bb array join aa.a, bb.b; From 0758faa039edf0a1b61d05c17b2568e0a53693ea Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 11:32:26 +0200 Subject: [PATCH 160/470] Add test for analyzer and enable_global_with_statement=1 --- .../03066_analyzer_global_with_statement.reference | 1 + 
.../0_stateless/03066_analyzer_global_with_statement.sql | 7 +++++++ 2 files changed, 8 insertions(+) create mode 100644 tests/queries/0_stateless/03066_analyzer_global_with_statement.reference create mode 100644 tests/queries/0_stateless/03066_analyzer_global_with_statement.sql diff --git a/tests/queries/0_stateless/03066_analyzer_global_with_statement.reference b/tests/queries/0_stateless/03066_analyzer_global_with_statement.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/03066_analyzer_global_with_statement.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03066_analyzer_global_with_statement.sql b/tests/queries/0_stateless/03066_analyzer_global_with_statement.sql new file mode 100644 index 000000000000..338eb30e6ffd --- /dev/null +++ b/tests/queries/0_stateless/03066_analyzer_global_with_statement.sql @@ -0,0 +1,7 @@ +WITH 0 AS test +SELECT * +FROM +( + SELECT 1 AS test +) +SETTINGS enable_global_with_statement = 1 From 7872d920586e7843a8dd1db027ecf7c9fa75b07c Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 11:35:08 +0200 Subject: [PATCH 161/470] Add test for analyzer and complex alias join and with --- .../03067_analyzer_complex_alias_join.reference | 1 + .../0_stateless/03067_analyzer_complex_alias_join.sql | 9 +++++++++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/03067_analyzer_complex_alias_join.reference create mode 100644 tests/queries/0_stateless/03067_analyzer_complex_alias_join.sql diff --git a/tests/queries/0_stateless/03067_analyzer_complex_alias_join.reference b/tests/queries/0_stateless/03067_analyzer_complex_alias_join.reference new file mode 100644 index 000000000000..6192a595f1e5 --- /dev/null +++ b/tests/queries/0_stateless/03067_analyzer_complex_alias_join.reference @@ -0,0 +1 @@ +key \N diff --git a/tests/queries/0_stateless/03067_analyzer_complex_alias_join.sql b/tests/queries/0_stateless/03067_analyzer_complex_alias_join.sql new file mode 100644 index 000000000000..7d1264a61162 --- /dev/null +++ b/tests/queries/0_stateless/03067_analyzer_complex_alias_join.sql @@ -0,0 +1,9 @@ +with d as (select 'key'::Varchar(255) c, 'x'::Varchar(255) s) +SELECT r1, c as r2 +FROM ( + SELECT t as s, c as r1 + FROM ( SELECT 'y'::Varchar(255) as t, 'x'::Varchar(255) as s) t1 + LEFT JOIN d USING (s) + ) t2 +LEFT JOIN d using (s) +SETTINGS join_use_nulls=1; From ece7099017285db81637c5f65fad9678301a22f9 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 12:04:36 +0200 Subject: [PATCH 162/470] Close: https://github.com/ClickHouse/ClickHouse/issues/6571 --- .../03068_analyzer_distributed_join.reference | 2 + .../03068_analyzer_distributed_join.sql | 55 +++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 tests/queries/0_stateless/03068_analyzer_distributed_join.reference create mode 100644 tests/queries/0_stateless/03068_analyzer_distributed_join.sql diff --git a/tests/queries/0_stateless/03068_analyzer_distributed_join.reference b/tests/queries/0_stateless/03068_analyzer_distributed_join.reference new file mode 100644 index 000000000000..1444d39d9578 --- /dev/null +++ b/tests/queries/0_stateless/03068_analyzer_distributed_join.reference @@ -0,0 +1,2 @@ +localhost 9000 0 0 0 +localhost 9000 0 0 0 diff --git a/tests/queries/0_stateless/03068_analyzer_distributed_join.sql b/tests/queries/0_stateless/03068_analyzer_distributed_join.sql new file mode 100644 index 000000000000..714a64a01b67 --- /dev/null +++ 
b/tests/queries/0_stateless/03068_analyzer_distributed_join.sql @@ -0,0 +1,55 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/6571 +CREATE TABLE LINEITEM_shard ON CLUSTER test_shard_localhost +( + L_ORDERKEY UInt64, + L_COMMITDATE UInt32, + L_RECEIPTDATE UInt32 +) +ENGINE = MergeTree() +ORDER BY L_ORDERKEY; + +CREATE TABLE LINEITEM AS LINEITEM_shard +ENGINE = Distributed('test_shard_localhost', currentDatabase(), LINEITEM_shard, rand()); + +CREATE TABLE ORDERS_shard ON CLUSTER test_shard_localhost +( + O_ORDERKEY UInt64, + O_ORDERPRIORITY UInt32 +) +ENGINE = MergeTree() +ORDER BY O_ORDERKEY; + +CREATE TABLE ORDERS AS ORDERS_shard +ENGINE = Distributed('test_shard_localhost', currentDatabase(), ORDERS_shard, rand()); + +SET joined_subquery_requires_alias=0; + +select + O_ORDERPRIORITY, + count(*) as order_count +from ORDERS JOIN ( + select L_ORDERKEY + from + LINEITEM_shard + group by L_ORDERKEY + having any(L_COMMITDATE < L_RECEIPTDATE) +) on O_ORDERKEY=L_ORDERKEY +group by O_ORDERPRIORITY +order by O_ORDERPRIORITY +limit 1; + +SET joined_subquery_requires_alias=1; + +select + O_ORDERPRIORITY, + count(*) as order_count +from ORDERS JOIN ( + select L_ORDERKEY + from + LINEITEM_shard + group by L_ORDERKEY + having any(L_COMMITDATE < L_RECEIPTDATE) +) AS x on O_ORDERKEY=L_ORDERKEY +group by O_ORDERPRIORITY +order by O_ORDERPRIORITY +limit 1; From 71450c909a30e2ae4120aad5dbff518f6d5985da Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 3 Apr 2024 10:39:43 +0000 Subject: [PATCH 163/470] Fix lambda(tuple(x), x + 1) syntax in analyzer --- src/Analyzer/QueryTreeBuilder.cpp | 2 +- src/Interpreters/ActionsVisitor.cpp | 7 +++-- src/Parsers/ASTFunction.cpp | 11 ++++++++ src/Parsers/ASTFunction.h | 3 +++ src/Parsers/ExpressionElementParsers.cpp | 4 +-- .../02343_analyzer_lambdas.reference | 8 ++++++ .../0_stateless/02343_analyzer_lambdas.sql | 26 +++++++++++++++++++ 7 files changed, 54 insertions(+), 7 deletions(-) diff --git a/src/Analyzer/QueryTreeBuilder.cpp b/src/Analyzer/QueryTreeBuilder.cpp index df80f46b3cd1..d2587d74b7c7 100644 --- a/src/Analyzer/QueryTreeBuilder.cpp +++ b/src/Analyzer/QueryTreeBuilder.cpp @@ -558,7 +558,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildExpression(const ASTPtr & expression, co } else if (const auto * function = expression->as()) { - if (function->is_lambda_function) + if (function->is_lambda_function || isASTLambdaFunction(*function)) { const auto & lambda_arguments_and_expression = function->arguments->as().children; auto & lambda_arguments_tuple = lambda_arguments_and_expression.at(0)->as(); diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 16e2449206d0..093c266c785a 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1130,12 +1130,11 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & const auto * query_parameter = child->as(); if (function && function->name == "lambda") { - /// If the argument is a lambda expression, just remember its approximate type. - if (function->arguments->children.size() != 2) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "lambda requires two arguments"); + if (!isASTLambdaFunction(*function)) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Lambda function definition expects two arguments, first argument must be a tuple of arguments"); + /// If the argument is a lambda expression, just remember its approximate type. 
const auto * lambda_args_tuple = function->arguments->children.at(0)->as(); - if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") throw Exception(ErrorCodes::TYPE_MISMATCH, "First argument of lambda must be a tuple"); diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 07eea86ef811..cdc9a471e985 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -793,4 +793,15 @@ bool tryGetFunctionNameInto(const IAST * ast, String & name) return false; } +bool isASTLambdaFunction(const ASTFunction & function) +{ + if (function.name == "lambda" && function.arguments && function.arguments->children.size() == 2) + { + const auto * lambda_args_tuple = function.arguments->children.at(0)->as(); + return lambda_args_tuple && lambda_args_tuple->name == "tuple"; + } + + return false; +} + } diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h index 631b6285bfa4..3a94691f25da 100644 --- a/src/Parsers/ASTFunction.h +++ b/src/Parsers/ASTFunction.h @@ -111,4 +111,7 @@ inline String getFunctionName(const ASTPtr & ast) { return getFunctionName(ast.g inline std::optional tryGetFunctionName(const ASTPtr & ast) { return tryGetFunctionName(ast.get()); } inline bool tryGetFunctionNameInto(const ASTPtr & ast, String & name) { return tryGetFunctionNameInto(ast.get(), name); } +/// Checks if function is a lambda function definition `lambda((x, y), x + y)` +bool isASTLambdaFunction(const ASTFunction & function); + } diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 67f4a306292d..29b497367837 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1545,8 +1545,8 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e { if (auto * func = lambda->as(); func && func->name == "lambda") { - if (func->arguments->children.size() != 2) - throw Exception(ErrorCodes::SYNTAX_ERROR, "lambda requires two arguments"); + if (!isASTLambdaFunction(*func)) + throw Exception(ErrorCodes::SYNTAX_ERROR, "Lambda function definition expects two arguments, first argument must be a tuple of arguments"); const auto * lambda_args_tuple = func->arguments->children.at(0)->as(); if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") diff --git a/tests/queries/0_stateless/02343_analyzer_lambdas.reference b/tests/queries/0_stateless/02343_analyzer_lambdas.reference index 8d29481c2555..62d9e9f47265 100644 --- a/tests/queries/0_stateless/02343_analyzer_lambdas.reference +++ b/tests/queries/0_stateless/02343_analyzer_lambdas.reference @@ -27,3 +27,11 @@ Lambda untuple Lambda carrying 2 1 1 0 +Lambda legacy syntax +[2,3,4] +[2,3,4] +[2,3,4] +['hello','world'] +[2,3,4] +[2,3,4] 2 +[2,3,4] 2 1 diff --git a/tests/queries/0_stateless/02343_analyzer_lambdas.sql b/tests/queries/0_stateless/02343_analyzer_lambdas.sql index b90f7b32b57a..0c257cf6f18b 100644 --- a/tests/queries/0_stateless/02343_analyzer_lambdas.sql +++ b/tests/queries/0_stateless/02343_analyzer_lambdas.sql @@ -65,5 +65,31 @@ SELECT 'Lambda carrying'; WITH (functor, x) -> functor(x) AS lambda, x -> x + 1 AS functor_1, x -> toString(x) AS functor_2 SELECT lambda(functor_1, 1), lambda(functor_2, 1); WITH (functor, x) -> functor(x) AS lambda, x -> x + 1 AS functor_1, x -> toString(x) AS functor_2 SELECT lambda(functor_1, id), lambda(functor_2, id) FROM test_table; + +SELECT 'Lambda legacy syntax'; + +SELECT arrayMap(lambda(tuple(x), x + 1), [1, 2, 3]); + +WITH 222 AS lambda +SELECT 
arrayMap(lambda(tuple(x), x + 1), [1, 2, 3]); + +SELECT arrayMap(lambda((x,), x + 1), [1, 2, 3]); + +SELECT arraySort(lambda((x, y), y), ['world', 'hello'], [2, 1]); + +WITH 222 AS lambda +SELECT arrayMap(lambda((x, ), x + 1), [1, 2, 3]); + +WITH x -> x + 1 AS lambda +SELECT arrayMap(lambda(tuple(x), x + 1), [1, 2, 3]), lambda(1); + +-- lambda(tuple(x), x + 1) parsed as lambda definion but not as call of lambda defined in WITH +WITH (x, y) -> y AS lambda +SELECT arrayMap(lambda(tuple(x), x + 1), [1, 2, 3]), lambda(tuple(x), x + 1), 1 AS x; -- { serverError BAD_ARGUMENTS } + +WITH (x, y) -> y AS lambda2 +SELECT arrayMap(lambda(tuple(x), x + 1), [1, 2, 3]), lambda2(tuple(x), x + 1), 1 AS x; + + DROP TABLE test_table_tuple; DROP TABLE test_table; From e693418f6312772bfcde777533f141754c377d41 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 3 Apr 2024 10:45:45 +0000 Subject: [PATCH 164/470] Fix crash in index definition containing sql udf --- .../InterpreterCreateIndexQuery.cpp | 4 +- .../MySQL/InterpretersMySQLDDLQuery.cpp | 6 +-- src/Parsers/ASTIndexDeclaration.cpp | 54 +++++++++++++++---- src/Parsers/ASTIndexDeclaration.h | 15 +++--- src/Parsers/ParserCreateIndexQuery.cpp | 11 ++-- src/Parsers/ParserCreateQuery.cpp | 10 ++-- src/Storages/IndicesDescription.cpp | 18 +++---- src/Storages/MergeTree/MergeTreeData.cpp | 5 +- .../StorageSystemDataSkippingIndices.cpp | 6 ++- ...033_index_definition_sql_udf_bug.reference | 1 + .../03033_index_definition_sql_udf_bug.sql | 21 ++++++++ 11 files changed, 103 insertions(+), 48 deletions(-) create mode 100644 tests/queries/0_stateless/03033_index_definition_sql_udf_bug.reference create mode 100644 tests/queries/0_stateless/03033_index_definition_sql_udf_bug.sql diff --git a/src/Interpreters/InterpreterCreateIndexQuery.cpp b/src/Interpreters/InterpreterCreateIndexQuery.cpp index aed4b0587b40..6045b5d2e24a 100644 --- a/src/Interpreters/InterpreterCreateIndexQuery.cpp +++ b/src/Interpreters/InterpreterCreateIndexQuery.cpp @@ -39,12 +39,12 @@ BlockIO InterpreterCreateIndexQuery::execute() } // Noop if allow_create_index_without_type = true. throw otherwise - if (!create_index.index_decl->as()->type) + if (!create_index.index_decl->as()->getType()) { if (!current_context->getSettingsRef().allow_create_index_without_type) { throw Exception(ErrorCodes::INCORRECT_QUERY, "CREATE INDEX without TYPE is forbidden." - " SET allow_create_index_without_type=1 to ignore this statements."); + " SET allow_create_index_without_type=1 to ignore this statements"); } else { diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index fd7ffca28723..4821d607d0eb 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -498,14 +498,12 @@ ASTs InterpreterCreateImpl::getRewrittenQueries( columns->columns->children.emplace_back(create_materialized_column_declaration(version_column_name, "UInt64", UInt64(1))); /// Add minmax skipping index for _version column. 
- auto version_index = std::make_shared(); - version_index->name = version_column_name; auto index_expr = std::make_shared(version_column_name); auto index_type = makeASTFunction("minmax"); index_type->no_empty_args = true; - version_index->set(version_index->expr, index_expr); - version_index->set(version_index->type, index_type); + auto version_index = std::make_shared(index_expr, index_type, version_column_name); version_index->granularity = 1; + ASTPtr indices = std::make_shared(); indices->children.push_back(version_index); columns->set(columns->indices, indices); diff --git a/src/Parsers/ASTIndexDeclaration.cpp b/src/Parsers/ASTIndexDeclaration.cpp index 8dac5389c803..32689f31546c 100644 --- a/src/Parsers/ASTIndexDeclaration.cpp +++ b/src/Parsers/ASTIndexDeclaration.cpp @@ -8,24 +8,57 @@ namespace DB { -ASTPtr ASTIndexDeclaration::clone() const +namespace ErrorCodes { - auto res = std::make_shared(); + extern const int LOGICAL_ERROR; +} + + +ASTIndexDeclaration::ASTIndexDeclaration(ASTPtr expression, ASTPtr type, const String & name_) + : name(name_) +{ + if (!expression) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index declaration must have an expression"); + children.push_back(expression); - res->name = name; - if (granularity) - res->granularity = granularity; - if (expr) - res->set(res->expr, expr->clone()); if (type) - res->set(res->type, type->clone()); + { + if (!dynamic_cast(type.get())) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index declaration type must be a function"); + children.push_back(type); + } +} + +ASTPtr ASTIndexDeclaration::clone() const +{ + auto expr = getExpression(); + auto type = getType(); + auto res = std::make_shared(expr, type, name); + res->granularity = granularity; + return res; } +ASTPtr ASTIndexDeclaration::getExpression() const +{ + if (children.size() <= expression_idx) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index declaration must have an expression"); + return children[expression_idx]; +} + +std::shared_ptr ASTIndexDeclaration::getType() const +{ + if (children.size() <= type_idx) + return nullptr; + auto func_ast = std::dynamic_pointer_cast(children[type_idx]); + if (!func_ast) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index declaration type must be a function"); + return func_ast; +} void ASTIndexDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const { - if (expr) + if (auto expr = getExpression()) { if (part_of_create_index_query) { @@ -46,11 +79,12 @@ void ASTIndexDeclaration::formatImpl(const FormatSettings & s, FormatState & sta } } - if (type) + if (auto type = getType()) { s.ostr << (s.hilite ? hilite_keyword : "") << " TYPE " << (s.hilite ? hilite_none : ""); type->formatImpl(s, state, frame); } + if (granularity) { s.ostr << (s.hilite ? hilite_keyword : "") << " GRANULARITY " << (s.hilite ? 
hilite_none : ""); diff --git a/src/Parsers/ASTIndexDeclaration.h b/src/Parsers/ASTIndexDeclaration.h index 1fbf5e126959..dd05ad081848 100644 --- a/src/Parsers/ASTIndexDeclaration.h +++ b/src/Parsers/ASTIndexDeclaration.h @@ -16,9 +16,9 @@ class ASTIndexDeclaration : public IAST static const auto DEFAULT_ANNOY_INDEX_GRANULARITY = 100'000'000uz; static const auto DEFAULT_USEARCH_INDEX_GRANULARITY = 100'000'000uz; + ASTIndexDeclaration(ASTPtr expression, ASTPtr type, const String & name_); + String name; - IAST * expr; - ASTFunction * type; UInt64 granularity; bool part_of_create_index_query = false; @@ -28,11 +28,12 @@ class ASTIndexDeclaration : public IAST ASTPtr clone() const override; void formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const override; - void forEachPointerToChild(std::function f) override - { - f(reinterpret_cast(&expr)); - f(reinterpret_cast(&type)); - } + ASTPtr getExpression() const; + std::shared_ptr getType() const; + +private: + static constexpr size_t expression_idx = 0; + static constexpr size_t type_idx = 1; }; } diff --git a/src/Parsers/ParserCreateIndexQuery.cpp b/src/Parsers/ParserCreateIndexQuery.cpp index 3b1b9d8ec848..fd2bbbab1778 100644 --- a/src/Parsers/ParserCreateIndexQuery.cpp +++ b/src/Parsers/ParserCreateIndexQuery.cpp @@ -54,19 +54,18 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected return false; } - auto index = std::make_shared(); + /// name is set below in ParserCreateIndexQuery + auto index = std::make_shared(expr, type, ""); index->part_of_create_index_query = true; - index->set(index->expr, expr); - if (type) - index->set(index->type, type); if (granularity) index->granularity = granularity->as().value.safeGet(); else { - if (index->type && index->type->name == "annoy") + auto index_type = index->getType(); + if (index_type && index_type->name == "annoy") index->granularity = ASTIndexDeclaration::DEFAULT_ANNOY_INDEX_GRANULARITY; - else if (index->type && index->type->name == "usearch") + else if (index_type && index_type->name == "usearch") index->granularity = ASTIndexDeclaration::DEFAULT_USEARCH_INDEX_GRANULARITY; else index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 1510cc8e1954..ff88b58760b3 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -205,18 +205,16 @@ bool ParserIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected & expe return false; } - auto index = std::make_shared(); - index->name = name->as().name(); - index->set(index->expr, expr); - index->set(index->type, type); + auto index = std::make_shared(expr, type, name->as().name()); if (granularity) index->granularity = granularity->as().value.safeGet(); else { - if (index->type->name == "annoy") + auto index_type = index->getType(); + if (index_type->name == "annoy") index->granularity = ASTIndexDeclaration::DEFAULT_ANNOY_INDEX_GRANULARITY; - else if (index->type->name == "usearch") + else if (index_type->name == "usearch") index->granularity = ASTIndexDeclaration::DEFAULT_USEARCH_INDEX_GRANULARITY; else index->granularity = ASTIndexDeclaration::DEFAULT_INDEX_GRANULARITY; diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index 14555dca63b4..13375a1f4dc8 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -85,22 +85,23 @@ IndexDescription 
IndexDescription::getIndexFromAST(const ASTPtr & definition_ast if (index_definition->name.empty()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Skip index must have name in definition."); - if (!index_definition->type) + auto index_type = index_definition->getType(); + if (!index_type) throw Exception(ErrorCodes::INCORRECT_QUERY, "TYPE is required for index"); - if (index_definition->type->parameters && !index_definition->type->parameters->children.empty()) + if (index_type->parameters && !index_type->parameters->children.empty()) throw Exception(ErrorCodes::INCORRECT_QUERY, "Index type cannot have parameters"); IndexDescription result; result.definition_ast = index_definition->clone(); result.name = index_definition->name; - result.type = Poco::toLower(index_definition->type->name); + result.type = Poco::toLower(index_definition->getType()->name); result.granularity = index_definition->granularity; ASTPtr expr_list; - if (index_definition->expr) + if (auto index_expression = index_definition->getExpression()) { - expr_list = extractKeyExpressionList(index_definition->expr->clone()); + expr_list = extractKeyExpressionList(index_expression); ReplaceAliasToExprVisitor::Data data{columns}; ReplaceAliasToExprVisitor{data}.visit(expr_list); @@ -125,12 +126,11 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast result.data_types.push_back(elem.type); } - const auto & definition_arguments = index_definition->type->arguments; - if (definition_arguments) + if (index_type && index_type->arguments) { - for (size_t i = 0; i < definition_arguments->children.size(); ++i) + for (size_t i = 0; i < index_type->arguments->children.size(); ++i) { - const auto * argument = definition_arguments->children[i]->as(); + const auto * argument = index_type->arguments->children[i]->as(); if (!argument) throw Exception(ErrorCodes::INCORRECT_QUERY, "Only literals can be skip index arguments"); result.arguments.emplace_back(argument->value); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e984f306e2e1..6163a8dbee90 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -652,8 +652,9 @@ void MergeTreeData::checkProperties( if (!allow_suspicious_indices && !attach) { const auto * index_ast = typeid_cast(index.definition_ast.get()); - if (const auto * index_function = typeid_cast(index_ast->expr)) - checkSuspiciousIndices(index_function); + auto index_function = index_ast ? index_ast->getType() : nullptr; + if (index_function) + checkSuspiciousIndices(index_function.get()); } MergeTreeIndexFactory::instance().validate(index, attach); diff --git a/src/Storages/System/StorageSystemDataSkippingIndices.cpp b/src/Storages/System/StorageSystemDataSkippingIndices.cpp index ff782647c791..2afc03d0e5ea 100644 --- a/src/Storages/System/StorageSystemDataSkippingIndices.cpp +++ b/src/Storages/System/StorageSystemDataSkippingIndices.cpp @@ -131,8 +131,10 @@ class DataSkippingIndicesSource : public ISource // 'type_full' column if (column_mask[src_index++]) { - if (auto * expression = index.definition_ast->as(); expression && expression->type) - res_columns[res_index++]->insert(queryToString(*expression->type)); + auto * expression = index.definition_ast->as(); + auto index_type = expression ? 
expression->getType() : nullptr; + if (index_type) + res_columns[res_index++]->insert(queryToString(*index_type)); else res_columns[res_index++]->insertDefault(); } diff --git a/tests/queries/0_stateless/03033_index_definition_sql_udf_bug.reference b/tests/queries/0_stateless/03033_index_definition_sql_udf_bug.reference new file mode 100644 index 000000000000..5782593a4550 --- /dev/null +++ b/tests/queries/0_stateless/03033_index_definition_sql_udf_bug.reference @@ -0,0 +1 @@ +2 2 2 diff --git a/tests/queries/0_stateless/03033_index_definition_sql_udf_bug.sql b/tests/queries/0_stateless/03033_index_definition_sql_udf_bug.sql new file mode 100644 index 000000000000..84ab1d33c948 --- /dev/null +++ b/tests/queries/0_stateless/03033_index_definition_sql_udf_bug.sql @@ -0,0 +1,21 @@ +-- Tags: no-parallel + +DROP FUNCTION IF EXISTS test_func_1; +CREATE FUNCTION test_func_1 AS (a, b, c) -> ((a + b) + c); + +DROP TABLE IF EXISTS t4_2; +CREATE TABLE t4_2 +( + `col1` Int64 NOT NULL COMMENT 'test', + `col2` Float64 NOT NULL, + `col3` Int64 NOT NULL, + INDEX ind4 test_func_1(col1, col3, col1) TYPE set(51) GRANULARITY 5 +) +ENGINE = MergeTree +ORDER BY col1 +; + +INSERT INTO t4_2 (col1, col2, col3) SELECT number, number, number FROM numbers(10); + +SELECT * FROM t4_2 WHERE test_func_1(col1, col3, col1) = 6 +SETTINGS force_data_skipping_indices = 'ind4'; From fdb752de6f56a4cfdc2b48b342c873b861d2ddb9 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 12:52:28 +0200 Subject: [PATCH 165/470] Close: https://github.com/ClickHouse/ClickHouse/issues/4432 --- .../03069_analyzer_with_alias_in_array_join.reference | 2 ++ .../0_stateless/03069_analyzer_with_alias_in_array_join.sql | 5 +++++ 2 files changed, 7 insertions(+) create mode 100644 tests/queries/0_stateless/03069_analyzer_with_alias_in_array_join.reference create mode 100644 tests/queries/0_stateless/03069_analyzer_with_alias_in_array_join.sql diff --git a/tests/queries/0_stateless/03069_analyzer_with_alias_in_array_join.reference b/tests/queries/0_stateless/03069_analyzer_with_alias_in_array_join.reference new file mode 100644 index 000000000000..1191247b6d9a --- /dev/null +++ b/tests/queries/0_stateless/03069_analyzer_with_alias_in_array_join.reference @@ -0,0 +1,2 @@ +1 +2 diff --git a/tests/queries/0_stateless/03069_analyzer_with_alias_in_array_join.sql b/tests/queries/0_stateless/03069_analyzer_with_alias_in_array_join.sql new file mode 100644 index 000000000000..84ad0b4e199c --- /dev/null +++ b/tests/queries/0_stateless/03069_analyzer_with_alias_in_array_join.sql @@ -0,0 +1,5 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/4432 +WITH [1, 2] AS zz +SELECT x +FROM system.one +ARRAY JOIN zz AS x From 0782ccaa91fa4a850cea00f52a660ee818e8e3c8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Apr 2024 12:53:09 +0200 Subject: [PATCH 166/470] Update docs/en/sql-reference/statements/drop.md Co-authored-by: Han Fei --- docs/en/sql-reference/statements/drop.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md index 833ff7564492..98b849ecf3b8 100644 --- a/docs/en/sql-reference/statements/drop.md +++ b/docs/en/sql-reference/statements/drop.md @@ -23,7 +23,7 @@ DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] [SYNC] Deletes one or more tables. 
:::tip -To undo the deletion of a table, please see see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md) +To undo the deletion of a table, please see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md) ::: Syntax: From c9430180258631d44fb7677c1ea725db586d63e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 3 Apr 2024 13:00:25 +0200 Subject: [PATCH 167/470] Include table name in paranoid checks --- src/Storages/StorageReplicatedMergeTree.cpp | 32 +++++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 8ca061db4ecf..70d77432847a 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1524,8 +1524,13 @@ void StorageReplicatedMergeTree::paranoidCheckForCoveredPartsInZooKeeperOnStart( if (!found) { - LOG_WARNING(log, "Part {} exists in ZooKeeper and covered by another part in ZooKeeper ({}), but doesn't exist on any disk. " - "It may cause false-positive 'part is lost forever' messages", part_name, covering_part); + LOG_WARNING( + log, + "Part {} of table {} exists in ZooKeeper and covered by another part in ZooKeeper ({}), but doesn't exist on any disk. " + "It may cause false-positive 'part is lost forever' messages", + part_name, + getStorageID().getNameForLogs(), + covering_part); ProfileEvents::increment(ProfileEvents::ReplicatedCoveredPartsInZooKeeperOnStart); chassert(false); } @@ -2351,8 +2356,12 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::executeFetchShared } } -static void paranoidCheckForCoveredPartsInZooKeeper(const ZooKeeperPtr & zookeeper, const String & replica_path, - MergeTreeDataFormatVersion format_version, const String & covering_part_name) +static void paranoidCheckForCoveredPartsInZooKeeper( + const ZooKeeperPtr & zookeeper, + const String & replica_path, + MergeTreeDataFormatVersion format_version, + const String & covering_part_name, + const StorageReplicatedMergeTree & storage) { #ifdef ABORT_ON_LOGICAL_ERROR constexpr bool paranoid_check_for_covered_parts_default = true; @@ -2371,8 +2380,12 @@ static void paranoidCheckForCoveredPartsInZooKeeper(const ZooKeeperPtr & zookeep { auto part_info = MergeTreePartInfo::fromPartName(part_name, format_version); if (drop_range_info.contains(part_info)) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Part {} remains in ZooKeeper after DROP_RANGE {}", part_name, covering_part_name); + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Part {} from table {} remains in ZooKeeper after DROP_RANGE {}", + part_name, + storage.getStorageID().getNameForLogs(), + covering_part_name); } } @@ -2434,7 +2447,7 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) /// Forcibly remove parts from ZooKeeper removePartsFromZooKeeperWithRetries(parts_to_remove); - paranoidCheckForCoveredPartsInZooKeeper(getZooKeeper(), replica_path, format_version, entry.new_part_name); + paranoidCheckForCoveredPartsInZooKeeper(getZooKeeper(), replica_path, format_version, entry.new_part_name, *this); if (entry.detach) LOG_DEBUG(log, "Detached {} parts inside {}.", parts_to_remove.size(), entry.new_part_name); @@ -2572,7 +2585,8 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) LOG_INFO(log, "All parts from REPLACE PARTITION command have been already attached"); removePartsFromZooKeeperWithRetries(parts_to_remove); if (replace) - 
paranoidCheckForCoveredPartsInZooKeeper(getZooKeeper(), replica_path, format_version, entry_replace.drop_range_part_name); + paranoidCheckForCoveredPartsInZooKeeper( + getZooKeeper(), replica_path, format_version, entry_replace.drop_range_part_name, *this); return true; } @@ -2893,7 +2907,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(LogEntry & entry) removePartsFromZooKeeperWithRetries(parts_to_remove); if (replace) - paranoidCheckForCoveredPartsInZooKeeper(getZooKeeper(), replica_path, format_version, entry_replace.drop_range_part_name); + paranoidCheckForCoveredPartsInZooKeeper(getZooKeeper(), replica_path, format_version, entry_replace.drop_range_part_name, *this); res_parts.clear(); parts_to_remove.clear(); cleanup_thread.wakeup(); From 0b831ab87a1d6cc510042d021639548b2600bfdb Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 13:09:43 +0200 Subject: [PATCH 168/470] Close: https://github.com/ClickHouse/ClickHouse/issues/8259 --- ...0_analyzer_CTE_scalar_as_numbers.reference | 25 +++++++++++++++++++ .../03070_analyzer_CTE_scalar_as_numbers.sql | 5 ++++ 2 files changed, 30 insertions(+) create mode 100644 tests/queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.reference create mode 100644 tests/queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.sql diff --git a/tests/queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.reference b/tests/queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.reference new file mode 100644 index 000000000000..595b4d6b5b85 --- /dev/null +++ b/tests/queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.reference @@ -0,0 +1,25 @@ +0 25 +1 25 +2 25 +3 25 +4 25 +5 25 +6 25 +7 25 +8 25 +9 25 +10 25 +11 25 +12 25 +13 25 +14 25 +15 25 +16 25 +17 25 +18 25 +19 25 +20 25 +21 25 +22 25 +23 25 +24 25 diff --git a/tests/queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.sql b/tests/queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.sql new file mode 100644 index 000000000000..672c4f53e5fe --- /dev/null +++ b/tests/queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.sql @@ -0,0 +1,5 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/8259 +with + (select 25) as something +select *, something +from numbers(toUInt64(assumeNotNull(something))); From f822791ee397d6def5bd64adc47b077b484d0058 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 13:17:19 +0200 Subject: [PATCH 169/470] Close: https://github.com/ClickHouse/ClickHouse/issues/9233 --- ...r_array_join_forbid_non_existing_columns.reference | 0 ...nalyzer_array_join_forbid_non_existing_columns.sql | 11 +++++++++++ 2 files changed, 11 insertions(+) create mode 100644 tests/queries/0_stateless/03071_analyzer_array_join_forbid_non_existing_columns.reference create mode 100644 tests/queries/0_stateless/03071_analyzer_array_join_forbid_non_existing_columns.sql diff --git a/tests/queries/0_stateless/03071_analyzer_array_join_forbid_non_existing_columns.reference b/tests/queries/0_stateless/03071_analyzer_array_join_forbid_non_existing_columns.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03071_analyzer_array_join_forbid_non_existing_columns.sql b/tests/queries/0_stateless/03071_analyzer_array_join_forbid_non_existing_columns.sql new file mode 100644 index 000000000000..af81e3c28190 --- /dev/null +++ b/tests/queries/0_stateless/03071_analyzer_array_join_forbid_non_existing_columns.sql @@ -0,0 +1,11 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/9233 +SELECT * +FROM +( + 
SELECT + [1, 2, 3] AS x, + [4, 5, 6] AS y +) +ARRAY JOIN + x, + Y; -- { serverError UNKNOWN_IDENTIFIER } From 15dd5ce5f665467d33bb02e7d7ba2decfade3e87 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 3 Apr 2024 08:39:16 -0300 Subject: [PATCH 170/470] use raw literal string --- src/IO/S3/URI.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 69b539cde8be..7f628d27f34e 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -40,7 +40,7 @@ URI::URI(const std::string & uri_) /// Case when AWS Private Link Interface is being used /// E.g. (bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com/bucket-name/key) /// https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html - static const RE2 aws_private_link_style_pattern("bucket\\.vpce\\-([a-z0-9\\-.:]+)\\.vpce.amazonaws.com"); + static const RE2 aws_private_link_style_pattern(R"(bucket\.vpce\-([a-z0-9\-.:]+)\.vpce.amazonaws.com)"); /// Case when bucket name and key represented in path of S3 URL. /// E.g. (https://s3.region.amazonaws.com/bucket-name/key) From 40817de19a674763af7f7b83bd7712053f0ae18d Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 13:44:31 +0200 Subject: [PATCH 171/470] Close: https://github.com/ClickHouse/ClickHouse/issues/14699 --- .../03072_analyzer_missing_columns_from_subquery.reference | 1 + .../03072_analyzer_missing_columns_from_subquery.sql | 2 ++ 2 files changed, 3 insertions(+) create mode 100644 tests/queries/0_stateless/03072_analyzer_missing_columns_from_subquery.reference create mode 100644 tests/queries/0_stateless/03072_analyzer_missing_columns_from_subquery.sql diff --git a/tests/queries/0_stateless/03072_analyzer_missing_columns_from_subquery.reference b/tests/queries/0_stateless/03072_analyzer_missing_columns_from_subquery.reference new file mode 100644 index 000000000000..573541ac9702 --- /dev/null +++ b/tests/queries/0_stateless/03072_analyzer_missing_columns_from_subquery.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/03072_analyzer_missing_columns_from_subquery.sql b/tests/queries/0_stateless/03072_analyzer_missing_columns_from_subquery.sql new file mode 100644 index 000000000000..68ff81413b76 --- /dev/null +++ b/tests/queries/0_stateless/03072_analyzer_missing_columns_from_subquery.sql @@ -0,0 +1,2 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/14699 +select * from (select number from numbers(1)) where not ignore(*); From 44b3ce9ec88ac147e33856170c402bac3444bb54 Mon Sep 17 00:00:00 2001 From: kevinyhzou Date: Wed, 3 Apr 2024 19:49:55 +0800 Subject: [PATCH 172/470] review fix --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index f578e0c8d0af..0ecd6d81f25e 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -125,7 +125,7 @@ static std::map sett {"azure_max_upload_part_size", 5ull*1024*1024*1024, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."}, {"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."}, {"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is 
multiplied by azure_upload_part_size_multiply_factor."}, - {"input_format_hive_text_allow_variable_number_of_columns", true, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, + {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, }}, {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, From 04445b30ef3722f601dde6825fbffd1a9704fbc1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 3 Apr 2024 11:50:14 +0000 Subject: [PATCH 173/470] Fix another logical error in group_by_use_nulls. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 4 ++-- .../0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index f5474ddb662a..837d309d0312 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6155,7 +6155,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id return resolved_expression_it->second; } - bool is_nullable_group_by_key = scope.nullable_group_by_keys.contains(node) && !scope.expressions_in_resolve_process_stack.hasAggregateFunction(); + bool is_nullable_group_by_key = scope.nullable_group_by_keys.contains(node); if (is_nullable_group_by_key) ++scope.found_nullable_group_by_key_in_scope; @@ -6452,7 +6452,7 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id validateTreeSize(node, scope.context->getSettingsRef().max_expanded_ast_elements, node_to_tree_size); - if (is_nullable_group_by_key && scope.found_nullable_group_by_key_in_scope == 1) + if (is_nullable_group_by_key && scope.found_nullable_group_by_key_in_scope == 1 && !scope.expressions_in_resolve_process_stack.hasAggregateFunction()) { node = node->clone(); node->convertToNullable(); diff --git a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql index 012da5475817..28042fc5b440 100644 --- a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql +++ b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql @@ -5,3 +5,5 @@ SELECT tuple(tuple(number)) as x FROM numbers(10) GROUP BY (number, tuple(number select tuple(array(number)) as x FROM numbers(10) GROUP BY number, array(number) WITH ROLLUP order by x; SELECT tuple(number) AS x FROM numbers(10) GROUP BY GROUPING SETS (number) order by x; + +SELECT ignore(toFixedString('Lambda as function parameter', 28), toNullable(28), ignore(8)), sum(marks) FROM system.parts GROUP BY GROUPING SETS ((2)) FORMAT Null settings optimize_injective_functions_in_group_by=1, optimize_group_by_function_keys=1, group_by_use_nulls=1; -- { serverError ILLEGAL_AGGREGATION } From c576884a0b40d4870a7cbb08e62ebffec24ee328 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 13:51:06 +0200 Subject: [PATCH 174/470] Close: https://github.com/ClickHouse/ClickHouse/issues/27068 --- 
.../03073_analyzer_alias_as_column_name.reference | 2 ++ .../0_stateless/03073_analyzer_alias_as_column_name.sql | 8 ++++++++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/03073_analyzer_alias_as_column_name.reference create mode 100644 tests/queries/0_stateless/03073_analyzer_alias_as_column_name.sql diff --git a/tests/queries/0_stateless/03073_analyzer_alias_as_column_name.reference b/tests/queries/0_stateless/03073_analyzer_alias_as_column_name.reference new file mode 100644 index 000000000000..ca6280d2dbf4 --- /dev/null +++ b/tests/queries/0_stateless/03073_analyzer_alias_as_column_name.reference @@ -0,0 +1,2 @@ +1 1997-02-01 +2 1997-02-01 diff --git a/tests/queries/0_stateless/03073_analyzer_alias_as_column_name.sql b/tests/queries/0_stateless/03073_analyzer_alias_as_column_name.sql new file mode 100644 index 000000000000..0e7d2eb95c71 --- /dev/null +++ b/tests/queries/0_stateless/03073_analyzer_alias_as_column_name.sql @@ -0,0 +1,8 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/27068 +CREATE TABLE test ( id String, create_time DateTime ) ENGINE = MergeTree ORDER BY id; + +insert into test values(1,'1970-02-01 00:00:00'); +insert into test values(2,'1970-02-01 00:00:00'); +insert into test values(3,'1970-03-01 00:00:00'); + +select id,'1997-02-01' as create_time from test where test.create_time='1970-02-01 00:00:00' ORDER BY id From 2c569b0d4645c1945bc420fb70160c331d81e51b Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 3 Apr 2024 11:13:25 +0000 Subject: [PATCH 175/470] fix --- src/Parsers/ASTIndexDeclaration.cpp | 10 ++++++++-- src/Storages/IndicesDescription.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 7 ++++--- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/Parsers/ASTIndexDeclaration.cpp b/src/Parsers/ASTIndexDeclaration.cpp index 32689f31546c..0c36644356f3 100644 --- a/src/Parsers/ASTIndexDeclaration.cpp +++ b/src/Parsers/ASTIndexDeclaration.cpp @@ -31,8 +31,14 @@ ASTIndexDeclaration::ASTIndexDeclaration(ASTPtr expression, ASTPtr type, const S ASTPtr ASTIndexDeclaration::clone() const { - auto expr = getExpression(); - auto type = getType(); + ASTPtr expr = getExpression(); + if (expr) + expr = expr->clone(); + + ASTPtr type = getType(); + if (type) + type = type->clone(); + auto res = std::make_shared(expr, type, name); res->granularity = granularity; diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index 13375a1f4dc8..cef8fd85f97d 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -95,7 +95,7 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast IndexDescription result; result.definition_ast = index_definition->clone(); result.name = index_definition->name; - result.type = Poco::toLower(index_definition->getType()->name); + result.type = Poco::toLower(index_type->name); result.granularity = index_definition->granularity; ASTPtr expr_list; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 6163a8dbee90..b60bd365eb21 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -652,9 +652,10 @@ void MergeTreeData::checkProperties( if (!allow_suspicious_indices && !attach) { const auto * index_ast = typeid_cast(index.definition_ast.get()); - auto index_function = index_ast ? 
index_ast->getType() : nullptr; - if (index_function) - checkSuspiciousIndices(index_function.get()); + ASTPtr index_expression = index_ast ? index_ast->getExpression() : nullptr; + const auto * index_expression_ptr = index_expression ? typeid_cast(index_expression.get()) : nullptr; + if (index_expression_ptr) + checkSuspiciousIndices(index_expression_ptr); } MergeTreeIndexFactory::instance().validate(index, attach); From 1c98c4752e916e077a1938c663324709fe5ec1c0 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 13:55:38 +0200 Subject: [PATCH 176/470] Close: https://github.com/ClickHouse/ClickHouse/issues/28687 --- .../03074_analyzer_alias_column_in_view.reference | 2 ++ .../0_stateless/03074_analyzer_alias_column_in_view.sql | 6 ++++++ 2 files changed, 8 insertions(+) create mode 100644 tests/queries/0_stateless/03074_analyzer_alias_column_in_view.reference create mode 100644 tests/queries/0_stateless/03074_analyzer_alias_column_in_view.sql diff --git a/tests/queries/0_stateless/03074_analyzer_alias_column_in_view.reference b/tests/queries/0_stateless/03074_analyzer_alias_column_in_view.reference new file mode 100644 index 000000000000..aa47d0d46d47 --- /dev/null +++ b/tests/queries/0_stateless/03074_analyzer_alias_column_in_view.reference @@ -0,0 +1,2 @@ +0 +0 diff --git a/tests/queries/0_stateless/03074_analyzer_alias_column_in_view.sql b/tests/queries/0_stateless/03074_analyzer_alias_column_in_view.sql new file mode 100644 index 000000000000..6f9704217881 --- /dev/null +++ b/tests/queries/0_stateless/03074_analyzer_alias_column_in_view.sql @@ -0,0 +1,6 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/28687 +create view alias (dummy int, n alias dummy) as select * from system.one; + +select n from alias; + +select * from alias where n=0; From f2997c9c8920ffeb569fcf72e4a5739597868b80 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Apr 2024 12:08:25 +0000 Subject: [PATCH 177/470] Fix settings changes history --- src/Core/SettingsChangesHistory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 219b444be2f3..ac68155a0af4 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -86,10 +86,10 @@ namespace SettingsChangesHistory static std::map settings_changes_history = { {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, + {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, }}, {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, - {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, {"read_from_page_cache_if_exists_otherwise_bypass_cache", false, false, "Added userspace page cache"}, {"page_cache_inject_eviction", false, false, "Added userspace page cache"}, From 90ac11171c80f18914e0f246fb886c6c5d9c32ca Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 3 Apr 2024 12:18:54 +0000 Subject: [PATCH 178/470] Fix crash --- src/Interpreters/RewriteOrderByVisitor.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Interpreters/RewriteOrderByVisitor.cpp 
b/src/Interpreters/RewriteOrderByVisitor.cpp index 694dec84b7a0..26817b70dc05 100644 --- a/src/Interpreters/RewriteOrderByVisitor.cpp +++ b/src/Interpreters/RewriteOrderByVisitor.cpp @@ -39,9 +39,8 @@ void RewriteOrderBy::visit(ASTPtr & ast, Data &) { // clone w/o children auto clone = std::make_shared(*order_by_elem); - clone->children.clear(); - clone->children.emplace_back(identifier); + clone->children[0] = identifier; new_order_by->children.emplace_back(clone); } if (!new_order_by->children.empty()) From f2d78f650dbb0c2145f16cd0337835bfbdc879df Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 3 Apr 2024 14:32:19 +0200 Subject: [PATCH 179/470] Fix global trace collector --- src/Common/ThreadStatus.h | 4 ---- src/Coordination/Standalone/ThreadStatusExt.cpp | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 550cb76e7366..02bf82e2da39 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -308,11 +308,7 @@ class ThreadStatus : public boost::noncopyable void flushUntrackedMemory(); -#ifdef CLICKHOUSE_KEEPER_STANDALONE_BUILD - void initGlobalProfiler(UInt64, UInt64) {} -#else void initGlobalProfiler(UInt64 global_profiler_real_time_period, UInt64 global_profiler_cpu_time_period); -#endif private: void applyGlobalSettings(); diff --git a/src/Coordination/Standalone/ThreadStatusExt.cpp b/src/Coordination/Standalone/ThreadStatusExt.cpp index 97f7287be8ca..2b89e2f024de 100644 --- a/src/Coordination/Standalone/ThreadStatusExt.cpp +++ b/src/Coordination/Standalone/ThreadStatusExt.cpp @@ -11,4 +11,8 @@ void CurrentThread::attachToGroup(const ThreadGroupPtr &) { } +void ThreadStatus::initGlobalProfiler(UInt64, UInt64) +{ +} + } From ef0a9e889f2c9c3c9a1952e10b29fb1e94613ca3 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 14:35:34 +0200 Subject: [PATCH 180/470] Close: https://github.com/ClickHouse/ClickHouse/issues/28777 --- .../03075_analyzer_subquery_alias.reference | 1 + .../0_stateless/03075_analyzer_subquery_alias.sql | 10 ++++++++++ 2 files changed, 11 insertions(+) create mode 100644 tests/queries/0_stateless/03075_analyzer_subquery_alias.reference create mode 100644 tests/queries/0_stateless/03075_analyzer_subquery_alias.sql diff --git a/tests/queries/0_stateless/03075_analyzer_subquery_alias.reference b/tests/queries/0_stateless/03075_analyzer_subquery_alias.reference new file mode 100644 index 000000000000..556d825db42a --- /dev/null +++ b/tests/queries/0_stateless/03075_analyzer_subquery_alias.reference @@ -0,0 +1 @@ +2 1 diff --git a/tests/queries/0_stateless/03075_analyzer_subquery_alias.sql b/tests/queries/0_stateless/03075_analyzer_subquery_alias.sql new file mode 100644 index 000000000000..897b189b1330 --- /dev/null +++ b/tests/queries/0_stateless/03075_analyzer_subquery_alias.sql @@ -0,0 +1,10 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/28777 +SELECT + sum(q0.a2) AS a1, + sum(q0.a1) AS a9 +FROM +( + SELECT + 1 AS a1, + 2 AS a2 +) AS q0; From 8ae1f4c4b316e6f81e87ab70e7f1d11ad7e0e771 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 14:37:58 +0200 Subject: [PATCH 181/470] Close: https://github.com/ClickHouse/ClickHouse/issues/29734 --- ...76_analyzer_multiple_joins_alias.reference | 0 .../03076_analyzer_multiple_joins_alias.sql | 51 +++++++++++++++++++ 2 files changed, 51 insertions(+) create mode 100644 tests/queries/0_stateless/03076_analyzer_multiple_joins_alias.reference create mode 100644 
tests/queries/0_stateless/03076_analyzer_multiple_joins_alias.sql diff --git a/tests/queries/0_stateless/03076_analyzer_multiple_joins_alias.reference b/tests/queries/0_stateless/03076_analyzer_multiple_joins_alias.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03076_analyzer_multiple_joins_alias.sql b/tests/queries/0_stateless/03076_analyzer_multiple_joins_alias.sql new file mode 100644 index 000000000000..8b8b76a5be1c --- /dev/null +++ b/tests/queries/0_stateless/03076_analyzer_multiple_joins_alias.sql @@ -0,0 +1,51 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/29734 +SELECT * +FROM +( + SELECT 1 AS x +) AS a +INNER JOIN +( + SELECT + 1 AS x, + 2 AS y +) AS b ON (a.x = b.x) AND (a.y = b.y); -- { serverError UNKNOWN_IDENTIFIER } + + + +SELECT * +FROM +( + SELECT 1 AS x +) AS a +INNER JOIN +( + SELECT + 1 AS x, + 2 AS y +) AS b ON (a.x = b.x) AND (a.y = b.y) +INNER JOIN +( + SELECT 3 AS x +) AS c ON a.x = c.x; -- { serverError UNKNOWN_IDENTIFIER } + + +SELECT * +FROM +( + SELECT number AS x + FROM numbers(10) +) AS a +INNER JOIN +( + SELECT + number AS x, + number AS y + FROM numbers(10) +) AS b ON (a.x = b.x) AND (a.y = b.y) +INNER JOIN +( + SELECT number AS x + FROM numbers(10) +) AS c ON a.x = c.x; -- { serverError UNKNOWN_IDENTIFIER } + From c3f1130de86a94decff8521dbfb30c20c6a984e5 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 15:10:27 +0200 Subject: [PATCH 182/470] Close: https://github.com/ClickHouse/ClickHouse/issues/33825 --- ...er_multi_scalar_subquery_aliases.reference | 21 ++++++++++++++++++ ...analyzer_multi_scalar_subquery_aliases.sql | 22 +++++++++++++++++++ ...er_multi_scalar_subquery_aliases.reference | 21 ++++++++++++++++++ ...analyzer_multi_scalar_subquery_aliases.sql | 16 ++++++++++++++ 4 files changed, 80 insertions(+) create mode 100644 tests/queries/0_stateless/03077_analyzer_multi_scalar_subquery_aliases.reference create mode 100644 tests/queries/0_stateless/03077_analyzer_multi_scalar_subquery_aliases.sql create mode 100644 tests/queries/0_stateless/03078_analyzer_multi_scalar_subquery_aliases.reference create mode 100644 tests/queries/0_stateless/03078_analyzer_multi_scalar_subquery_aliases.sql diff --git a/tests/queries/0_stateless/03077_analyzer_multi_scalar_subquery_aliases.reference b/tests/queries/0_stateless/03077_analyzer_multi_scalar_subquery_aliases.reference new file mode 100644 index 000000000000..6b134f711d46 --- /dev/null +++ b/tests/queries/0_stateless/03077_analyzer_multi_scalar_subquery_aliases.reference @@ -0,0 +1,21 @@ +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +1 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 +9 9 9 9 diff --git a/tests/queries/0_stateless/03077_analyzer_multi_scalar_subquery_aliases.sql b/tests/queries/0_stateless/03077_analyzer_multi_scalar_subquery_aliases.sql new file mode 100644 index 000000000000..3d558bdd602d --- /dev/null +++ b/tests/queries/0_stateless/03077_analyzer_multi_scalar_subquery_aliases.sql @@ -0,0 +1,22 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/33825 +CREATE TABLE t1 (i Int64, j Int64) ENGINE = Memory; +INSERT INTO t1 SELECT number, number FROM system.numbers LIMIT 10; +SELECT + (SELECT max(i) FROM t1) as i, + (SELECT max(i) FROM t1) as j, + (SELECT max(i) FROM t1) as k, + (SELECT max(i) FROM t1) as l +FROM t1; + +SELECT 1; + +WITH ( + SELECT max(i) + FROM t1 + ) AS value +SELECT + value AS i, + value AS j, + value AS k, + value AS 
l +FROM t1; diff --git a/tests/queries/0_stateless/03078_analyzer_multi_scalar_subquery_aliases.reference b/tests/queries/0_stateless/03078_analyzer_multi_scalar_subquery_aliases.reference new file mode 100644 index 000000000000..b2c49b655d40 --- /dev/null +++ b/tests/queries/0_stateless/03078_analyzer_multi_scalar_subquery_aliases.reference @@ -0,0 +1,21 @@ +111111111111 +111111111111 +111111111111 +111111111111 +111111111111 +111111111111 +111111111111 +111111111111 +111111111111 +111111111111 +1 +2222222222 +2222222222 +2222222222 +2222222222 +2222222222 +2222222222 +2222222222 +2222222222 +2222222222 +2222222222 diff --git a/tests/queries/0_stateless/03078_analyzer_multi_scalar_subquery_aliases.sql b/tests/queries/0_stateless/03078_analyzer_multi_scalar_subquery_aliases.sql new file mode 100644 index 000000000000..ded6bfbe4e3a --- /dev/null +++ b/tests/queries/0_stateless/03078_analyzer_multi_scalar_subquery_aliases.sql @@ -0,0 +1,16 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/33825 +CREATE TABLE t2 (first_column Int64, second_column Int64) ENGINE = Memory; +INSERT INTO t2 SELECT number, number FROM system.numbers LIMIT 10; + + +SELECT ( + SELECT 111111111111 + ) AS first_column +FROM t2; + +SELECT 1; + +SELECT ( + SELECT 2222222222 + ) AS second_column +FROM t2; From f6a240b7be2982b4625ffdc21e1ceeee4cd68859 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 3 Apr 2024 10:11:56 -0300 Subject: [PATCH 183/470] add some unit tests --- src/IO/S3/URI.h | 1 + src/IO/tests/gtest_s3_uri.cpp | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/src/IO/S3/URI.h b/src/IO/S3/URI.h index 2873728bc781..79f3da3fbbbc 100644 --- a/src/IO/S3/URI.h +++ b/src/IO/S3/URI.h @@ -17,6 +17,7 @@ namespace DB::S3 * The following patterns are allowed: * s3://bucket/key * http(s)://endpoint/bucket/key + * TODO specify aws private link */ struct URI { diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index 5bf0dfb962df..b3ceb875362c 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -74,6 +74,26 @@ const TestCase TestCases[] = { "data", "", true}, + {S3::URI("https://bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com/root/nested/file.txt"), + "https://bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com", + "root", + "nested/file.txt", + "", + true}, + // Test with a file with no extension + {S3::URI("https://bucket.vpce-03b2c987f1bd55c5f-j3b4vg7w.s3.ap-southeast-2.vpce.amazonaws.com/some_bucket/document"), + "https://bucket.vpce-03b2c987f1bd55c5f-j3b4vg7w.s3.ap-southeast-2.vpce.amazonaws.com", + "some_bucket", + "document", + "", + true}, + // Test with a deeply nested file path + {S3::URI("https://bucket.vpce-0242cd56f1bd55c5f-l5b7vg8x.s3.sa-east-1.vpce.amazonaws.com/some_bucket/b/c/d/e/f/g/h/i/j/data.json"), + "https://bucket.vpce-0242cd56f1bd55c5f-l5b7vg8x.s3.sa-east-1.vpce.amazonaws.com", + "some_bucket", + "b/c/d/e/f/g/h/i/j/data.json", + "", + true}, }; class S3UriTest : public testing::TestWithParam From aaf9bb0e581ed16ac9a4a35dd49487ec12f30992 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 3 Apr 2024 13:12:05 +0000 Subject: [PATCH 184/470] Fixing NULL random seed for generateRandom with analyzer. 
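With the new analyzer, a literal NULL passed as the random seed no longer arrives as a bare ASTLiteral: it comes wrapped in a "_CAST" function node, so argument parsing of generateRandom rejected it. A minimal reproducer (the same query is added as a test below):

    select * from generateRandom('x UInt64', Null, 10, 2) limit 2 format Null;

checkAndGetLiteralArgument() and TableFunctionGenerateRandom::parseArguments() now look through a wrapping "_CAST" and read the literal from its first argument. A simplified sketch of that unwrapping, assuming the argument AST is held in arg:

    const IAST * arg_raw = arg.get();
    if (const auto * func = arg_raw->as<ASTFunction>(); func && func->name == "_CAST")
        arg_raw = func->arguments->children.at(0).get();
    /// arg_raw now points at the underlying ASTLiteral (if the argument really is a literal)
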
--- src/Storages/checkAndGetLiteralArgument.cpp | 11 +++++++++-- src/TableFunctions/TableFunctionGenerateRandom.cpp | 13 +++++++++++-- .../0_stateless/01087_table_function_generate.sql | 2 ++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/Storages/checkAndGetLiteralArgument.cpp b/src/Storages/checkAndGetLiteralArgument.cpp index 5baf47fe91a9..39dc27cd5e81 100644 --- a/src/Storages/checkAndGetLiteralArgument.cpp +++ b/src/Storages/checkAndGetLiteralArgument.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB { @@ -12,8 +13,14 @@ namespace ErrorCodes template T checkAndGetLiteralArgument(const ASTPtr & arg, const String & arg_name) { - if (arg && arg->as()) - return checkAndGetLiteralArgument(*arg->as(), arg_name); + if (arg) + { + if (const auto * func = arg->as(); func && func->name == "_CAST") + return checkAndGetLiteralArgument(func->arguments->children.at(0), arg_name); + + if (arg->as()) + return checkAndGetLiteralArgument(*arg->as(), arg_name); + } throw Exception( ErrorCodes::BAD_ARGUMENTS, diff --git a/src/TableFunctions/TableFunctionGenerateRandom.cpp b/src/TableFunctions/TableFunctionGenerateRandom.cpp index af2845949870..157725620794 100644 --- a/src/TableFunctions/TableFunctionGenerateRandom.cpp +++ b/src/TableFunctions/TableFunctionGenerateRandom.cpp @@ -4,6 +4,7 @@ #include #include +#include #include #include @@ -88,7 +89,11 @@ void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, Co // All the arguments must be literals. for (const auto & arg : args) { - if (!arg->as()) + const IAST * arg_raw = arg.get(); + if (const auto * func = arg_raw->as(); func && func->name == "_CAST") + arg_raw = func->arguments->children.at(0).get(); + + if (!arg_raw->as()) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "All arguments of table function '{}' except structure argument must be literals. 
" @@ -107,7 +112,11 @@ void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, Co if (args.size() >= arg_index + 1) { - const auto & literal = args[arg_index]->as(); + const IAST * arg_raw = args[arg_index].get(); + if (const auto * func = arg_raw->as(); func && func->name == "_CAST") + arg_raw = func->arguments->children.at(0).get(); + + const auto & literal = arg_raw->as(); ++arg_index; if (!literal.value.isNull()) random_seed = checkAndGetLiteralArgument(literal, "random_seed"); diff --git a/tests/queries/0_stateless/01087_table_function_generate.sql b/tests/queries/0_stateless/01087_table_function_generate.sql index 10657dbd63ac..ff7c3f3477db 100644 --- a/tests/queries/0_stateless/01087_table_function_generate.sql +++ b/tests/queries/0_stateless/01087_table_function_generate.sql @@ -195,3 +195,5 @@ SELECT a, b, c, d, e, f, g, hex(h) FROM test_table_2 ORDER BY a, b, c, d, e, f, SELECT '-'; DROP TABLE IF EXISTS test_table_2; + +select * from generateRandom('x UInt64', Null, 10, 2) limit 2 format Null; From 8e6cbc8b31c93e3825219cc47463c0e854b0a26d Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Apr 2024 15:13:59 +0200 Subject: [PATCH 185/470] several fixes for client's keep alive connections --- .../Net/include/Poco/Net/HTTPClientSession.h | 13 + base/poco/Net/include/Poco/Net/HTTPMessage.h | 3 + .../Net/include/Poco/Net/HTTPServerParams.h | 2 +- .../Net/include/Poco/Net/HTTPServerSession.h | 2 + base/poco/Net/src/HTTPClientSession.cpp | 39 +- base/poco/Net/src/HTTPMessage.cpp | 41 ++ base/poco/Net/src/HTTPServerConnection.cpp | 13 +- base/poco/Net/src/HTTPServerSession.cpp | 6 + src/Common/HTTPConnectionPool.cpp | 94 ++-- src/Common/tests/gtest_connection_pool.cpp | 421 ++++++++++++++---- src/Core/ServerSettings.h | 4 +- src/IO/ConnectionTimeouts.cpp | 6 +- 12 files changed, 501 insertions(+), 143 deletions(-) diff --git a/base/poco/Net/include/Poco/Net/HTTPClientSession.h b/base/poco/Net/include/Poco/Net/HTTPClientSession.h index 1cef988566c5..b418937c4d58 100644 --- a/base/poco/Net/include/Poco/Net/HTTPClientSession.h +++ b/base/poco/Net/include/Poco/Net/HTTPClientSession.h @@ -213,6 +213,13 @@ namespace Net Poco::Timespan getKeepAliveTimeout() const; /// Returns the connection timeout for HTTP connections. + bool isKeepAliveExpired(double reliability = 1.0) const; + /// Returns if the connection is expired with some margin as fraction of timeout as reliability + + double getKeepAliveReliability() const; + /// Returns the current fraction of keep alive timeout when connection is considered safe to use + /// It helps to avoid situation when a client uses nearly expired connection and receives NoMessageException + virtual std::ostream & sendRequest(HTTPRequest & request); /// Sends the header for the given HTTP request to /// the server. 
@@ -361,6 +368,7 @@ namespace Net Poco::SharedPtr _pRequestStream; Poco::SharedPtr _pResponseStream; + static const double _defaultKeepAliveReliabilityLevel; static ProxyConfig _globalProxyConfig; HTTPClientSession(const HTTPClientSession &); @@ -455,6 +463,11 @@ namespace Net _lastRequest = time; } + inline double HTTPClientSession::getKeepAliveReliability() const + { + return _defaultKeepAliveReliabilityLevel; + } + } } // namespace Poco::Net diff --git a/base/poco/Net/include/Poco/Net/HTTPMessage.h b/base/poco/Net/include/Poco/Net/HTTPMessage.h index 0bef50803a8f..994807ffbff5 100644 --- a/base/poco/Net/include/Poco/Net/HTTPMessage.h +++ b/base/poco/Net/include/Poco/Net/HTTPMessage.h @@ -120,6 +120,9 @@ namespace Net /// The value is set to "Keep-Alive" if keepAlive is /// true, or to "Close" otherwise. + void setKeepAliveTimeout(int timeout); + int getKeepAliveTimeout() const; + bool getKeepAlive() const; /// Returns true if /// * the message has a Connection header field and its value is "Keep-Alive" diff --git a/base/poco/Net/include/Poco/Net/HTTPServerParams.h b/base/poco/Net/include/Poco/Net/HTTPServerParams.h index 3c836a630a04..d614c62d57a3 100644 --- a/base/poco/Net/include/Poco/Net/HTTPServerParams.h +++ b/base/poco/Net/include/Poco/Net/HTTPServerParams.h @@ -44,7 +44,7 @@ namespace Net /// - timeout: 60 seconds /// - keepAlive: true /// - maxKeepAliveRequests: 0 - /// - keepAliveTimeout: 10 seconds + /// - keepAliveTimeout: 15 seconds void setServerName(const std::string & serverName); /// Sets the name and port (name:port) that the server uses to identify itself. diff --git a/base/poco/Net/include/Poco/Net/HTTPServerSession.h b/base/poco/Net/include/Poco/Net/HTTPServerSession.h index ec928af304fa..3df7995509a1 100644 --- a/base/poco/Net/include/Poco/Net/HTTPServerSession.h +++ b/base/poco/Net/include/Poco/Net/HTTPServerSession.h @@ -56,6 +56,8 @@ namespace Net SocketAddress serverAddress(); /// Returns the server's address. 
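+        /// Sets the keep-alive timeout for this session, overriding the default taken from HTTPServerParams.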
+ void setKeepAliveTimeout(Poco::Timespan keepAliveTimeout); + private: bool _firstRequest; Poco::Timespan _keepAliveTimeout; diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp index 33a3dcc49014..59800232ba95 100644 --- a/base/poco/Net/src/HTTPClientSession.cpp +++ b/base/poco/Net/src/HTTPClientSession.cpp @@ -37,6 +37,7 @@ namespace Net { HTTPClientSession::ProxyConfig HTTPClientSession::_globalProxyConfig; +const double HTTPClientSession::_defaultKeepAliveReliabilityLevel = 0.9; HTTPClientSession::HTTPClientSession(): @@ -220,7 +221,11 @@ void HTTPClientSession::setGlobalProxyConfig(const ProxyConfig& config) void HTTPClientSession::setKeepAliveTimeout(const Poco::Timespan& timeout) { - _keepAliveTimeout = timeout; + if (connected()) + { + throw Poco::IllegalStateException("cannot change keep alive timeout on initiated connection"); + } + _keepAliveTimeout = timeout; } @@ -243,6 +248,8 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request) reconnect(); if (!keepAlive) request.setKeepAlive(false); + if (keepAlive && !request.has(HTTPMessage::CONNECTION_KEEP_ALIVE) && _keepAliveTimeout.totalSeconds() > 0) + request.setKeepAliveTimeout(_keepAliveTimeout.totalSeconds()); if (!request.has(HTTPRequest::HOST) && !_host.empty()) request.setHost(_host, _port); if (!_proxyConfig.host.empty() && !bypassProxy()) @@ -324,6 +331,14 @@ std::istream& HTTPClientSession::receiveResponse(HTTPResponse& response) _mustReconnect = getKeepAlive() && !response.getKeepAlive(); + if (!_mustReconnect) + { + /// when server sends its keep alive timeout, client has to follow that value + auto timeout = response.getKeepAliveTimeout(); + if (timeout > 0) + _keepAliveTimeout = Poco::Timespan(timeout, 0); + } + if (!_expectResponseBody || response.getStatus() < 200 || response.getStatus() == HTTPResponse::HTTP_NO_CONTENT || response.getStatus() == HTTPResponse::HTTP_NOT_MODIFIED) _pResponseStream = new HTTPFixedLengthInputStream(*this, 0); else if (response.getChunkedTransferEncoding()) @@ -430,15 +445,17 @@ std::string HTTPClientSession::proxyRequestPrefix() const return result; } +bool HTTPClientSession::isKeepAliveExpired(double reliability) const +{ + Poco::Timestamp now; + return Timespan(Timestamp::TimeDiff(reliability *_keepAliveTimeout.totalMicroseconds())) <= now - _lastRequest; +} bool HTTPClientSession::mustReconnect() const { if (!_mustReconnect) - { - Poco::Timestamp now; - return _keepAliveTimeout <= now - _lastRequest; - } - else return true; + return isKeepAliveExpired(_defaultKeepAliveReliabilityLevel); + return true; } @@ -511,14 +528,16 @@ void HTTPClientSession::assign(Poco::Net::HTTPClientSession & session) if (buffered()) throw Poco::LogicException("assign to a session with not empty buffered data"); - attachSocket(session.detachSocket()); setLastRequest(session.getLastRequest()); setResolvedHost(session.getResolvedHost()); - setKeepAlive(session.getKeepAlive()); + setProxyConfig(session.getProxyConfig()); setTimeout(session.getConnectionTimeout(), session.getSendTimeout(), session.getReceiveTimeout()); - setKeepAliveTimeout(session.getKeepAliveTimeout()); - setProxyConfig(session.getProxyConfig()); + setKeepAlive(session.getKeepAlive()); + if (!connected()) + setKeepAliveTimeout(session.getKeepAliveTimeout()); + + attachSocket(session.detachSocket()); session.reset(); } diff --git a/base/poco/Net/src/HTTPMessage.cpp b/base/poco/Net/src/HTTPMessage.cpp index 0cd234ee9cb3..2f974b8bf0b8 100644 --- a/base/poco/Net/src/HTTPMessage.cpp +++ 
b/base/poco/Net/src/HTTPMessage.cpp @@ -17,6 +17,7 @@ #include "Poco/NumberFormatter.h" #include "Poco/NumberParser.h" #include "Poco/String.h" +#include using Poco::NumberFormatter; @@ -179,4 +180,44 @@ bool HTTPMessage::getKeepAlive() const } +void HTTPMessage::setKeepAliveTimeout(int timeout) +{ + add(HTTPMessage::CONNECTION_KEEP_ALIVE, std::format("timeout={}", timeout)); +} + + +int parseTimeoutFromHeaderValue(const std::string_view header_value) +{ + static const std::string_view timeout_param = "timeout="; + + auto timeout_pos = header_value.find(timeout_param); + if (timeout_pos == std::string::npos) + timeout_pos = header_value.size(); + if (timeout_pos != header_value.size()) + timeout_pos += timeout_param.size(); + + auto timeout_end = header_value.find(',', timeout_pos); + if (timeout_end == std::string::npos) + timeout_end = header_value.size(); + + auto timeout_value_substr = header_value.substr(timeout_pos, timeout_end - timeout_pos); + if (timeout_value_substr.empty()) + return -1; + + int value = 0; + auto [ptr, ec] = std::from_chars(timeout_value_substr.begin(), timeout_value_substr.end(), value); + + if (ec == std::errc()) + return value; + + return -1; +} + + +int HTTPMessage::getKeepAliveTimeout() const +{ + const std::string& ka_header = get(HTTPMessage::CONNECTION_KEEP_ALIVE, HTTPMessage::EMPTY); + return parseTimeoutFromHeaderValue(ka_header); +} + } } // namespace Poco::Net diff --git a/base/poco/Net/src/HTTPServerConnection.cpp b/base/poco/Net/src/HTTPServerConnection.cpp index c57984b0162e..d5eb29d31343 100644 --- a/base/poco/Net/src/HTTPServerConnection.cpp +++ b/base/poco/Net/src/HTTPServerConnection.cpp @@ -88,7 +88,18 @@ void HTTPServerConnection::run() pHandler->handleRequest(request, response); session.setKeepAlive(_pParams->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive()); - } + + /// all that fuzz is all about to make session close with less timeout than 15s (set in HTTPServerParams c-tor) + if (_pParams->getKeepAlive() && response.getKeepAlive() && session.canKeepAlive()) + { + int value = response.getKeepAliveTimeout(); + if (value < 0) + value = request.getKeepAliveTimeout(); + if (value > 0) + session.setKeepAliveTimeout(Poco::Timespan(value, 0)); + } + + } else sendErrorResponse(session, HTTPResponse::HTTP_NOT_IMPLEMENTED); } catch (Poco::Exception&) diff --git a/base/poco/Net/src/HTTPServerSession.cpp b/base/poco/Net/src/HTTPServerSession.cpp index d4f2b24879e4..f67a63a9e0e9 100644 --- a/base/poco/Net/src/HTTPServerSession.cpp +++ b/base/poco/Net/src/HTTPServerSession.cpp @@ -33,6 +33,12 @@ HTTPServerSession::~HTTPServerSession() { } +void HTTPServerSession::setKeepAliveTimeout(Poco::Timespan keepAliveTimeout) +{ + _keepAliveTimeout = keepAliveTimeout; +} + + bool HTTPServerSession::hasMoreRequests() { diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index cd2505df7f35..21165bbc62d5 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -83,17 +84,15 @@ namespace } - size_t roundUp(size_t x, size_t rounding) + constexpr size_t roundUp(size_t x, size_t rounding) { chassert(rounding > 0); - return (x + (rounding - 1)) / rounding * rounding; - } - - - Poco::Timespan divide(const Poco::Timespan span, int divisor) - { - return Poco::Timespan(Poco::Timestamp::TimeDiff(span.totalMicroseconds() / divisor)); + return (x + rounding) / rounding * rounding; } + static_assert(roundUp(10000, 100) == 
10100); + static_assert(roundUp(10001, 100) == 10100); + static_assert(roundUp(10099, 100) == 10100); + static_assert(roundUp(10100, 100) == 10200); } namespace DB @@ -202,8 +201,9 @@ class ConnectionGroup if (total_connections_in_group >= limits.warning_limit && total_connections_in_group >= mute_warning_until) { - LOG_WARNING(log, "Too many active sessions in group {}, count {}, warning limit {}", type, total_connections_in_group, limits.warning_limit); mute_warning_until = roundUp(total_connections_in_group, limits.warning_step); + LOG_WARNING(log, "Too many active sessions in group {}, count {}, warning limit {}, next warning at {}", + type, total_connections_in_group, limits.warning_limit, mute_warning_until); } } @@ -213,7 +213,7 @@ class ConnectionGroup --total_connections_in_group; - const size_t reduced_warning_limit = limits.warning_limit > 10 ? limits.warning_limit - 10 : 1; + const size_t reduced_warning_limit = limits.warning_limit > 10 ? limits.warning_limit - 20 : 1; if (mute_warning_until > 0 && total_connections_in_group < reduced_warning_limit) { LOG_WARNING(log, "Sessions count is OK in the group {}, count {}", type, total_connections_in_group); @@ -221,6 +221,12 @@ class ConnectionGroup } } + void atPoolDestroy(size_t connections) + { + std::lock_guard lock(mutex); + total_connections_in_group -= connections; + } + HTTPConnectionGroupType getType() const { return type; } const IHTTPConnectionPoolForEndpoint::Metrics & getMetrics() const { return metrics; } @@ -273,9 +279,15 @@ class EndpointConnectionPool : public std::enable_shared_from_this; + using Session::mustReconnect; + + void markAsExpired() + { + isExpired = true; + } + void reconnect() override { - ProfileEvents::increment(metrics.reset); Session::close(); if (auto lock = pool.lock()) @@ -352,6 +364,11 @@ class EndpointConnectionPool : public std::enable_shared_from_thisatConnectionDestroy(*this); - else - ProfileEvents::increment(metrics.reset); + group->atConnectionDestroy(); + + if (!isExpired) + if (auto lock = pool.lock()) + lock->atConnectionDestroy(*this); CurrentMetrics::sub(metrics.active_count); } @@ -404,10 +422,11 @@ class EndpointConnectionPool : public std::enable_shared_from_this - explicit PooledConnection(EndpointConnectionPool::WeakPtr pool_, IHTTPConnectionPoolForEndpoint::Metrics metrics_, Args &&... args) - : Session(args...), pool(std::move(pool_)), metrics(std::move(metrics_)) + explicit PooledConnection(EndpointConnectionPool::WeakPtr pool_, ConnectionGroup::Ptr group_, IHTTPConnectionPoolForEndpoint::Metrics metrics_, Args &&... 
args) + : Session(args...), pool(std::move(pool_)), group(group_), metrics(std::move(metrics_)) { CurrentMetrics::add(metrics.active_count); + group->atConnectionCreate(); } template @@ -433,10 +452,12 @@ class EndpointConnectionPool : public std::enable_shared_from_this expired_connections; SCOPE_EXIT({ @@ -494,8 +514,9 @@ class EndpointConnectionPool : public std::enable_shared_from_this expired_connections; SCOPE_EXIT({ @@ -535,19 +555,21 @@ class EndpointConnectionPool : public std::enable_shared_from_this & expired_connections, Poco::Timestamp now) TSA_REQUIRES(mutex) + size_t wipeExpiredImpl(std::vector & expired_connections) TSA_REQUIRES(mutex) { + auto isSoftLimitReached = group->isSoftLimitReached(); while (!stored_connections.empty()) { auto connection = stored_connections.top(); - if (!isExpired(now, connection)) + if (!isExpired(connection, isSoftLimitReached)) return stored_connections.size(); stored_connections.pop(); + connection->markAsExpired(); expired_connections.push_back(connection); } @@ -569,16 +591,16 @@ class EndpointConnectionPool : public std::enable_shared_from_thisisSoftLimitReached()) - return now > (connection->getLastRequest() + divide(connection->getKeepAliveTimeout(), 10)); - return now > connection->getLastRequest() + connection->getKeepAliveTimeout(); + if (isSoftLimitReached) + return connection->isKeepAliveExpired(0.1); + return connection->isKeepAliveExpired(0.8); } ConnectionPtr allocateNewConnection() { - ConnectionPtr connection = PooledConnection::create(this->getWeakFromThis(), getMetrics(), host, port); + ConnectionPtr connection = PooledConnection::create(this->getWeakFromThis(), group, getMetrics(), host, port); connection->setKeepAlive(true); if (!proxy_configuration.isEmpty()) @@ -586,8 +608,6 @@ class EndpointConnectionPool : public std::enable_shared_from_thissetProxyConfig(proxyConfigurationToPocoProxyConfig(proxy_configuration)); } - group->atConnectionCreate(); - return connection; } @@ -619,8 +639,6 @@ class EndpointConnectionPool : public std::enable_shared_from_thisatConnectionDestroy(); - if (!connection.connected() || connection.mustReconnect() || !connection.isCompleted() || connection.buffered() || group->isStoreLimitReached()) { @@ -631,14 +649,14 @@ class EndpointConnectionPool : public std::enable_shared_from_thisassign(connection); - CurrentMetrics::add(getMetrics().stored_count, 1); - ProfileEvents::increment(getMetrics().preserved, 1); - { MemoryTrackerSwitcher switcher{&total_memory_tracker}; std::lock_guard lock(mutex); stored_connections.push(connection_to_store); } + + CurrentMetrics::add(getMetrics().stored_count, 1); + ProfileEvents::increment(getMetrics().preserved, 1); } @@ -726,7 +744,7 @@ createConnectionPool(ConnectionGroup::Ptr group, std::string host, UInt16 port, class HTTPConnectionPools::Impl { private: - const size_t DEFAULT_WIPE_TIMEOUT_SECONDS = 5 * 60; + const size_t DEFAULT_WIPE_TIMEOUT_SECONDS = 10 * 60; const Poco::Timespan wipe_timeout = Poco::Timespan(DEFAULT_WIPE_TIMEOUT_SECONDS, 0); ConnectionGroup::Ptr disk_group = std::make_shared(HTTPConnectionGroupType::DISK); diff --git a/src/Common/tests/gtest_connection_pool.cpp b/src/Common/tests/gtest_connection_pool.cpp index dcc3c11fd529..36bf8bc7dae2 100644 --- a/src/Common/tests/gtest_connection_pool.cpp +++ b/src/Common/tests/gtest_connection_pool.cpp @@ -2,7 +2,6 @@ #include #include -#include #include #include #include @@ -17,6 +16,39 @@ namespace { +template +class SafeHandler +{ +public: + using Ptr = std::shared_ptr>; + + SafeHandler() = 
default; + SafeHandler(SafeHandler&) = delete; + SafeHandler& operator=(SafeHandler&) = delete; + + T get() + { + std::lock_guard lock(mutex); + return obj; + } + + void set(T && options_) + { + std::lock_guard lock(mutex); + obj = std::move(options_); + } + +protected: + std::mutex mutex; + T obj = {}; +}; + +struct RequestOptions +{ + size_t slowdown_receive = 0; + int overwrite_keep_alive_timeout = 0; +}; + size_t stream_copy_n(std::istream & in, std::ostream & out, std::size_t count = std::numeric_limits::max()) { const size_t buffer_size = 4096; @@ -47,13 +79,19 @@ size_t stream_copy_n(std::istream & in, std::ostream & out, std::size_t count = class MockRequestHandler : public Poco::Net::HTTPRequestHandler { public: - explicit MockRequestHandler(std::shared_ptr> slowdown_) - : slowdown(std::move(slowdown_)) + explicit MockRequestHandler(SafeHandler::Ptr options_) + : options(options_) { } void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override { + int value = request.getKeepAliveTimeout(); + ASSERT_GT(value, 0); + + if (options->get().overwrite_keep_alive_timeout > 0) + response.setKeepAliveTimeout(options->get().overwrite_keep_alive_timeout); + response.setStatus(Poco::Net::HTTPResponse::HTTP_OK); auto size = request.getContentLength(); if (size > 0) @@ -61,28 +99,29 @@ class MockRequestHandler : public Poco::Net::HTTPRequestHandler else response.setChunkedTransferEncoding(true); // or chunk encoding - sleepForSeconds(*slowdown); + if (options->get().slowdown_receive > 0) + sleepForSeconds(options->get().slowdown_receive); stream_copy_n(request.stream(), response.send(), size); } - std::shared_ptr> slowdown; + SafeHandler::Ptr options; }; class HTTPRequestHandlerFactory : public Poco::Net::HTTPRequestHandlerFactory { public: - explicit HTTPRequestHandlerFactory(std::shared_ptr> slowdown_) - : slowdown(std::move(slowdown_)) + explicit HTTPRequestHandlerFactory(SafeHandler::Ptr options_) + : options(options_) { } Poco::Net::HTTPRequestHandler * createRequestHandler(const Poco::Net::HTTPServerRequest &) override { - return new MockRequestHandler(slowdown); + return new MockRequestHandler(options); } - std::shared_ptr> slowdown; + SafeHandler::Ptr options; }; } @@ -94,6 +133,8 @@ class ConnectionPoolTest : public testing::Test { protected: ConnectionPoolTest() { + options = std::make_shared>(); + startServer(); } @@ -102,7 +143,7 @@ class ConnectionPoolTest : public testing::Test { DB::HTTPConnectionPools::Limits def_limits{}; DB::HTTPConnectionPools::instance().setLimits(def_limits, def_limits, def_limits); - setSlowDown(0); + options->set(RequestOptions()); DB::HTTPConnectionPools::instance().dropCache(); DB::CurrentThread::getProfileEvents().reset(); @@ -129,7 +170,7 @@ class ConnectionPoolTest : public testing::Test { void startServer() { server_data.reset(); - server_data.handler_factory = new HTTPRequestHandlerFactory(slowdown_receive); + server_data.handler_factory = new HTTPRequestHandlerFactory(options); server_data.server = std::make_unique( server_data.handler_factory, server_data.port); @@ -143,11 +184,20 @@ class ConnectionPoolTest : public testing::Test { void setSlowDown(size_t seconds) { - *slowdown_receive = seconds; + auto opt = options->get(); + opt.slowdown_receive = seconds; + options->set(std::move(opt)); + } + + void setOverWriteTimeout(size_t seconds) + { + auto opt = options->get(); + opt.overwrite_keep_alive_timeout = int(seconds); + options->set(std::move(opt)); } DB::ConnectionTimeouts timeouts; - 
std::shared_ptr> slowdown_receive = std::make_shared>(0); + SafeHandler::Ptr options; struct ServerData { @@ -182,7 +232,7 @@ class ConnectionPoolTest : public testing::Test { void wait_until(std::function pred) { while (!pred()) - sleepForMilliseconds(250); + sleepForMilliseconds(10); } void echoRequest(String data, HTTPSession & session) @@ -245,45 +295,52 @@ TEST_F(ConnectionPoolTest, CanRequest) ASSERT_EQ(0, getServer().currentConnections()); ASSERT_EQ(1, getServer().totalConnections()); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); + auto metrics = pool->getMetrics(); + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, CanPreserve) { auto pool = getPool(); + auto metrics = pool->getMetrics(); { auto connection = pool->getConnection(timeouts); } - ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().stored_count)); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); wait_until([&] () { return getServer().currentConnections() == 1; }); ASSERT_EQ(1, getServer().currentConnections()); - - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); } TEST_F(ConnectionPoolTest, CanReuse) { auto pool = getPool(); + auto metrics = pool->getMetrics(); { auto connection = pool->getConnection(timeouts); - // DB::setReuseTag(*connection); } - ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().stored_count)); - { auto connection = pool->getConnection(timeouts); - ASSERT_EQ(1, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); wait_until([&] () { return getServer().currentConnections() == 1; }); ASSERT_EQ(1, getServer().currentConnections()); @@ -293,6 +350,11 @@ TEST_F(ConnectionPoolTest, CanReuse) ASSERT_EQ(1, getServer().totalConnections()); ASSERT_EQ(1, getServer().currentConnections()); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + connection->reset(); } @@ -303,15 +365,16 @@ TEST_F(ConnectionPoolTest, CanReuse) ASSERT_EQ(0, getServer().currentConnections()); ASSERT_EQ(1, getServer().totalConnections()); - ASSERT_EQ(1, 
DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); } TEST_F(ConnectionPoolTest, CanReuse10) { auto pool = getPool(); - + auto metrics = pool->getMetrics(); for (int i = 0; i < 10; ++i) { @@ -328,16 +391,23 @@ TEST_F(ConnectionPoolTest, CanReuse10) ASSERT_EQ(0, getServer().currentConnections()); ASSERT_EQ(1, getServer().totalConnections()); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, CanReuse5) { - timeouts.withHTTPKeepAliveTimeout(1); + auto ka = Poco::Timespan(1, 0); // 1 seconds + timeouts.withHTTPKeepAliveTimeout(ka); auto pool = getPool(); + auto metrics = pool->getMetrics(); std::vector connections; connections.reserve(5); @@ -347,11 +417,14 @@ TEST_F(ConnectionPoolTest, CanReuse5) } connections.clear(); - ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); - ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().stored_count)); + ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(5, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(5, CurrentMetrics::get(metrics.stored_count)); wait_until([&] () { return getServer().currentConnections() == 5; }); ASSERT_EQ(5, getServer().currentConnections()); @@ -363,35 +436,56 @@ TEST_F(ConnectionPoolTest, CanReuse5) echoRequest("Hello", *connection); } - ASSERT_EQ(5, getServer().totalConnections()); + ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(5, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(5, CurrentMetrics::get(metrics.stored_count)); - ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(10, 
DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); - ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(5, CurrentMetrics::get(pool->getMetrics().stored_count)); + /// wait until all connections are timeouted + wait_until([&] () { return getServer().currentConnections() == 0; }); + + { + // just to trigger pool->wipeExpired(); + auto connection = pool->getConnection(timeouts); + connection->reset(); + } + + ASSERT_EQ(6, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(5, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, CanReconnectAndCreate) { auto pool = getPool(); + auto metrics = pool->getMetrics(); std::vector in_use; - const size_t count = 2; + const size_t count = 3; for (int i = 0; i < count; ++i) { auto connection = pool->getConnection(timeouts); - // DB::setReuseTag(*connection); in_use.push_back(connection); } - ASSERT_EQ(count, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + ASSERT_EQ(count, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); - ASSERT_EQ(count, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count)); + ASSERT_EQ(count, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); auto connection = std::move(in_use.back()); in_use.pop_back(); @@ -402,28 +496,39 @@ TEST_F(ConnectionPoolTest, CanReconnectAndCreate) echoRequest("Hello", *connection); - connection->reset(); - - wait_until([&] () { return getServer().currentConnections() == 1; }); - ASSERT_EQ(1, getServer().currentConnections()); - ASSERT_EQ(count+1, getServer().totalConnections()); + ASSERT_EQ(count+1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); - ASSERT_EQ(count+1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + ASSERT_EQ(count, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, CanReconnectAndReuse) { + auto ka = Poco::Timespan(1, 0); // 1 seconds + timeouts.withHTTPKeepAliveTimeout(ka); + auto pool = getPool(); + auto metrics = pool->getMetrics(); 
std::vector in_use; - const size_t count = 2; + const size_t count = 3; + for (int i = 0; i < count; ++i) + { + auto connection = pool->getConnection(timeouts); + /// make some request in order to show to the server the keep alive headers + echoRequest("Hello", *connection); + in_use.push_back(std::move(connection)); + } + in_use.clear(); + for (int i = 0; i < count; ++i) { auto connection = pool->getConnection(timeouts); - // DB::setReuseTag(*connection); in_use.push_back(std::move(connection)); } @@ -441,11 +546,16 @@ TEST_F(ConnectionPoolTest, CanReconnectAndReuse) wait_until([&] () { return getServer().currentConnections() == 0; }); ASSERT_EQ(0, getServer().currentConnections()); - ASSERT_EQ(2, getServer().totalConnections()); + ASSERT_EQ(count, getServer().totalConnections()); + + ASSERT_EQ(count, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(count + count - 1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(count + 1, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); - ASSERT_EQ(count, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); + ASSERT_EQ(count-1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(count-2, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, ReceiveTimeout) @@ -454,6 +564,7 @@ TEST_F(ConnectionPoolTest, ReceiveTimeout) timeouts.withReceiveTimeout(1); auto pool = getPool(); + auto metrics = pool->getMetrics(); { auto connection = pool->getConnection(timeouts); @@ -462,10 +573,14 @@ TEST_F(ConnectionPoolTest, ReceiveTimeout) ); } - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); { timeouts.withReceiveTimeout(3); @@ -475,10 +590,14 @@ TEST_F(ConnectionPoolTest, ReceiveTimeout) ); } - ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + 
ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); { /// timeouts have effect for reused session @@ -489,10 +608,14 @@ TEST_F(ConnectionPoolTest, ReceiveTimeout) ); } - ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reused]); - ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, ReadWriteBufferFromHTTP) @@ -500,6 +623,7 @@ TEST_F(ConnectionPoolTest, ReadWriteBufferFromHTTP) std::string_view message = "Hello ReadWriteBufferFromHTTP"; auto uri = Poco::URI(getServerUrl()); auto metrics = DB::HTTPConnectionPools::instance().getPool(DB::HTTPConnectionGroupType::HTTP, uri, DB::ProxyConfiguration{})->getMetrics(); + Poco::Net::HTTPBasicCredentials empty_creds; auto buf_from_http = DB::BuilderRWBufferFromHTTP(uri) .withConnectionGroup(DB::HTTPConnectionGroupType::HTTP) @@ -527,6 +651,7 @@ TEST_F(ConnectionPoolTest, ReadWriteBufferFromHTTP) ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); @@ -538,23 +663,26 @@ TEST_F(ConnectionPoolTest, HardLimit) DB::HTTPConnectionPools::instance().setLimits(zero_limits, zero_limits, zero_limits); auto pool = getPool(); + auto metrics = pool->getMetrics(); { auto connection = pool->getConnection(timeouts); } - ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count)); - + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); } TEST_F(ConnectionPoolTest, NoReceiveCall) { auto pool = getPool(); + auto metrics = pool->getMetrics(); { auto connection = pool->getConnection(timeouts); @@ -570,11 +698,124 @@ TEST_F(ConnectionPoolTest, NoReceiveCall) connection->flushRequest(); } - ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().active_count)); - ASSERT_EQ(0, CurrentMetrics::get(pool->getMetrics().stored_count)); + ASSERT_EQ(1, 
DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); +} - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().created]); - ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[pool->getMetrics().preserved]); - ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[pool->getMetrics().reset]); +TEST_F(ConnectionPoolTest, ReconnectedWhenConnectionIsHoldTooLong) +{ + auto ka = Poco::Timespan(1, 0); // 1 seconds + timeouts.withHTTPKeepAliveTimeout(ka); + + auto pool = getPool(); + auto metrics = pool->getMetrics(); + + { + auto connection = pool->getConnection(timeouts); + + echoRequest("Hello", *connection); + + auto fake_ka = Poco::Timespan(30 * 1000 * 1000); // 30 seconds + timeouts.withHTTPKeepAliveTimeout(fake_ka); + DB::setTimeouts(*connection, timeouts); // new keep alive timeout has no effect + + wait_until([&] () { return getServer().currentConnections() == 0; }); + + ASSERT_EQ(1, connection->connected()); + ASSERT_EQ(1, connection->getKeepAlive()); + ASSERT_EQ(1000, connection->getKeepAliveTimeout().totalMilliseconds()); + ASSERT_EQ(1, connection->isKeepAliveExpired(connection->getKeepAliveReliability())); + + echoRequest("Hello", *connection); + } + + + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); +} + +TEST_F(ConnectionPoolTest, ReconnectedWhenConnectionIsNearlyExpired) +{ + auto ka = Poco::Timespan(1, 0); // 1 seconds + timeouts.withHTTPKeepAliveTimeout(ka); + + auto pool = getPool(); + auto metrics = pool->getMetrics(); + + { + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + } + + sleepForMilliseconds(900); + + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + } + } + + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); +} + +TEST_F(ConnectionPoolTest, ServerOverwriteKeepAlive) +{ + auto ka = Poco::Timespan(30, 0); // 30 seconds + timeouts.withHTTPKeepAliveTimeout(ka); + + auto pool = getPool(); + auto metrics = pool->getMetrics(); + + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(30, timeouts.http_keep_alive_timeout.totalSeconds()); + ASSERT_EQ(30, connection->getKeepAliveTimeout().totalSeconds()); + } + + { + setOverWriteTimeout(1); + auto connection = pool->getConnection(timeouts); + 
echoRequest("Hello", *connection); + ASSERT_EQ(30, timeouts.http_keep_alive_timeout.totalSeconds()); + ASSERT_EQ(1, connection->getKeepAliveTimeout().totalSeconds()); + } + + { + // server do not overwrite it in the following requests but client has to remember last agreed value + setOverWriteTimeout(0); + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(30, timeouts.http_keep_alive_timeout.totalSeconds()); + ASSERT_EQ(1, connection->getKeepAliveTimeout().totalSeconds()); + } + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(3, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(2, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); } diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h index 6608a35a5a2c..8d6d8ebc1a24 100644 --- a/src/Core/ServerSettings.h +++ b/src/Core/ServerSettings.h @@ -128,9 +128,9 @@ namespace DB M(Bool, format_alter_operations_with_parentheses, false, "If enabled, each operation in alter queries will be surrounded with parentheses in formatted queries to make them less ambiguous.", 0) \ M(String, default_replica_path, "/clickhouse/tables/{uuid}/{shard}", "The path to the table in ZooKeeper", 0) \ M(String, default_replica_name, "{replica}", "The replica name in ZooKeeper", 0) \ - M(UInt64, disk_connections_soft_limit, 1000, "Connections above this limit have significantly shorter time to live. The limit applies to the disks connections.", 0) \ + M(UInt64, disk_connections_soft_limit, 5000, "Connections above this limit have significantly shorter time to live. The limit applies to the disks connections.", 0) \ M(UInt64, disk_connections_warn_limit, 10000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the disks connections.", 0) \ - M(UInt64, disk_connections_store_limit, 12000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the disks connections.", 0) \ + M(UInt64, disk_connections_store_limit, 30000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. The limit applies to the disks connections.", 0) \ M(UInt64, storage_connections_soft_limit, 100, "Connections above this limit have significantly shorter time to live. The limit applies to the storages connections.", 0) \ M(UInt64, storage_connections_warn_limit, 1000, "Warning massages are written to the logs if number of in-use connections are higher than this limit. The limit applies to the storages connections.", 0) \ M(UInt64, storage_connections_store_limit, 5000, "Connections above this limit reset after use. Set to 0 to turn connection cache off. 
The limit applies to the storages connections.", 0) \ diff --git a/src/IO/ConnectionTimeouts.cpp b/src/IO/ConnectionTimeouts.cpp index c4b636103fe2..8813c9581852 100644 --- a/src/IO/ConnectionTimeouts.cpp +++ b/src/IO/ConnectionTimeouts.cpp @@ -144,7 +144,11 @@ ConnectionTimeouts ConnectionTimeouts::getAdaptiveTimeouts(const String & method void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts) { session.setTimeout(timeouts.connection_timeout, timeouts.send_timeout, timeouts.receive_timeout); - session.setKeepAliveTimeout(timeouts.http_keep_alive_timeout); + /// we can not change keep alive timeout for already initiated connections + if (!session.connected()) + { + session.setKeepAliveTimeout(timeouts.http_keep_alive_timeout); + } } ConnectionTimeouts getTimeouts(const Poco::Net::HTTPClientSession & session) From 0bc743b4e2144d6de39500aa753d9d02675caa18 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 15:24:16 +0200 Subject: [PATCH 186/470] Close: https://github.com/ClickHouse/ClickHouse/issues/35608 --- ...alyzer_numeric_literals_as_column_names.reference | 2 ++ ...079_analyzer_numeric_literals_as_column_names.sql | 12 ++++++++++++ 2 files changed, 14 insertions(+) create mode 100644 tests/queries/0_stateless/03079_analyzer_numeric_literals_as_column_names.reference create mode 100644 tests/queries/0_stateless/03079_analyzer_numeric_literals_as_column_names.sql diff --git a/tests/queries/0_stateless/03079_analyzer_numeric_literals_as_column_names.reference b/tests/queries/0_stateless/03079_analyzer_numeric_literals_as_column_names.reference new file mode 100644 index 000000000000..faff07c519f8 --- /dev/null +++ b/tests/queries/0_stateless/03079_analyzer_numeric_literals_as_column_names.reference @@ -0,0 +1,2 @@ +\N 1 +str diff --git a/tests/queries/0_stateless/03079_analyzer_numeric_literals_as_column_names.sql b/tests/queries/0_stateless/03079_analyzer_numeric_literals_as_column_names.sql new file mode 100644 index 000000000000..e6bcad345064 --- /dev/null +++ b/tests/queries/0_stateless/03079_analyzer_numeric_literals_as_column_names.sql @@ -0,0 +1,12 @@ +CREATE TABLE testdata (`1` String) ENGINE=MergeTree ORDER BY tuple(); +INSERT INTO testdata VALUES ('testdata'); + +SELECT * +FROM ( + SELECT if(isValidUTF8(`1`), NULL, 'error!') AS error_message, + if(error_message IS NULL, 1, 0) AS valid + FROM testdata +) +WHERE valid; + +select * from (select 'str' as `1`) where 1; From 6c14f6ecf2aba16e407fe887ef38e59d9931510d Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 3 Apr 2024 13:25:04 +0000 Subject: [PATCH 187/470] Close: https://github.com/ClickHouse/ClickHouse/pull/62185 --- .../03033_cte_numbers_memory.reference | 10 ++++++++++ .../0_stateless/03033_cte_numbers_memory.sql | 15 +++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 tests/queries/0_stateless/03033_cte_numbers_memory.reference create mode 100644 tests/queries/0_stateless/03033_cte_numbers_memory.sql diff --git a/tests/queries/0_stateless/03033_cte_numbers_memory.reference b/tests/queries/0_stateless/03033_cte_numbers_memory.reference new file mode 100644 index 000000000000..8d2470dea442 --- /dev/null +++ b/tests/queries/0_stateless/03033_cte_numbers_memory.reference @@ -0,0 +1,10 @@ +0 +1 +2 +3 +4 +0 +1 +2 +3 +4 diff --git a/tests/queries/0_stateless/03033_cte_numbers_memory.sql b/tests/queries/0_stateless/03033_cte_numbers_memory.sql new file mode 100644 index 000000000000..0e3ee9abd652 --- /dev/null +++ 
b/tests/queries/0_stateless/03033_cte_numbers_memory.sql @@ -0,0 +1,15 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/61238 + +WITH +(SELECT number FROM system.numbers LIMIT 1) as w1, +(SELECT number FROM system.numbers LIMIT 1) as w2, +(SELECT number FROM system.numbers LIMIT 1) as w3, +(SELECT number FROM system.numbers LIMIT 1) as w4, +(SELECT number FROM system.numbers LIMIT 1) as w5, +(SELECT number FROM system.numbers LIMIT 1) as w6 +SELECT number FROM ( + SELECT number FROM system.numbers LIMIT 10 + UNION ALL + SELECT number FROM system.numbers LIMIT 10 +) +WHERE number < 5; From 922a14eaf1fd22d1a364ec285851c50cbb2ad54f Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Apr 2024 15:33:35 +0200 Subject: [PATCH 188/470] fix stored_count metric --- src/Common/HTTPConnectionPool.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index 21165bbc62d5..ab0ea1571d3f 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -560,6 +560,11 @@ class EndpointConnectionPool : public std::enable_shared_from_this & expired_connections) TSA_REQUIRES(mutex) { + SCOPE_EXIT({ + CurrentMetrics::sub(getMetrics().stored_count, expired_connections.size()); + ProfileEvents::increment(getMetrics().expired, expired_connections.size()); + }); + auto isSoftLimitReached = group->isSoftLimitReached(); while (!stored_connections.empty()) { @@ -573,9 +578,6 @@ class EndpointConnectionPool : public std::enable_shared_from_this Date: Wed, 3 Apr 2024 10:37:39 -0300 Subject: [PATCH 189/470] remove unnecessary matching character --- src/IO/S3/URI.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 7f628d27f34e..8e364337db53 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -40,7 +40,7 @@ URI::URI(const std::string & uri_) /// Case when AWS Private Link Interface is being used /// E.g. (bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com/bucket-name/key) /// https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html - static const RE2 aws_private_link_style_pattern(R"(bucket\.vpce\-([a-z0-9\-.:]+)\.vpce.amazonaws.com)"); + static const RE2 aws_private_link_style_pattern(R"(bucket\.vpce\-([a-z0-9\-.]+)\.vpce.amazonaws.com)"); /// Case when bucket name and key represented in path of S3 URL. /// E.g. 
(https://s3.region.amazonaws.com/bucket-name/key) From ee1c177d10a3c333662ba774af7fb2618193fac1 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 3 Apr 2024 13:49:53 +0000 Subject: [PATCH 190/470] Closes: https://github.com/ClickHouse/ClickHouse/issues/29838 --- .../03080_incorrect_join_with_merge.reference | 2 + .../03080_incorrect_join_with_merge.sql | 66 +++++++++++++++++++ 2 files changed, 68 insertions(+) create mode 100644 tests/queries/0_stateless/03080_incorrect_join_with_merge.reference create mode 100644 tests/queries/0_stateless/03080_incorrect_join_with_merge.sql diff --git a/tests/queries/0_stateless/03080_incorrect_join_with_merge.reference b/tests/queries/0_stateless/03080_incorrect_join_with_merge.reference new file mode 100644 index 000000000000..51993f072d58 --- /dev/null +++ b/tests/queries/0_stateless/03080_incorrect_join_with_merge.reference @@ -0,0 +1,2 @@ +2 +2 diff --git a/tests/queries/0_stateless/03080_incorrect_join_with_merge.sql b/tests/queries/0_stateless/03080_incorrect_join_with_merge.sql new file mode 100644 index 000000000000..ae8e40f6d56e --- /dev/null +++ b/tests/queries/0_stateless/03080_incorrect_join_with_merge.sql @@ -0,0 +1,66 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/29838 + +CREATE TABLE first_table_lr +( + id String, + id2 String +) +ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_03080/alter', 'r1') +ORDER BY id; + + +CREATE TABLE first_table +( + id String, + id2 String +) +ENGINE = Distributed('test_shard_localhost', currentDatabase(), 'first_table_lr'); + + +CREATE TABLE second_table_lr +( + id String, + id2 String +) ENGINE = MergeTree() +ORDER BY id; + +CREATE TABLE second_table +( + id String, + id2 String +) +ENGINE = Distributed('test_shard_localhost', currentDatabase(), 'second_table_lr'); + +INSERT INTO first_table VALUES ('1', '2'), ('3', '4'); +INSERT INTO second_table VALUES ('1', '2'), ('3', '4'); + +CREATE TABLE two_tables +( + id String, + id2 String +) +ENGINE = Merge(currentDatabase(), '^(first_table)$'); + +SELECT + count() +FROM first_table as s +GLOBAL ANY JOIN second_table as f USING (id) +WHERE + f.id2 GLOBAL IN ( + SELECT + id2 + FROM second_table + GROUP BY id2 + ); + +SELECT + count() +FROM two_tables as s +GLOBAL ANY JOIN second_table as f USING (id) +WHERE + f.id2 GLOBAL IN ( + SELECT + id2 + FROM second_table + GROUP BY id2 + ); From 4bb8f12374dc58fc87ed76fb7abe04b926500ec8 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 15:32:12 +0200 Subject: [PATCH 191/470] Close: https://github.com/ClickHouse/ClickHouse/issues/35652 --- ...n_name_to_alias__virtual_columns.reference | 3 +++ ..._column_name_to_alias__virtual_columns.sql | 27 +++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 tests/queries/0_stateless/03080_analyzer_prefer_column_name_to_alias__virtual_columns.reference create mode 100644 tests/queries/0_stateless/03080_analyzer_prefer_column_name_to_alias__virtual_columns.sql diff --git a/tests/queries/0_stateless/03080_analyzer_prefer_column_name_to_alias__virtual_columns.reference b/tests/queries/0_stateless/03080_analyzer_prefer_column_name_to_alias__virtual_columns.reference new file mode 100644 index 000000000000..ccf161abe8d6 --- /dev/null +++ b/tests/queries/0_stateless/03080_analyzer_prefer_column_name_to_alias__virtual_columns.reference @@ -0,0 +1,3 @@ +0 0 +0 0 + 0 0 diff --git a/tests/queries/0_stateless/03080_analyzer_prefer_column_name_to_alias__virtual_columns.sql 
b/tests/queries/0_stateless/03080_analyzer_prefer_column_name_to_alias__virtual_columns.sql new file mode 100644 index 000000000000..1fe19cdad2a5 --- /dev/null +++ b/tests/queries/0_stateless/03080_analyzer_prefer_column_name_to_alias__virtual_columns.sql @@ -0,0 +1,27 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/35652 +CREATE TABLE test ( + id UInt64 +) +ENGINE = MergeTree() +SAMPLE BY intHash32(id) +ORDER BY intHash32(id); + +SELECT + any(id), + any(id) AS id +FROM test +SETTINGS prefer_column_name_to_alias = 1; + +SELECT + any(_sample_factor), + any(_sample_factor) AS _sample_factor +FROM test +SETTINGS prefer_column_name_to_alias = 1; + +SELECT + any(_partition_id), + any(_sample_factor), + any(_partition_id) AS _partition_id, + any(_sample_factor) AS _sample_factor +FROM test +SETTINGS prefer_column_name_to_alias = 1; From c539a6ba8d4fc5b6adc8e2ef9a240db37dda3550 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 15:41:40 +0200 Subject: [PATCH 192/470] Close: https://github.com/ClickHouse/ClickHouse/issues/36189 --- .../03081_analyzer_agg_func_CTE.reference | 2 ++ .../03081_analyzer_agg_func_CTE.sql | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 tests/queries/0_stateless/03081_analyzer_agg_func_CTE.reference create mode 100644 tests/queries/0_stateless/03081_analyzer_agg_func_CTE.sql diff --git a/tests/queries/0_stateless/03081_analyzer_agg_func_CTE.reference b/tests/queries/0_stateless/03081_analyzer_agg_func_CTE.reference new file mode 100644 index 000000000000..bf1b89262365 --- /dev/null +++ b/tests/queries/0_stateless/03081_analyzer_agg_func_CTE.reference @@ -0,0 +1,2 @@ +2020-01-01 +9 diff --git a/tests/queries/0_stateless/03081_analyzer_agg_func_CTE.sql b/tests/queries/0_stateless/03081_analyzer_agg_func_CTE.sql new file mode 100644 index 000000000000..81dbbb3b62dd --- /dev/null +++ b/tests/queries/0_stateless/03081_analyzer_agg_func_CTE.sql @@ -0,0 +1,18 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/36189 +CREATE TABLE test +( + `dt` Date, + `text` String +) +ENGINE = MergeTree +ORDER BY dt; + +insert into test values ('2020-01-01', 'text1'), ('2019-01-01', 'text2'), ('1900-01-01', 'text3'); + +WITH max(dt) AS maxDt +SELECT maxDt +FROM test; + +WITH max(number) AS maxDt +SELECT maxDt +FROM numbers(10); From 0cda6cf5233a1d24e3c02ea402ee3be4bfa61625 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 15:51:16 +0200 Subject: [PATCH 193/470] Close: https://github.com/ClickHouse/ClickHouse/issues/39634 --- ...nalyzer_left_join_correct_column.reference | 2 ++ ...3082_analyzer_left_join_correct_column.sql | 30 +++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 tests/queries/0_stateless/03082_analyzer_left_join_correct_column.reference create mode 100644 tests/queries/0_stateless/03082_analyzer_left_join_correct_column.sql diff --git a/tests/queries/0_stateless/03082_analyzer_left_join_correct_column.reference b/tests/queries/0_stateless/03082_analyzer_left_join_correct_column.reference new file mode 100644 index 000000000000..ad3f3d53ab58 --- /dev/null +++ b/tests/queries/0_stateless/03082_analyzer_left_join_correct_column.reference @@ -0,0 +1,2 @@ +pk1 2 +pk1 2 diff --git a/tests/queries/0_stateless/03082_analyzer_left_join_correct_column.sql b/tests/queries/0_stateless/03082_analyzer_left_join_correct_column.sql new file mode 100644 index 000000000000..93702fee5501 --- /dev/null +++ b/tests/queries/0_stateless/03082_analyzer_left_join_correct_column.sql @@ -0,0 +1,30 @@ +-- 
https://github.com/ClickHouse/ClickHouse/issues/39634 +CREATE TABLE test1 +( + `pk` String, + `x.y` Decimal(18, 4) +) +ENGINE = MergeTree() +ORDER BY (pk); + +CREATE TABLE test2 +( + `pk` String, + `x.y` Decimal(18, 4) +) +ENGINE = MergeTree() +ORDER BY (pk); + +INSERT INTO test1 SELECT 'pk1', 1; + +INSERT INTO test2 SELECT 'pk1', 2; + +SELECT t1.pk, t2.x.y +FROM test1 t1 +LEFT JOIN test2 t2 + on t1.pk = t2.pk; + +SELECT t1.pk, t2.`x.y` +FROM test1 t1 +LEFT JOIN test2 t2 + on t1.pk = t2.pk; From 532d80e20b60987947ac11eb8c4991916742157f Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Wed, 3 Apr 2024 16:02:07 +0200 Subject: [PATCH 194/470] fix log level in debug code --- src/Common/HTTPConnectionPool.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index ab0ea1571d3f..2d3a87dda6b4 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -361,13 +361,16 @@ class EndpointConnectionPool : public std::enable_shared_from_this Date: Wed, 3 Apr 2024 16:17:56 +0200 Subject: [PATCH 195/470] Correctly handle const columns in DistinctTransfom --- src/Processors/Transforms/DistinctTransform.cpp | 1 + .../0_stateless/03033_distinct_transform_const_columns.reference | 1 + .../0_stateless/03033_distinct_transform_const_columns.sql | 1 + 3 files changed, 3 insertions(+) create mode 100644 tests/queries/0_stateless/03033_distinct_transform_const_columns.reference create mode 100644 tests/queries/0_stateless/03033_distinct_transform_const_columns.sql diff --git a/src/Processors/Transforms/DistinctTransform.cpp b/src/Processors/Transforms/DistinctTransform.cpp index 3619fa51bf6c..d528303a642b 100644 --- a/src/Processors/Transforms/DistinctTransform.cpp +++ b/src/Processors/Transforms/DistinctTransform.cpp @@ -55,6 +55,7 @@ void DistinctTransform::transform(Chunk & chunk) /// Convert to full column, because SetVariant for sparse column is not implemented. 
convertToFullIfSparse(chunk); + convertToFullIfConst(chunk); const auto num_rows = chunk.getNumRows(); auto columns = chunk.detachColumns(); diff --git a/tests/queries/0_stateless/03033_distinct_transform_const_columns.reference b/tests/queries/0_stateless/03033_distinct_transform_const_columns.reference new file mode 100644 index 000000000000..d05b1f927f4b --- /dev/null +++ b/tests/queries/0_stateless/03033_distinct_transform_const_columns.reference @@ -0,0 +1 @@ +0 0 diff --git a/tests/queries/0_stateless/03033_distinct_transform_const_columns.sql b/tests/queries/0_stateless/03033_distinct_transform_const_columns.sql new file mode 100644 index 000000000000..41df19ab64e8 --- /dev/null +++ b/tests/queries/0_stateless/03033_distinct_transform_const_columns.sql @@ -0,0 +1 @@ +SELECT DISTINCT COALESCE(COALESCE('') = toNullable('b3'), toUInt128(toNullable(2)), 2, 2, toLowCardinality(2), 2, 2, 2, toUInt128(toNullable(2)), materialize(2), toUInt128(2), 2, 2), COALESCE(COALESCE(COALESCE(materialize(''))) = 'b3', 2, 2, 2, toLowCardinality(2), toUInt128(2), 2, 2, 2, materialize(toUInt256(2)), 2, 2, 2) FROM numbers(100000); From 6661484e555081a01a587ffe1b0174baad11a7af Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 3 Apr 2024 16:31:01 +0200 Subject: [PATCH 196/470] Proper fix for LowCardinality together with JSONExtact functions (#61957) --- src/Functions/FunctionsJSON.h | 71 ++++++++++++++++--- .../00918_json_functions.reference | 6 ++ .../0_stateless/00918_json_functions.sql | 6 ++ ...74_extract_fixedstring_from_json.reference | 7 ++ .../02474_extract_fixedstring_from_json.sql | 7 ++ 5 files changed, 89 insertions(+), 8 deletions(-) diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h index 2539fa1aeb43..9e824fabc42f 100644 --- a/src/Functions/FunctionsJSON.h +++ b/src/Functions/FunctionsJSON.h @@ -257,7 +257,7 @@ class FunctionJSONHelpers } case MoveType::Key: { - key = (*arguments[j + 1].column).getDataAt(row).toView(); + key = arguments[j + 1].column->getDataAt(row).toView(); if (!moveToElementByKey(res_element, key)) return false; break; @@ -334,6 +334,26 @@ class FunctionJSONHelpers }; +template +class JSONExtractImpl; + +template +class JSONExtractKeysAndValuesImpl; + +/** +* Functions JSONExtract and JSONExtractKeysAndValues force the return type - it is specified in the last argument. +* For example - `SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))')` +* But by default ClickHouse decides on its own whether the return type will be LowCardinality based on the types of +* input arguments. +* And for these specific functions we cannot rely on this mechanism, so these functions have their own implementation - +* just convert all of the LowCardinality input columns to full ones, execute and wrap the resulting column in LowCardinality +* if needed. 
+*/ +template typename Impl> +constexpr bool functionForcesTheReturnType() +{ + return std::is_same_v, JSONExtractImpl> || std::is_same_v, JSONExtractKeysAndValuesImpl>; +} template typename Impl> class ExecutableFunctionJSON : public IExecutableFunction @@ -348,17 +368,50 @@ class ExecutableFunctionJSON : public IExecutableFunction String getName() const override { return Name::name; } bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForLowCardinalityColumns() const override + { + return !functionForcesTheReturnType(); + } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { if (null_presence.has_null_constant) return result_type->createColumnConstWithDefaultValue(input_rows_count); - ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(arguments) : arguments; - ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); - if (null_presence.has_nullable) - return wrapInNullable(temporary_result, arguments, result_type, input_rows_count); - return temporary_result; + if constexpr (functionForcesTheReturnType()) + { + ColumnsWithTypeAndName columns_without_low_cardinality = arguments; + + for (auto & column : columns_without_low_cardinality) + { + column.column = recursiveRemoveLowCardinality(column.column); + column.type = recursiveRemoveLowCardinality(column.type); + } + + ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(columns_without_low_cardinality) : columns_without_low_cardinality; + ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); + + if (null_presence.has_nullable) + temporary_result = wrapInNullable(temporary_result, columns_without_low_cardinality, result_type, input_rows_count); + + if (result_type->lowCardinality()) + temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); + + return temporary_result; + } + else + { + ColumnsWithTypeAndName temporary_columns = null_presence.has_nullable ? createBlockWithNestedColumns(arguments) : arguments; + ColumnPtr temporary_result = chooseAndRunJSONParser(temporary_columns, json_return_type, input_rows_count); + + if (null_presence.has_nullable) + temporary_result = wrapInNullable(temporary_result, arguments, result_type, input_rows_count); + + if (result_type->lowCardinality()) + temporary_result = recursiveLowCardinalityTypeConversion(temporary_result, json_return_type, result_type); + + return temporary_result; + } } private: @@ -429,7 +482,6 @@ class FunctionBaseFunctionJSON : public IFunctionBase DataTypePtr json_return_type; }; - /// We use IFunctionOverloadResolver instead of IFunction to handle non-default NULL processing. /// Both NULL and JSON NULL should generate NULL value. If any argument is NULL, return NULL. 
template typename Impl> @@ -450,6 +502,10 @@ class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext bool isVariadic() const override { return true; } size_t getNumberOfArguments() const override { return 0; } bool useDefaultImplementationForNulls() const override { return false; } + bool useDefaultImplementationForLowCardinalityColumns() const override + { + return !functionForcesTheReturnType(); + } FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const override { @@ -481,7 +537,6 @@ class JSONOverloadResolver : public IFunctionOverloadResolver, WithContext } }; - struct NameJSONHas { static constexpr auto name{"JSONHas"}; }; struct NameIsValidJSON { static constexpr auto name{"isValidJSON"}; }; struct NameJSONLength { static constexpr auto name{"JSONLength"}; }; diff --git a/tests/queries/0_stateless/00918_json_functions.reference b/tests/queries/0_stateless/00918_json_functions.reference index 43b15ded93d3..078348cd20ff 100644 --- a/tests/queries/0_stateless/00918_json_functions.reference +++ b/tests/queries/0_stateless/00918_json_functions.reference @@ -286,3 +286,9 @@ v --show error: type should be const string --show error: index type should be integer --show error: key of map type should be String +\N +\N +Hello +Hello +Hello +Hello diff --git a/tests/queries/0_stateless/00918_json_functions.sql b/tests/queries/0_stateless/00918_json_functions.sql index e19dd17670e4..3d30ce841bac 100644 --- a/tests/queries/0_stateless/00918_json_functions.sql +++ b/tests/queries/0_stateless/00918_json_functions.sql @@ -326,3 +326,9 @@ SELECT JSONExtract('[]', JSONExtract('0', 'UInt256'), 'UInt256'); -- { serverErr SELECT '--show error: key of map type should be String'; SELECT JSONExtract('{"a": [100.0, 200], "b": [-100, 200.0, 300]}', 'Map(Int64, Array(Float64))'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT JSONExtract(materialize(toLowCardinality('{"string_value":null}')), materialize('string_value'), 'LowCardinality(Nullable(String))'); +SELECT JSONExtract(materialize('{"string_value":null}'), materialize('string_value'), 'LowCardinality(Nullable(String))'); +SELECT JSONExtract(materialize('{"string_value":"Hello"}'), materialize('string_value'), 'LowCardinality(Nullable(String))') AS x; +SELECT JSONExtract(materialize(toLowCardinality('{"string_value":"Hello"}')), materialize('string_value'), 'LowCardinality(Nullable(String))') AS x; +SELECT JSONExtract(materialize('{"string_value":"Hello"}'), materialize(toLowCardinality('string_value')), 'LowCardinality(Nullable(String))') AS x; +SELECT JSONExtract(materialize(toLowCardinality('{"string_value":"Hello"}')), materialize(toLowCardinality('string_value')), 'LowCardinality(Nullable(String))') AS x; diff --git a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference index 783d12fcf1a6..21ddf5d35123 100644 --- a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference +++ b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.reference @@ -8,3 +8,10 @@ \0\0\0\0\0 131231 131231 +1234 +1234 +{"b":131231} +\0\0\0\0 +1234567890 +18446744073709551615 +-9223372036854775807 diff --git a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql index cfc47e00cbac..bbb9f55062bc 100644 --- a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql +++ b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql @@ 
-6,3 +6,10 @@ SELECT JSONExtract('{"a": 123456}', 'a', 'FixedString(5)'); SELECT JSONExtract('{"a": 123456}', 'a', 'FixedString(6)'); SELECT JSONExtract(materialize('{"a": 131231}'), 'a', 'LowCardinality(FixedString(5))') FROM numbers(2); SELECT JSONExtract(materialize('{"a": 131231}'), 'a', 'LowCardinality(FixedString(6))') FROM numbers(2); +SELECT JSONExtract(materialize('{"a": 131231, "b": 1234}'), 'b', 'LowCardinality(FixedString(4))'); +SELECT JSONExtract(materialize('{"a": 131231, "b": "1234"}'), 'b', 'LowCardinality(FixedString(4))'); +SELECT JSONExtract(materialize('{"a": {"b": 131231} }'), 'a', 'LowCardinality(FixedString(12))'); +SELECT JSONExtract(materialize('{"a": 131231, "b": 1234567890}'), 'b', 'LowCardinality(FixedString(4))'); +SELECT JSONExtract(materialize('{"a": 131231, "b": 1234567890}'), 'b', 'LowCardinality(FixedString(10))'); +SELECT JSONExtract(materialize('{"a": 18446744073709551615}'), 'a', 'LowCardinality(FixedString(20))'); +SELECT JSONExtract(materialize('{"a": -9223372036854775807}'), 'a', 'LowCardinality(FixedString(20))'); From 0b53980221d419fc1d0227c851502cdaeafc7892 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 16:45:56 +0200 Subject: [PATCH 197/470] Close: https://github.com/ClickHouse/ClickHouse/issues/47432 --- ...03084_analyzer_join_column_alias.reference | 1 + .../03084_analyzer_join_column_alias.sql | 23 +++++++++++++++++++ 2 files changed, 24 insertions(+) create mode 100644 tests/queries/0_stateless/03084_analyzer_join_column_alias.reference create mode 100644 tests/queries/0_stateless/03084_analyzer_join_column_alias.sql diff --git a/tests/queries/0_stateless/03084_analyzer_join_column_alias.reference b/tests/queries/0_stateless/03084_analyzer_join_column_alias.reference new file mode 100644 index 000000000000..acbb84063471 --- /dev/null +++ b/tests/queries/0_stateless/03084_analyzer_join_column_alias.reference @@ -0,0 +1 @@ +2023-01-01 diff --git a/tests/queries/0_stateless/03084_analyzer_join_column_alias.sql b/tests/queries/0_stateless/03084_analyzer_join_column_alias.sql new file mode 100644 index 000000000000..caf65823532a --- /dev/null +++ b/tests/queries/0_stateless/03084_analyzer_join_column_alias.sql @@ -0,0 +1,23 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/47432 +create or replace table t1 +engine = MergeTree() +order by tuple() +as +select 1 as user_id, 2 as level; + + +create or replace table t2 +engine = MergeTree() +order by tuple() +as +select 1 as user_id, 'website' as event_source, '2023-01-01 00:00:00'::DateTime as timestamp; + + +alter table t2 +add column date Date alias toDate(timestamp); + +SELECT + any(t2.date) as any_val +FROM t1 AS t1 +LEFT JOIN t2 as t2 + ON (t1.user_id = t2.user_id); From bc9d380e5a88be17cda135c9f649103404204bb2 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 16:46:45 +0200 Subject: [PATCH 198/470] Close: https://github.com/ClickHouse/ClickHouse/issues/54910 --- .../03085_analyzer_alias_column_group_by.reference | 1 + .../0_stateless/03085_analyzer_alias_column_group_by.sql | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 tests/queries/0_stateless/03085_analyzer_alias_column_group_by.reference create mode 100644 tests/queries/0_stateless/03085_analyzer_alias_column_group_by.sql diff --git a/tests/queries/0_stateless/03085_analyzer_alias_column_group_by.reference b/tests/queries/0_stateless/03085_analyzer_alias_column_group_by.reference new file mode 100644 index 000000000000..804f12662b87 --- /dev/null +++ 
b/tests/queries/0_stateless/03085_analyzer_alias_column_group_by.reference @@ -0,0 +1 @@ +String 1 diff --git a/tests/queries/0_stateless/03085_analyzer_alias_column_group_by.sql b/tests/queries/0_stateless/03085_analyzer_alias_column_group_by.sql new file mode 100644 index 000000000000..f4eaa5d9710f --- /dev/null +++ b/tests/queries/0_stateless/03085_analyzer_alias_column_group_by.sql @@ -0,0 +1,4 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/54910 +SELECT toTypeName(stat_standard_id) AS stat_standard_id_1, count(1) AS value +FROM ( SELECT 'string value' AS stat_standard_id ) +GROUP BY stat_standard_id_1 LIMIT 1 From a5107417449477f1dddb0fd76502c56652a4dd4a Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 16:49:35 +0200 Subject: [PATCH 199/470] Close: https://github.com/ClickHouse/ClickHouse/issues/57321 --- ...86_analyzer_window_func_part_of_group_by.reference | 2 ++ .../03086_analyzer_window_func_part_of_group_by.sql | 11 +++++++++++ 2 files changed, 13 insertions(+) create mode 100644 tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.reference create mode 100644 tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.sql diff --git a/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.reference b/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.reference new file mode 100644 index 000000000000..c8b2d51ae531 --- /dev/null +++ b/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.reference @@ -0,0 +1,2 @@ +1 2 +2 2 diff --git a/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.sql b/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.sql new file mode 100644 index 000000000000..55a60873a5a9 --- /dev/null +++ b/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.sql @@ -0,0 +1,11 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/57321 +SELECT + ver, + max(ver) OVER () AS ver_max +FROM +( + SELECT 1 AS ver + UNION ALL + SELECT 2 AS ver +) +GROUP BY ver From 346a0ec53b1dd2984ee4fe55785d61cec3a5b11b Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 17:05:48 +0200 Subject: [PATCH 200/470] Close: https://github.com/ClickHouse/ClickHouse/issues/59154 --- .../03087_analyzer_subquery_with_alias.reference | 1 + .../03087_analyzer_subquery_with_alias.sql | 15 +++++++++++++++ 2 files changed, 16 insertions(+) create mode 100644 tests/queries/0_stateless/03087_analyzer_subquery_with_alias.reference create mode 100644 tests/queries/0_stateless/03087_analyzer_subquery_with_alias.sql diff --git a/tests/queries/0_stateless/03087_analyzer_subquery_with_alias.reference b/tests/queries/0_stateless/03087_analyzer_subquery_with_alias.reference new file mode 100644 index 000000000000..7660873d1031 --- /dev/null +++ b/tests/queries/0_stateless/03087_analyzer_subquery_with_alias.reference @@ -0,0 +1 @@ +[1] diff --git a/tests/queries/0_stateless/03087_analyzer_subquery_with_alias.sql b/tests/queries/0_stateless/03087_analyzer_subquery_with_alias.sql new file mode 100644 index 000000000000..98aca76fe494 --- /dev/null +++ b/tests/queries/0_stateless/03087_analyzer_subquery_with_alias.sql @@ -0,0 +1,15 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/59154 +SELECT * +FROM +( + WITH + assumeNotNull(( + SELECT 0.9 + )) AS TUNING, + ELEMENT_QUERY AS + ( + SELECT quantiles(TUNING)(1) + ) + SELECT * + FROM ELEMENT_QUERY +); From bbcecd26606df86e4c2359c2bab811892dd8d4f1 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel 
<48961922+Avogar@users.noreply.github.com> Date: Wed, 3 Apr 2024 17:07:44 +0200 Subject: [PATCH 201/470] Remove reverted PR from 24.3 changelog --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 84e51c1efdfe..dd88f3ee2c79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -123,7 +123,6 @@ * Something was wrong with Apache Hive, which is experimental and not supported. [#60262](https://github.com/ClickHouse/ClickHouse/pull/60262) ([shanfengp](https://github.com/Aed-p)). * An improvement for experimental parallel replicas: force reanalysis if parallel replicas changed [#60362](https://github.com/ClickHouse/ClickHouse/pull/60362) ([Raúl Marín](https://github.com/Algunenano)). * Fix usage of plain metadata type with new disks configuration option [#60396](https://github.com/ClickHouse/ClickHouse/pull/60396) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Don't allow to set max_parallel_replicas to 0 as it doesn't make sense [#60430](https://github.com/ClickHouse/ClickHouse/pull/60430) ([Kruglov Pavel](https://github.com/Avogar)). * Try to fix logical error 'Cannot capture column because it has incompatible type' in mapContainsKeyLike [#60451](https://github.com/ClickHouse/ClickHouse/pull/60451) ([Kruglov Pavel](https://github.com/Avogar)). * Avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). * Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)). From 39205fe676b97923765a7fe8791f68a7f915de35 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 17:10:10 +0200 Subject: [PATCH 202/470] Close: https://github.com/ClickHouse/ClickHouse/issues/61014 --- ...3088_analyzer_ambiguous_column_multi_call.reference | 0 .../03088_analyzer_ambiguous_column_multi_call.sql | 10 ++++++++++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.reference create mode 100644 tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql diff --git a/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.reference b/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql b/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql new file mode 100644 index 000000000000..84afdb295c24 --- /dev/null +++ b/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql @@ -0,0 +1,10 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/61014 +create database test_03088; + +create table test_03088.a (i int) engine = Log(); + +select + test_03088.a.i +from + test_03088.a, + test_03088.a as x; From ccf0953d8013284815102643cb735d6151352c04 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Wed, 3 Apr 2024 17:14:05 +0200 Subject: [PATCH 203/470] Close: https://github.com/ClickHouse/ClickHouse/issues/61950 --- .../03089_analyzer_alias_replacement.reference | 2 ++ .../0_stateless/03089_analyzer_alias_replacement.sql | 8 ++++++++ 2 files changed, 10 insertions(+) create mode 100644 tests/queries/0_stateless/03089_analyzer_alias_replacement.reference create mode 100644 tests/queries/0_stateless/03089_analyzer_alias_replacement.sql 
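For context on the expected output in the reference file below, a brief worked sketch of the alias resolution this test pins down, assuming the usual contents of `system.one` (a single row whose only column `dummy` equals 0): the identifier inside the WITH expression still resolves to the source column, while the outer SELECT resolves it to the newly defined alias.

-- Sketch of the expected results; assumes system.one holds one row with dummy = 0.
WITH dummy + 1 AS dummy         -- inner `dummy` is the column value 0, so the alias evaluates to 1
SELECT dummy FROM system.one;   -- outer `dummy` picks up the alias, returning 1

WITH dummy + 3 AS dummy         -- alias evaluates to 0 + 3 = 3
SELECT dummy + 1 AS y           -- y = 3 + 1 = 4
FROM system.one
SETTINGS enable_global_with_statement = 1;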
diff --git a/tests/queries/0_stateless/03089_analyzer_alias_replacement.reference b/tests/queries/0_stateless/03089_analyzer_alias_replacement.reference new file mode 100644 index 000000000000..2f1b638ff548 --- /dev/null +++ b/tests/queries/0_stateless/03089_analyzer_alias_replacement.reference @@ -0,0 +1,2 @@ +1 +4 diff --git a/tests/queries/0_stateless/03089_analyzer_alias_replacement.sql b/tests/queries/0_stateless/03089_analyzer_alias_replacement.sql new file mode 100644 index 000000000000..00a3795eab8b --- /dev/null +++ b/tests/queries/0_stateless/03089_analyzer_alias_replacement.sql @@ -0,0 +1,8 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/61950 + +with dummy + 1 as dummy select dummy from system.one; + +WITH dummy + 3 AS dummy +SELECT dummy + 1 AS y +FROM system.one +SETTINGS enable_global_with_statement = 1; From 5fadac4994f316761a0427702459ab604b0d8c8b Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 3 Apr 2024 15:24:44 +0000 Subject: [PATCH 204/470] Update contrib to new commit --- contrib/arrow | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/arrow b/contrib/arrow index 46e7ed11c2e0..8f36d71d1858 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit 46e7ed11c2e0ef62ccbbe23e6a35a4988884e450 +Subproject commit 8f36d71d18587f1f315ec832f424183cb6519cbb From e737acb1f9fb08d0a187959b73dad2089d5404be Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 3 Apr 2024 17:39:26 +0200 Subject: [PATCH 205/470] beautify tests --- ...2_storage_memory_modify_settings.reference | 6 ++-- .../03032_storage_memory_modify_settings.sql | 36 ++++++++++--------- 2 files changed, 22 insertions(+), 20 deletions(-) diff --git a/tests/queries/0_stateless/03032_storage_memory_modify_settings.reference b/tests/queries/0_stateless/03032_storage_memory_modify_settings.reference index f7d25c40a430..ca772aa7fd02 100644 --- a/tests/queries/0_stateless/03032_storage_memory_modify_settings.reference +++ b/tests/queries/0_stateless/03032_storage_memory_modify_settings.reference @@ -3,9 +3,9 @@ TESTING MODIFY SMALLER BYTES 16384 65536 TESTING MODIFY SMALLER ROWS -9216 -8192 -4096 +1100 +1000 +500 TESTING ADD SETTINGS 50 1000 diff --git a/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql b/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql index bfa13ee0ec8b..5a28af23fa7d 100644 --- a/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql +++ b/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql @@ -4,47 +4,47 @@ SELECT 'TESTING MODIFY SMALLER BYTES'; DROP TABLE IF EXISTS memory; CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_bytes_to_keep = 8192, max_bytes_to_keep = 32768; -INSERT INTO memory SELECT * FROM numbers(0, 100); -INSERT INTO memory SELECT * FROM numbers(0, 3000); +INSERT INTO memory SELECT * FROM numbers(0, 100); -- 1024 bytes +INSERT INTO memory SELECT * FROM numbers(0, 3000); -- 16384 bytes SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); ALTER TABLE memory MODIFY SETTING min_bytes_to_keep = 4096, max_bytes_to_keep = 16384; SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -INSERT INTO memory SELECT * FROM numbers(3000, 10000); +INSERT INTO memory SELECT * FROM numbers(3000, 10000); -- 65536 bytes SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); SELECT 'TESTING MODIFY SMALLER ROWS'; DROP 
TABLE IF EXISTS memory; CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 200, max_rows_to_keep = 2000; -INSERT INTO memory SELECT * FROM numbers(0, 100); -INSERT INTO memory SELECT * FROM numbers(100, 1000); -SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +INSERT INTO memory SELECT * FROM numbers(0, 100); -- 100 rows +INSERT INTO memory SELECT * FROM numbers(100, 1000); -- 1000 rows +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1100 in total ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000; -SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1000 in total after deleting -INSERT INTO memory SELECT * FROM numbers(1000, 500); -SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +INSERT INTO memory SELECT * FROM numbers(1000, 500); -- 500 rows +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 500 in total after deleting SELECT 'TESTING ADD SETTINGS'; DROP TABLE IF EXISTS memory; CREATE TABLE memory (i UInt32) ENGINE = Memory; -INSERT INTO memory SELECT * FROM numbers(0, 50); +INSERT INTO memory SELECT * FROM numbers(0, 50); -- 50 rows SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -INSERT INTO memory SELECT * FROM numbers(50, 950); +INSERT INTO memory SELECT * FROM numbers(50, 950); -- 950 rows SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -INSERT INTO memory SELECT * FROM numbers(2000, 70); +INSERT INTO memory SELECT * FROM numbers(2000, 70); -- 70 rows SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000; SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -INSERT INTO memory SELECT * FROM numbers(3000, 1100); +INSERT INTO memory SELECT * FROM numbers(3000, 1100); -- 1100 rows SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); SELECT 'TESTING ADD SETTINGS'; @@ -52,21 +52,23 @@ DROP TABLE IF EXISTS memory; CREATE TABLE memory (i UInt32) ENGINE = Memory; ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000; -INSERT INTO memory SELECT * FROM numbers(0, 50); +INSERT INTO memory SELECT * FROM numbers(0, 50); -- 50 rows SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -INSERT INTO memory SELECT * FROM numbers(50, 950); +INSERT INTO memory SELECT * FROM numbers(50, 950); -- 950 rows SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -INSERT INTO memory SELECT * FROM numbers(2000, 70); +INSERT INTO memory SELECT * FROM numbers(2000, 70); -- 70 rows SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -INSERT INTO memory SELECT * FROM numbers(3000, 1100); +INSERT INTO memory SELECT * FROM numbers(3000, 1100); -- 1100 rows SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); SELECT 'TESTING INVALID SETTINGS'; DROP TABLE IF EXISTS memory; CREATE TABLE memory (i UInt32) ENGINE = Memory; +ALTER TABLE memory MODIFY SETTING max_rows_to_keep = 
1000; +ALTER TABLE memory MODIFY SETTING max_bytes_to_keep = 1000; ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100; -- { serverError 452 } ALTER TABLE memory MODIFY SETTING min_bytes_to_keep = 100; -- { serverError 452 } From e0e3db843aeb32fba9fa0def4fb1bc7f17943bc5 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 3 Apr 2024 18:03:45 +0200 Subject: [PATCH 206/470] Update 03032_storage_memory_modify_settings.sql --- .../0_stateless/03032_storage_memory_modify_settings.sql | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql b/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql index 5a28af23fa7d..e59a5e4edb6c 100644 --- a/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql +++ b/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql @@ -67,9 +67,9 @@ SELECT total_rows FROM system.tables WHERE name = 'memory' and database = curren SELECT 'TESTING INVALID SETTINGS'; DROP TABLE IF EXISTS memory; CREATE TABLE memory (i UInt32) ENGINE = Memory; -ALTER TABLE memory MODIFY SETTING max_rows_to_keep = 1000; -ALTER TABLE memory MODIFY SETTING max_bytes_to_keep = 1000; ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100; -- { serverError 452 } ALTER TABLE memory MODIFY SETTING min_bytes_to_keep = 100; -- { serverError 452 } +ALTER TABLE memory MODIFY SETTING max_rows_to_keep = 1000; +ALTER TABLE memory MODIFY SETTING max_bytes_to_keep = 1000; -DROP TABLE memory; \ No newline at end of file +DROP TABLE memory; From ca27cf3fde37442f76f3d244f8cc57b5c541ebc7 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 3 Apr 2024 16:38:41 +0000 Subject: [PATCH 207/470] fix mapper for gcs --- programs/server/config.xml | 2 +- src/IO/S3/URI.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index ea3ead47c32f..e92381eeb1e3 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -96,7 +96,7 @@ https://{bucket}.s3.amazonaws.com - https://{bucket}.storage.googleapis.com + https://storage.googleapis.com/{bucket} https://{bucket}.oss.aliyuncs.com diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 027cb624ed5c..0d8502ecf1f6 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -67,7 +67,7 @@ URI::URI(const std::string & uri_) else { mapper["s3"] = "https://{bucket}.s3.amazonaws.com"; - mapper["gs"] = "https://{bucket}.storage.googleapis.com"; + mapper["gs"] = "https://storage.googleapis.com/{bucket}"; mapper["oss"] = "https://{bucket}.oss.aliyuncs.com"; } From 463691922c43a9ee30ad7d0a3a7895fcbd76760a Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 3 Apr 2024 17:05:49 +0000 Subject: [PATCH 208/470] Fix: disable test for SMT --- .../0_stateless/02980_dist_insert_readonly_replica.sql.j2 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02980_dist_insert_readonly_replica.sql.j2 b/tests/queries/0_stateless/02980_dist_insert_readonly_replica.sql.j2 index 5bf40f34f5c4..aba742fa64a9 100644 --- a/tests/queries/0_stateless/02980_dist_insert_readonly_replica.sql.j2 +++ b/tests/queries/0_stateless/02980_dist_insert_readonly_replica.sql.j2 @@ -1,6 +1,7 @@ --- Tags: no-parallel, no-fasttest +-- Tags: no-parallel, no-fasttest, no-shared-merge-tree -- Tag no-parallel - due to static databases -- Tag no-fasttest - S3 is required +-- Tag no-shared-merge-tree - no reliable way to make SMT 
read-only in stateless test drop database if exists shard_0; drop database if exists shard_1; From 2db1e3451773c160382325334a6c2b992c8b3314 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 3 Apr 2024 19:06:33 +0200 Subject: [PATCH 209/470] Better logger name --- src/Interpreters/Cache/FileCache.cpp | 10 ++++++++-- src/Interpreters/Cache/SLRUFileCachePriority.cpp | 8 +++++--- src/Interpreters/Cache/SLRUFileCachePriority.h | 5 +++-- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 71dc0cca3a74..3dc13ba77378 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -90,9 +90,15 @@ FileCache::FileCache(const std::string & cache_name, const FileCacheSettings & s , metadata(settings.base_path, settings.background_download_queue_size_limit, settings.background_download_threads, write_cache_per_user_directory) { if (settings.cache_policy == "LRU") - main_priority = std::make_unique(settings.max_size, settings.max_elements); + { + main_priority = std::make_unique( + settings.max_size, settings.max_elements, nullptr, cache_name); + } else if (settings.cache_policy == "SLRU") - main_priority = std::make_unique(settings.max_size, settings.max_elements, settings.slru_size_ratio); + { + main_priority = std::make_unique( + settings.max_size, settings.max_elements, settings.slru_size_ratio, nullptr, nullptr, cache_name); + } else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown cache policy: {}", settings.cache_policy); diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp index 1400d3219c64..59e51ae31f1f 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp @@ -28,17 +28,19 @@ SLRUFileCachePriority::SLRUFileCachePriority( size_t max_elements_, double size_ratio_, LRUFileCachePriority::StatePtr probationary_state_, - LRUFileCachePriority::StatePtr protected_state_) + LRUFileCachePriority::StatePtr protected_state_, + const std::string & description_) : IFileCachePriority(max_size_, max_elements_) , size_ratio(size_ratio_) , protected_queue(LRUFileCachePriority(getRatio(max_size_, size_ratio), getRatio(max_elements_, size_ratio), protected_state_, - "protected")) + description_ + ", protected")) , probationary_queue(LRUFileCachePriority(getRatio(max_size_, 1 - size_ratio), getRatio(max_elements_, 1 - size_ratio), probationary_state_, - "probationary")) + description_ + ", probationary")) + , log(getLogger("SLRUFileCachePriority(" + description_ + ")")) { LOG_DEBUG( log, "Probationary queue {} in size and {} in elements. 
" diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h index 4cf5bb0f1999..734828f55dd6 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.h +++ b/src/Interpreters/Cache/SLRUFileCachePriority.h @@ -19,7 +19,8 @@ class SLRUFileCachePriority : public IFileCachePriority size_t max_elements_, double size_ratio_, LRUFileCachePriority::StatePtr probationary_state_ = nullptr, - LRUFileCachePriority::StatePtr protected_state_ = nullptr); + LRUFileCachePriority::StatePtr protected_state_ = nullptr, + const std::string & description_ = "none"); size_t getSize(const CachePriorityGuard::Lock & lock) const override; @@ -67,7 +68,7 @@ class SLRUFileCachePriority : public IFileCachePriority double size_ratio; LRUFileCachePriority protected_queue; LRUFileCachePriority probationary_queue; - LoggerPtr log = getLogger("SLRUFileCachePriority"); + LoggerPtr log; void increasePriority(SLRUIterator & iterator, const CachePriorityGuard::Lock & lock); From 11eb872ac61a94bce68e715f9f855d59fd4aadd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 3 Apr 2024 19:06:36 +0200 Subject: [PATCH 210/470] Remove more nested includes --- src/Access/AccessBackup.cpp | 2 ++ src/Backups/BackupEntriesCollector.cpp | 11 +++++++---- src/Backups/BackupsWorker.cpp | 2 ++ src/Backups/RestorerFromBackup.cpp | 3 +++ src/Columns/ColumnAggregateFunction.cpp | 9 +++++---- src/Columns/ColumnAggregateFunction.h | 9 +-------- src/Columns/ColumnObject.h | 5 ++--- src/Common/FileChecker.cpp | 2 ++ src/Core/Field.cpp | 12 +++++++++++- src/Core/Field.h | 9 +++++++-- src/Core/SettingsEnums.cpp | 2 ++ src/Core/SettingsFields.h | 11 +++++------ .../SerializationAggregateFunction.cpp | 17 +++++++---------- src/Functions/FunctionsTimeWindow.h | 1 + src/Functions/array/mapOp.cpp | 7 ++++--- src/Interpreters/DatabaseCatalog.cpp | 2 ++ src/Storages/StorageLog.cpp | 2 ++ src/Storages/System/StorageSystemTables.cpp | 2 ++ .../TableFunctionMergeTreeIndex.cpp | 2 ++ 19 files changed, 69 insertions(+), 41 deletions(-) diff --git a/src/Access/AccessBackup.cpp b/src/Access/AccessBackup.cpp index ba89899dd8f1..1110b9c4b213 100644 --- a/src/Access/AccessBackup.cpp +++ b/src/Access/AccessBackup.cpp @@ -16,6 +16,8 @@ #include #include #include + +#include #include namespace fs = std::filesystem; diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index c71ce195388c..2efb6bf84ad2 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -1,22 +1,25 @@ +#include +#include #include #include -#include -#include #include #include +#include #include #include #include #include #include #include -#include #include #include +#include #include #include + +#include #include -#include + #include namespace fs = std::filesystem; diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 96fe770227c8..5cbc4ed1e41b 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -27,6 +27,8 @@ #include #include +#include + namespace CurrentMetrics { diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index ed1d5b8a103d..d8383fdbb1db 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -24,6 +24,9 @@ #include #include #include + +#include + #include #include diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index 801aa8a91bb6..7533d2cb71b1 100644 --- 
a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -2,6 +2,11 @@ #include #include +#include +#include +#include +#include +#include #include #include #include @@ -11,10 +16,6 @@ #include #include #include -#include -#include -#include -#include namespace DB diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index ae7c5f0b54e5..ba33cf0ee14c 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -1,17 +1,10 @@ #pragma once #include - #include -#include - #include - -#include -#include -#include - #include +#include namespace DB { diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index a06235a45001..af12b99efcbb 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -1,12 +1,11 @@ #pragma once +#include #include #include -#include -#include -#include #include #include +#include #include diff --git a/src/Common/FileChecker.cpp b/src/Common/FileChecker.cpp index 098ea4b1ac46..5ecbe44530bc 100644 --- a/src/Common/FileChecker.cpp +++ b/src/Common/FileChecker.cpp @@ -10,6 +10,8 @@ #include #include +#include + namespace fs = std::filesystem; diff --git a/src/Core/Field.cpp b/src/Core/Field.cpp index 9c058d619024..73f0703f21e4 100644 --- a/src/Core/Field.cpp +++ b/src/Core/Field.cpp @@ -22,6 +22,12 @@ namespace ErrorCodes extern const int DECIMAL_OVERFLOW; } +template +T DecimalField::getScaleMultiplier() const +{ + return DecimalUtils::scaleMultiplier(scale); +} + inline Field getBinaryValue(UInt8 type, ReadBuffer & buf) { switch (static_cast(type)) @@ -627,5 +633,9 @@ std::string_view Field::getTypeName() const return fieldTypeToString(which); } - +template class DecimalField; +template class DecimalField; +template class DecimalField; +template class DecimalField; +template class DecimalField; } diff --git a/src/Core/Field.h b/src/Core/Field.h index aed5fab21065..eb01be6c43db 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -8,7 +8,6 @@ #include #include -#include #include #include #include @@ -151,7 +150,7 @@ class DecimalField operator T() const { return dec; } /// NOLINT T getValue() const { return dec; } - T getScaleMultiplier() const { return DecimalUtils::scaleMultiplier(scale); } + T getScaleMultiplier() const; UInt32 getScale() const { return scale; } template @@ -200,6 +199,12 @@ class DecimalField UInt32 scale; }; +extern template class DecimalField; +extern template class DecimalField; +extern template class DecimalField; +extern template class DecimalField; +extern template class DecimalField; + template constexpr bool is_decimal_field = false; template <> constexpr inline bool is_decimal_field> = true; template <> constexpr inline bool is_decimal_field> = true; diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index c3f0715ad68d..e47174a3b9d1 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -2,6 +2,8 @@ #include #include +#include + namespace DB { diff --git a/src/Core/SettingsFields.h b/src/Core/SettingsFields.h index b39aa52c15df..64854e46ab5e 100644 --- a/src/Core/SettingsFields.h +++ b/src/Core/SettingsFields.h @@ -1,13 +1,12 @@ #pragma once -#include -#include -#include -#include -#include -#include #include #include +#include +#include +#include +#include +#include namespace DB diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp index 6f09ed31e22d..2ac23d52e28f 100644 --- 
a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp @@ -1,17 +1,14 @@ -#include - -#include - #include - -#include -#include -#include -#include - +#include #include #include +#include #include +#include +#include +#include +#include +#include namespace DB { diff --git a/src/Functions/FunctionsTimeWindow.h b/src/Functions/FunctionsTimeWindow.h index 4532286830de..6183d25c8bd4 100644 --- a/src/Functions/FunctionsTimeWindow.h +++ b/src/Functions/FunctionsTimeWindow.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Functions/array/mapOp.cpp b/src/Functions/array/mapOp.cpp index 613fd934c41b..50b64cf9809f 100644 --- a/src/Functions/array/mapOp.cpp +++ b/src/Functions/array/mapOp.cpp @@ -1,18 +1,19 @@ -#include #include +#include #include #include #include #include #include #include +#include #include #include #include #include -#include "Columns/ColumnMap.h" -#include "DataTypes/DataTypeMap.h" +#include +#include namespace DB { diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index ec6c8b5924f3..2536ce0ce2fb 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -27,6 +27,8 @@ #include #include +#include + #include "config.h" #if USE_MYSQL diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 549cfca1b6c4..b652750346f3 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -39,6 +39,8 @@ #include #include +#include + #define DBMS_STORAGE_LOG_DATA_FILE_EXTENSION ".bin" #define DBMS_STORAGE_LOG_MARKS_FILE_NAME "__marks.mrk" diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index edfc7213dcd6..9bd7ff945adb 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -25,6 +25,8 @@ #include #include +#include + namespace DB { diff --git a/src/TableFunctions/TableFunctionMergeTreeIndex.cpp b/src/TableFunctions/TableFunctionMergeTreeIndex.cpp index 435ed4bdf0d6..06a48f0e25f7 100644 --- a/src/TableFunctions/TableFunctionMergeTreeIndex.cpp +++ b/src/TableFunctions/TableFunctionMergeTreeIndex.cpp @@ -10,6 +10,8 @@ #include #include +#include + namespace DB { From c70ecfe5f3bc37ad49dffb79158a86dc887de798 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 3 Apr 2024 19:18:56 +0200 Subject: [PATCH 211/470] Fix --- .../Cache/LRUFileCachePriority.cpp | 42 ++++++++++--------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index ddc307554092..012ba14e5a81 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -307,25 +307,29 @@ bool LRUFileCachePriority::collectCandidatesForEviction( if (can_fit()) { - /// As eviction is done without a cache priority lock, - /// then if some space was partially available and some needed - /// to be freed via eviction, we need to make sure that this - /// partially available space is still available - /// after we finish with eviction for non-available space. - /// So we create a space holder for the currently available part - /// of the required space for the duration of eviction of the other - /// currently non-available part of the space. - - const size_t hold_size = size > stat.total_stat.releasable_size - ? 
size - stat.total_stat.releasable_size - : 0; - - const size_t hold_elements = elements > stat.total_stat.releasable_count - ? elements - stat.total_stat.releasable_count - : 0; - - if (hold_size || hold_elements) - res.setSpaceHolder(hold_size, hold_elements, *this, lock); + /// `res` contains eviction candidates. Do we have any? + if (res.size() > 0) + { + /// As eviction is done without a cache priority lock, + /// then if some space was partially available and some needed + /// to be freed via eviction, we need to make sure that this + /// partially available space is still available + /// after we finish with eviction for non-available space. + /// So we create a space holder for the currently available part + /// of the required space for the duration of eviction of the other + /// currently non-available part of the space. + + const size_t hold_size = size > stat.total_stat.releasable_size + ? size - stat.total_stat.releasable_size + : 0; + + const size_t hold_elements = elements > stat.total_stat.releasable_count + ? elements - stat.total_stat.releasable_count + : 0; + + if (hold_size || hold_elements) + res.setSpaceHolder(hold_size, hold_elements, *this, lock); + } // LOG_TEST(log, "Collected {} candidates for eviction (total size: {}). " // "Took hold of size {} and elements {}", From 0dbb249b4878f168fbcc4730437911f50158b6a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 3 Apr 2024 19:30:55 +0200 Subject: [PATCH 212/470] More nested includes --- src/Columns/ColumnAggregateFunction.cpp | 6 ++++++ src/Columns/ColumnAggregateFunction.h | 13 +++++++++++-- src/Columns/ColumnObject.h | 1 - src/Core/Block.cpp | 20 +++++++++----------- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index 7533d2cb71b1..f7e6b1a1ccc9 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -110,6 +111,11 @@ ConstArenas concatArenas(const ConstArenas & array, ConstArenaPtr arena) } +std::string ColumnAggregateFunction::getName() const +{ + return "AggregateFunction(" + func->getName() + ")"; +} + MutableColumnPtr ColumnAggregateFunction::convertToValues(MutableColumnPtr column) { /** If the aggregate function returns an unfinalized/unfinished state, diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index ba33cf0ee14c..3c836f50bdfa 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -19,6 +18,16 @@ using ArenaPtr = std::shared_ptr; using ConstArenaPtr = std::shared_ptr; using ConstArenas = std::vector; +using AggregateDataPtr = char *; +using AggregateDataPtrs = std::vector; +using ConstAggregateDataPtr = const char *; + +class IAggregateFunction; +using AggregateFunctionPtr = std::shared_ptr; + +class Context; +using ContextPtr = std::shared_ptr; + /** Column of states of aggregate functions. * Presented as an array of pointers to the states of aggregate functions (data). 
@@ -114,7 +123,7 @@ class ColumnAggregateFunction final : public COWHelpergetName() + ")"; } + std::string getName() const override; const char * getFamilyName() const override { return "AggregateFunction"; } TypeIndex getDataType() const override { return TypeIndex::AggregateFunction; } diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index af12b99efcbb..e2936b27994f 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include diff --git a/src/Core/Block.cpp b/src/Core/Block.cpp index dfd60b994f4c..77dbad5443eb 100644 --- a/src/Core/Block.cpp +++ b/src/Core/Block.cpp @@ -1,19 +1,17 @@ -#include -#include - -#include - -#include -#include - -#include - +#include #include #include #include +#include +#include +#include +#include +#include +#include +#include #include -#include + #include From 40b901a2e3f37db01b899319aba75126c1047bbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 3 Apr 2024 19:49:56 +0200 Subject: [PATCH 213/470] More crap --- src/Columns/ColumnAggregateFunction.h | 4 +- src/Interpreters/Aggregator.cpp | 43 +++++++++---------- src/Interpreters/InterpreterSelectQuery.cpp | 1 + src/Planner/Planner.cpp | 5 ++- src/Processors/QueryPlan/AggregatingStep.cpp | 1 + src/Processors/Transforms/WindowTransform.cpp | 26 +++++------ 6 files changed, 42 insertions(+), 38 deletions(-) diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index 3c836f50bdfa..c5d854e208a0 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -2,7 +2,6 @@ #include #include -#include #include namespace DB @@ -28,6 +27,9 @@ using AggregateFunctionPtr = std::shared_ptr; class Context; using ContextPtr = std::shared_ptr; +struct ColumnWithTypeAndName; +using ColumnsWithTypeAndName = std::vector; + /** Column of states of aggregate functions. * Presented as an array of pointers to the states of aggregate functions (data). 
diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index ab8cec864ae5..bd78c7a1bc16 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -7,38 +7,37 @@ # include #endif -#include +#include +#include +#include +#include +#include #include -#include #include -#include -#include +#include +#include #include -#include -#include -#include -#include +#include #include -#include +#include +#include #include -#include +#include #include -#include -#include -#include -#include +#include +#include #include -#include -#include #include -#include -#include +#include #include +#include +#include +#include +#include +#include #include - -#include - -#include +#include +#include namespace ProfileEvents diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index c47e3bdc49f6..d518c8d96b4b 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 5f73bba67a66..d75573c8d999 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1,9 +1,10 @@ #include +#include +#include #include -#include #include -#include +#include #include diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 74f293e5682f..0d7e05af1de1 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 02d2762dab30..f43b9a2e7948 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -1,23 +1,23 @@ -#include - -#include - #include -#include -#include -#include -#include -#include -#include #include +#include +#include #include +#include +#include +#include #include #include -#include -#include +#include #include #include -#include +#include +#include +#include +#include +#include + +#include /// See https://fmt.dev/latest/api.html#formatting-user-defined-types From 925148f928c3d6165a5f8b217ca8a597b0962ab1 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 3 Apr 2024 20:32:48 +0200 Subject: [PATCH 214/470] Add logging --- src/Interpreters/Cache/FileCache.cpp | 1 - src/Interpreters/Cache/FileCacheFactory.cpp | 10 +++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 75e199c544b2..90671629e647 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1379,7 +1379,6 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, } } - if (new_settings.max_size != actual_settings.max_size || new_settings.max_elements != actual_settings.max_elements) { diff --git a/src/Interpreters/Cache/FileCacheFactory.cpp b/src/Interpreters/Cache/FileCacheFactory.cpp index e05e1935d952..e1f144060580 100644 --- a/src/Interpreters/Cache/FileCacheFactory.cpp +++ b/src/Interpreters/Cache/FileCacheFactory.cpp @@ -142,8 +142,10 @@ void FileCacheFactory::updateSettingsFromConfig(const Poco::Util::AbstractConfig caches_by_name_copy = caches_by_name; } + auto * log = &Poco::Logger::get("FileCacheFactory"); + std::unordered_set checked_paths; - for (const auto & [_, 
cache_info] : caches_by_name_copy) + for (const auto & [cache_name, cache_info] : caches_by_name_copy) { if (cache_info->config_path.empty() || checked_paths.contains(cache_info->config_path)) continue; @@ -155,7 +157,12 @@ void FileCacheFactory::updateSettingsFromConfig(const Poco::Util::AbstractConfig FileCacheSettings old_settings = cache_info->getSettings(); if (old_settings == new_settings) + { + LOG_TRACE(log, "No settings changes for cache: {}", cache_name); continue; + } + + LOG_TRACE(log, "Will apply settings changes for cache: {}", cache_name); try { @@ -166,6 +173,7 @@ void FileCacheFactory::updateSettingsFromConfig(const Poco::Util::AbstractConfig /// Settings changes could be partially applied in case of exception, /// make sure cache_info->settings show correct state of applied settings. cache_info->setSettings(old_settings); + tryLogCurrentException(__PRETTY_FUNCTION__); throw; } From 76b6c125ff136637437e200f4546c838f90f5bf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 3 Apr 2024 20:36:29 +0200 Subject: [PATCH 215/470] Remove boost dep in wide integers when possible --- base/base/wide_integer_impl.h | 11 +++++++++-- src/IO/ReadBufferFromIStream.cpp | 2 ++ src/IO/SeekableReadBuffer.cpp | 1 + src/IO/VarInt.h | 3 +++ src/IO/WriteBufferFromOStream.cpp | 1 + 5 files changed, 16 insertions(+), 2 deletions(-) diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h index 17b1fa7cd6a5..0e98b6e5ee65 100644 --- a/base/base/wide_integer_impl.h +++ b/base/base/wide_integer_impl.h @@ -13,8 +13,6 @@ #include #include -#include - // NOLINTBEGIN(*) /// Use same extended double for all platforms @@ -22,6 +20,7 @@ #define CONSTEXPR_FROM_DOUBLE constexpr using FromDoubleIntermediateType = long double; #else +#include #include /// `wide_integer_from_builtin` can't be constexpr with non-literal `cpp_bin_float_double_extended` #define CONSTEXPR_FROM_DOUBLE @@ -309,6 +308,13 @@ struct integer::_impl constexpr uint64_t max_int = std::numeric_limits::max(); static_assert(std::is_same_v || std::is_same_v); /// Implementation specific behaviour on overflow (if we don't check here, stack overflow will triggered in bigint_cast). 
+#if (LDBL_MANT_DIG == 64) + if (!std::isfinite(t)) + { + self = 0; + return; + } +#else if constexpr (std::is_same_v) { if (!std::isfinite(t)) @@ -325,6 +331,7 @@ struct integer::_impl return; } } +#endif const T alpha = t / static_cast(max_int); diff --git a/src/IO/ReadBufferFromIStream.cpp b/src/IO/ReadBufferFromIStream.cpp index bc90ec7ed15e..325beabaf81e 100644 --- a/src/IO/ReadBufferFromIStream.cpp +++ b/src/IO/ReadBufferFromIStream.cpp @@ -1,6 +1,8 @@ #include #include +#include + namespace DB { diff --git a/src/IO/SeekableReadBuffer.cpp b/src/IO/SeekableReadBuffer.cpp index 5d83f4e1b4a9..f2a114a5389a 100644 --- a/src/IO/SeekableReadBuffer.cpp +++ b/src/IO/SeekableReadBuffer.cpp @@ -1,5 +1,6 @@ #include +#include namespace DB { diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h index 9e72705341df..6dce80081708 100644 --- a/src/IO/VarInt.h +++ b/src/IO/VarInt.h @@ -5,6 +5,9 @@ #include #include +#include +#include + namespace DB { diff --git a/src/IO/WriteBufferFromOStream.cpp b/src/IO/WriteBufferFromOStream.cpp index ffc3e62e9a66..e77ec079d1f9 100644 --- a/src/IO/WriteBufferFromOStream.cpp +++ b/src/IO/WriteBufferFromOStream.cpp @@ -1,6 +1,7 @@ #include #include +#include namespace DB { From be55c3533c9b3b4293bbb052dc8ed585af7c20b7 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 3 Apr 2024 15:48:30 -0300 Subject: [PATCH 216/470] default to path style --- src/IO/S3/URI.cpp | 15 +++------------ src/IO/tests/gtest_s3_uri.cpp | 6 +++--- 2 files changed, 6 insertions(+), 15 deletions(-) diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 8e364337db53..02c77518ab69 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -108,19 +108,10 @@ URI::URI(const std::string & uri_) String name; String endpoint_authority_from_uri; - if (re2::RE2::FullMatch(uri.getAuthority(), aws_private_link_style_pattern)) - { - if (!re2::RE2::PartialMatch(uri.getPath(), path_style_pattern, &bucket, &key)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Could not parse bucket and key from uri {}", uri.toString()); - } + bool is_using_aws_private_link_interface = re2::RE2::FullMatch(uri.getAuthority(), aws_private_link_style_pattern); - // Default to virtual hosted style - is_virtual_hosted_style = true; - endpoint = uri.getScheme() + "://" + uri.getAuthority(); - validateBucket(bucket, uri); - } - else if (re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket, &name, &endpoint_authority_from_uri)) + if (!is_using_aws_private_link_interface + && re2::RE2::FullMatch(uri.getAuthority(), virtual_hosted_style_pattern, &bucket, &name, &endpoint_authority_from_uri)) { is_virtual_hosted_style = true; endpoint = uri.getScheme() + "://" + name + endpoint_authority_from_uri; diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index b3ceb875362c..0a164b0dd612 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -79,21 +79,21 @@ const TestCase TestCases[] = { "root", "nested/file.txt", "", - true}, + false}, // Test with a file with no extension {S3::URI("https://bucket.vpce-03b2c987f1bd55c5f-j3b4vg7w.s3.ap-southeast-2.vpce.amazonaws.com/some_bucket/document"), "https://bucket.vpce-03b2c987f1bd55c5f-j3b4vg7w.s3.ap-southeast-2.vpce.amazonaws.com", "some_bucket", "document", "", - true}, + false}, // Test with a deeply nested file path {S3::URI("https://bucket.vpce-0242cd56f1bd55c5f-l5b7vg8x.s3.sa-east-1.vpce.amazonaws.com/some_bucket/b/c/d/e/f/g/h/i/j/data.json"), 
"https://bucket.vpce-0242cd56f1bd55c5f-l5b7vg8x.s3.sa-east-1.vpce.amazonaws.com", "some_bucket", "b/c/d/e/f/g/h/i/j/data.json", "", - true}, + false}, }; class S3UriTest : public testing::TestWithParam From de2a0be02580eb7501ad1fec5de35a7107ef9a1e Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Apr 2024 18:50:33 +0000 Subject: [PATCH 217/470] Don't access static members through instance - clang-tidy rightfully complains (-readability-static-accessed-through-instance) - not going to enable the warning for now to avoid breaking the build --- programs/format/Format.cpp | 2 +- .../ExternalDictionaryLibraryUtils.h | 2 +- .../AggregateFunctionAnyHeavy.cpp | 12 ++++++------ .../AggregateFunctionFlameGraph.cpp | 6 +++--- .../AggregateFunctionGroupArray.cpp | 4 ++-- ...gateFunctionLargestTriangleThreeBuckets.cpp | 12 ++++++------ .../AggregateFunctionMannWhitney.cpp | 16 ++++++++-------- .../AggregateFunctionRankCorrelation.cpp | 14 +++++++------- .../AggregateFunctionRetention.cpp | 12 ++++++------ ...AggregateFunctionSimpleLinearRegression.cpp | 12 ++++++------ .../AggregateFunctionSingleValueOrNull.cpp | 16 ++++++++-------- .../AggregateFunctionStatistics.cpp | 18 +++++++++--------- .../Combinators/AggregateFunctionNull.h | 2 +- src/AggregateFunctions/ReservoirSampler.h | 4 ++-- src/Analyzer/Passes/QueryAnalysisPass.cpp | 3 +-- src/Columns/ColumnSparse.cpp | 2 +- src/Common/AsynchronousMetrics.cpp | 2 +- src/Common/HTTPConnectionPool.cpp | 2 +- src/Common/HashTable/HashMap.h | 6 +++--- src/Common/StackTrace.cpp | 2 +- src/Common/ThreadStatus.cpp | 2 +- src/Compression/CachedCompressedReadBuffer.cpp | 2 +- src/Databases/DatabaseOnDisk.cpp | 2 +- src/Databases/DatabaseOrdinary.cpp | 2 +- src/Databases/TablesDependencyGraph.cpp | 2 +- src/Dictionaries/FlatDictionary.cpp | 3 +-- src/Dictionaries/PolygonDictionaryUtils.h | 2 +- src/Disks/DiskLocal.cpp | 2 +- src/Functions/FunctionsExternalDictionaries.h | 6 +++--- .../UserDefinedSQLObjectsStorageBase.cpp | 2 +- src/IO/Archives/LibArchiveWriter.h | 2 +- src/IO/MMapReadBufferFromFileWithCache.cpp | 2 +- src/IO/ReadHelpers.h | 10 +++++----- src/IO/WriteHelpers.h | 4 ++-- src/Interpreters/AsynchronousInsertQueue.cpp | 2 +- .../ExecuteScalarSubqueriesVisitor.cpp | 2 +- .../InterpreterCreateFunctionQuery.cpp | 2 +- src/Interpreters/InterpreterDeleteQuery.cpp | 2 +- .../InterpreterDropFunctionQuery.cpp | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 10 +++++----- src/Interpreters/TreeRewriter.cpp | 2 +- .../evaluateConstantExpression.cpp | 2 +- src/Planner/PlannerActionsVisitor.cpp | 2 +- src/Planner/PlannerJoinTree.cpp | 2 +- src/Processors/Executors/ExecutorTasks.cpp | 2 +- src/Processors/QueryPlan/ReadFromMergeTree.cpp | 4 ++-- src/Server/HTTP/ReadHeaders.cpp | 2 +- src/Storages/AlterCommands.cpp | 2 +- src/Storages/FileLog/StorageFileLog.h | 2 +- src/Storages/KeyDescription.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 16 ++++++++-------- src/Storages/MergeTree/KeyCondition.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeData.cpp | 4 ++-- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeDataWriter.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeSink.cpp | 2 +- .../MergeTree/MutateFromLogEntryTask.cpp | 2 +- src/Storages/MergeTree/MutateTask.cpp | 12 ++++++------ .../MergeTree/ReplicatedMergeTreeSink.cpp | 2 +- .../ReplicatedMergeTreeTableMetadata.cpp | 2 +- .../MergeTree/registerStorageMergeTree.cpp | 4 ++-- src/Storages/StorageFile.cpp | 2 +- src/Storages/StorageFile.h | 2 +- 
src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- src/Storages/StorageS3.cpp | 2 +- 66 files changed, 149 insertions(+), 151 deletions(-) diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index fc73eda6815a..d4b975ce1e88 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -237,7 +237,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv) ASTPtr res = parseQueryAndMovePosition( parser, pos, end, "query", multiple, cmd_settings.max_query_size, cmd_settings.max_parser_depth, cmd_settings.max_parser_backtracks); - std::unique_ptr insert_query_payload = nullptr; + std::unique_ptr insert_query_payload; /// If the query is INSERT ... VALUES, then we will try to parse the data. if (auto * insert_query = res->as(); insert_query && insert_query->data) { diff --git a/programs/library-bridge/ExternalDictionaryLibraryUtils.h b/programs/library-bridge/ExternalDictionaryLibraryUtils.h index c9d03d27f75b..e6bf8f2a4c3b 100644 --- a/programs/library-bridge/ExternalDictionaryLibraryUtils.h +++ b/programs/library-bridge/ExternalDictionaryLibraryUtils.h @@ -35,7 +35,7 @@ class CStringsHolder ExternalDictionaryLibraryAPI::CStrings strings; // will pass pointer to lib private: - std::unique_ptr ptr_holder = nullptr; + std::unique_ptr ptr_holder; Container strings_holder; }; diff --git a/src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp b/src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp index 4f4d4a19cba1..ffddd46f2e38 100644 --- a/src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp +++ b/src/AggregateFunctions/AggregateFunctionAnyHeavy.cpp @@ -115,34 +115,34 @@ class AggregateFunctionAnyHeavy final : public IAggregateFunctionDataHelperdata(place).add(*columns[0], row_num, arena); + data(place).add(*columns[0], row_num, arena); } void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override { - this->data(place).addManyDefaults(*columns[0], 0, arena); + data(place).addManyDefaults(*columns[0], 0, arena); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - this->data(place).add(this->data(rhs), arena); + data(place).add(data(rhs), arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).write(buf, *serialization); + data(place).write(buf, *serialization); } void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override { - this->data(place).read(buf, *serialization, arena); + data(place).read(buf, *serialization, arena); } bool allocatesMemoryInArena() const override { return singleValueTypeAllocatesMemoryInArena(value_type_index); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - this->data(place).insertResultInto(to); + data(place).insertResultInto(to); } }; diff --git a/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp b/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp index f3d990460360..33e318b6c2fa 100644 --- a/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp +++ b/src/AggregateFunctions/AggregateFunctionFlameGraph.cpp @@ -559,7 +559,7 @@ class AggregateFunctionFlameGraph final : public IAggregateFunctionDataHelperdata(place).add(ptr, allocated, trace_values.data() + prev_offset, trace_size, arena); + data(place).add(ptr, allocated, trace_values.data() + prev_offset, trace_size, arena); } 
void addManyDefaults( @@ -572,7 +572,7 @@ class AggregateFunctionFlameGraph final : public IAggregateFunctionDataHelperdata(place).merge(this->data(rhs), arena); + data(place).merge(data(rhs), arena); } void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional /* version */) const override @@ -590,7 +590,7 @@ class AggregateFunctionFlameGraph final : public IAggregateFunctionDataHelper(to); auto & str = assert_cast(array.getData()); - this->data(place).dumpFlameGraph(str.getChars(), str.getOffsets(), 0, 0); + data(place).dumpFlameGraph(str.getChars(), str.getOffsets(), 0, 0); array.getOffsets().push_back(str.size()); } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp index 6af8b1018dd6..630026521668 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArray.cpp +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.cpp @@ -89,10 +89,10 @@ struct GroupArraySamplerData chassert(lim != 0); /// With a large number of values, we will generate random numbers several times slower. - if (lim <= static_cast(rng.max())) + if (lim <= static_cast(pcg32_fast::max())) return rng() % lim; else - return (static_cast(rng()) * (static_cast(rng.max()) + 1ULL) + static_cast(rng())) % lim; + return (static_cast(rng()) * (static_cast(pcg32::max()) + 1ULL) + static_cast(rng())) % lim; } void randomShuffle() diff --git a/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp b/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp index d5abdbc12fbe..b24b6c8996f2 100644 --- a/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp +++ b/src/AggregateFunctions/AggregateFunctionLargestTriangleThreeBuckets.cpp @@ -242,7 +242,7 @@ class AggregateFunctionLargestTriangleThreeBuckets final : public IAggregateFunc { Float64 x = getFloat64DataFromColumn(columns[0], row_num, this->x_type); Float64 y = getFloat64DataFromColumn(columns[1], row_num, this->y_type); - this->data(place).add(x, y, arena); + data(place).add(x, y, arena); } Float64 getFloat64DataFromColumn(const IColumn * column, size_t row_num, TypeIndex type_index) const @@ -264,25 +264,25 @@ class AggregateFunctionLargestTriangleThreeBuckets final : public IAggregateFunc void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - auto & a = this->data(place); - const auto & b = this->data(rhs); + auto & a = data(place); + const auto & b = data(rhs); a.merge(b, arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).write(buf); + data(place).write(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override { - this->data(place).read(buf, arena); + data(place).read(buf, arena); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena * arena) const override { - auto res = this->data(place).getResult(total_buckets, arena); + auto res = data(place).getResult(total_buckets, arena); auto & col = assert_cast(to); auto & col_offsets = assert_cast(col.getOffsetsColumn()); diff --git a/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp b/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp index a70da7b35d57..e7bc5df335f7 100644 --- a/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp +++ b/src/AggregateFunctions/AggregateFunctionMannWhitney.cpp @@ -205,35 +205,35 @@ class 
AggregateFunctionMannWhitney final: UInt8 is_second = columns[1]->getUInt(row_num); if (is_second) - this->data(place).addY(value, arena); + data(place).addY(value, arena); else - this->data(place).addX(value, arena); + data(place).addX(value, arena); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - auto & a = this->data(place); - const auto & b = this->data(rhs); + auto & a = data(place); + const auto & b = data(rhs); a.merge(b, arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).write(buf); + data(place).write(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override { - this->data(place).read(buf, arena); + data(place).read(buf, arena); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - if (!this->data(place).size_x || !this->data(place).size_y) + if (!data(place).size_x || !data(place).size_y) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} require both samples to be non empty", getName()); - auto [u_statistic, p_value] = this->data(place).getResult(alternative, continuity_correction); + auto [u_statistic, p_value] = data(place).getResult(alternative, continuity_correction); /// Because p-value is a probability. p_value = std::min(1.0, std::max(0.0, p_value)); diff --git a/src/AggregateFunctions/AggregateFunctionRankCorrelation.cpp b/src/AggregateFunctions/AggregateFunctionRankCorrelation.cpp index d338808c7170..0c4726734ce9 100644 --- a/src/AggregateFunctions/AggregateFunctionRankCorrelation.cpp +++ b/src/AggregateFunctions/AggregateFunctionRankCorrelation.cpp @@ -66,31 +66,31 @@ class AggregateFunctionRankCorrelation : { Float64 new_x = columns[0]->getFloat64(row_num); Float64 new_y = columns[1]->getFloat64(row_num); - this->data(place).addX(new_x, arena); - this->data(place).addY(new_y, arena); + data(place).addX(new_x, arena); + data(place).addY(new_y, arena); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - auto & a = this->data(place); - const auto & b = this->data(rhs); + auto & a = data(place); + const auto & b = data(rhs); a.merge(b, arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).write(buf); + data(place).write(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override { - this->data(place).read(buf, arena); + data(place).read(buf, arena); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - auto answer = this->data(place).getResult(); + auto answer = data(place).getResult(); auto & column = static_cast &>(to); column.getData().push_back(answer); diff --git a/src/AggregateFunctions/AggregateFunctionRetention.cpp b/src/AggregateFunctions/AggregateFunctionRetention.cpp index 5eaa1a7a39c2..e9b46e62c146 100644 --- a/src/AggregateFunctions/AggregateFunctionRetention.cpp +++ b/src/AggregateFunctions/AggregateFunctionRetention.cpp @@ -102,24 +102,24 @@ class AggregateFunctionRetention final auto event = assert_cast *>(columns[i])->getData()[row_num]; if (event) { - this->data(place).add(i); + data(place).add(i); } } } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) 
const override { - this->data(place).merge(this->data(rhs)); + data(place).merge(data(rhs)); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).serialize(buf); + data(place).serialize(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override { - this->data(place).deserialize(buf); + data(place).deserialize(buf); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override @@ -130,13 +130,13 @@ class AggregateFunctionRetention final ColumnArray::Offset current_offset = data_to.size(); data_to.resize(current_offset + events_size); - const bool first_flag = this->data(place).events.test(0); + const bool first_flag = data(place).events.test(0); data_to[current_offset] = first_flag; ++current_offset; for (size_t i = 1; i < events_size; ++i) { - data_to[current_offset] = (first_flag && this->data(place).events.test(i)); + data_to[current_offset] = (first_flag && data(place).events.test(i)); ++current_offset; } diff --git a/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp b/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp index 75d2fe595d84..ce2f7ee195db 100644 --- a/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp +++ b/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.cpp @@ -123,22 +123,22 @@ class AggregateFunctionSimpleLinearRegression final : public IAggregateFunctionD Float64 x = columns[0]->getFloat64(row_num); Float64 y = columns[1]->getFloat64(row_num); - this->data(place).add(x, y); + data(place).add(x, y); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { - this->data(place).merge(this->data(rhs)); + data(place).merge(data(rhs)); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).serialize(buf); + data(place).serialize(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override { - this->data(place).deserialize(buf); + data(place).deserialize(buf); } static DataTypePtr createResultType() @@ -168,8 +168,8 @@ class AggregateFunctionSimpleLinearRegression final : public IAggregateFunctionD IColumn & to, Arena *) const override { - Float64 k = this->data(place).getK(); - Float64 b = this->data(place).getB(k); + Float64 k = data(place).getK(); + Float64 b = data(place).getB(k); auto & col_tuple = assert_cast(to); auto & col_k = assert_cast &>(col_tuple.getColumn(0)); diff --git a/src/AggregateFunctions/AggregateFunctionSingleValueOrNull.cpp b/src/AggregateFunctions/AggregateFunctionSingleValueOrNull.cpp index b14af34c5fc1..0625e37d1b0f 100644 --- a/src/AggregateFunctions/AggregateFunctionSingleValueOrNull.cpp +++ b/src/AggregateFunctions/AggregateFunctionSingleValueOrNull.cpp @@ -120,7 +120,7 @@ class AggregateFunctionSingleValueOrNull final void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override { - this->data(place).add(*columns[0], row_num, arena); + data(place).add(*columns[0], row_num, arena); } void addBatchSinglePlace( @@ -131,7 +131,7 @@ class AggregateFunctionSingleValueOrNull final Arena * arena, ssize_t if_argument_pos) const override { - if (this->data(place).isNull()) + if (data(place).isNull()) return; 
IAggregateFunctionDataHelper::addBatchSinglePlace( row_begin, row_end, place, columns, arena, if_argument_pos); @@ -146,7 +146,7 @@ class AggregateFunctionSingleValueOrNull final Arena * arena, ssize_t if_argument_pos) const override { - if (this->data(place).isNull()) + if (data(place).isNull()) return; IAggregateFunctionDataHelper::addBatchSinglePlaceNotNull( row_begin, row_end, place, columns, null_map, arena, if_argument_pos); @@ -154,29 +154,29 @@ class AggregateFunctionSingleValueOrNull final void addManyDefaults(AggregateDataPtr __restrict place, const IColumn ** columns, size_t, Arena * arena) const override { - this->data(place).add(*columns[0], 0, arena); + data(place).add(*columns[0], 0, arena); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - this->data(place).add(this->data(rhs), arena); + data(place).add(data(rhs), arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).write(buf, *serialization); + data(place).write(buf, *serialization); } void deserialize(AggregateDataPtr place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override { - this->data(place).read(buf, *serialization, arena); + data(place).read(buf, *serialization, arena); } bool allocatesMemoryInArena() const override { return singleValueTypeAllocatesMemoryInArena(value_type_index); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - this->data(place).insertResultInto(to); + data(place).insertResultInto(to); } }; diff --git a/src/AggregateFunctions/AggregateFunctionStatistics.cpp b/src/AggregateFunctions/AggregateFunctionStatistics.cpp index e9d9b7409cad..15fede94fe76 100644 --- a/src/AggregateFunctions/AggregateFunctionStatistics.cpp +++ b/src/AggregateFunctions/AggregateFunctionStatistics.cpp @@ -150,13 +150,13 @@ class AggregateFunctionVariance final Float64 getResult(ConstAggregateDataPtr __restrict place) const { - const auto & data = this->data(place); + const auto & dt = data(place); switch (kind) { - case VarKind::varSampStable: return getVarSamp(data.m2, data.count); - case VarKind::stddevSampStable: return getStddevSamp(data.m2, data.count); - case VarKind::varPopStable: return getVarPop(data.m2, data.count); - case VarKind::stddevPopStable: return getStddevPop(data.m2, data.count); + case VarKind::varSampStable: return getVarSamp(dt.m2, dt.count); + case VarKind::stddevSampStable: return getStddevSamp(dt.m2, dt.count); + case VarKind::varPopStable: return getVarPop(dt.m2, dt.count); + case VarKind::stddevPopStable: return getStddevPop(dt.m2, dt.count); } } @@ -182,22 +182,22 @@ class AggregateFunctionVariance final void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena *) const override { - this->data(place).update(*columns[0], row_num); + data(place).update(*columns[0], row_num); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { - this->data(place).mergeWith(this->data(rhs)); + data(place).mergeWith(data(rhs)); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - this->data(place).serialize(buf); + data(place).serialize(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override { - this->data(place).deserialize(buf); + data(place).deserialize(buf); } 
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionNull.h b/src/AggregateFunctions/Combinators/AggregateFunctionNull.h index 306e293cae74..9d13b77664dd 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionNull.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionNull.h @@ -491,7 +491,7 @@ class AggregateFunctionNullVariadic final : public AggregateFunctionNullBase< std::vector nullable_filters; const IColumn * nested_columns[number_of_arguments]; - std::unique_ptr final_flags = nullptr; + std::unique_ptr final_flags; const UInt8 * final_flags_ptr = nullptr; if (if_argument_pos >= 0) diff --git a/src/AggregateFunctions/ReservoirSampler.h b/src/AggregateFunctions/ReservoirSampler.h index 37fc05a2e4ce..7b6ef1b2dc0e 100644 --- a/src/AggregateFunctions/ReservoirSampler.h +++ b/src/AggregateFunctions/ReservoirSampler.h @@ -258,10 +258,10 @@ class ReservoirSampler chassert(limit > 0); /// With a large number of values, we will generate random numbers several times slower. - if (limit <= static_cast(rng.max())) + if (limit <= static_cast(pcg32_fast::max())) return rng() % limit; else - return (static_cast(rng()) * (static_cast(rng.max()) + 1ULL) + static_cast(rng())) % limit; + return (static_cast(rng()) * (static_cast(pcg32_fast::max()) + 1ULL) + static_cast(rng())) % limit; } void sortIfNeeded() diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index f5474ddb662a..fffb8f7f2819 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -1940,8 +1940,7 @@ std::vector QueryAnalyzer::collectIdentifierTypoHints(const Identifier & for (const auto & valid_identifier : valid_identifiers) prompting_strings.push_back(valid_identifier.getFullName()); - NamePrompter<1> prompter; - return prompter.getHints(unresolved_identifier.getFullName(), prompting_strings); + return NamePrompter<1>::getHints(unresolved_identifier.getFullName(), prompting_strings); } /** Wrap expression node in tuple element function calls for nested paths. 
diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index b9a173fd92c5..3c08ebbf8b45 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -346,7 +346,7 @@ ColumnPtr ColumnSparse::filter(const Filter & filt, ssize_t) const } auto res_values = values->filter(values_filter, values_result_size_hint); - return this->create(res_values, std::move(res_offsets), res_offset); + return create(res_values, std::move(res_offsets), res_offset); } void ColumnSparse::expand(const Filter & mask, bool inverted) diff --git a/src/Common/AsynchronousMetrics.cpp b/src/Common/AsynchronousMetrics.cpp index 0b9be18c84ee..ab54b180fbfc 100644 --- a/src/Common/AsynchronousMetrics.cpp +++ b/src/Common/AsynchronousMetrics.cpp @@ -671,7 +671,7 @@ void AsynchronousMetrics::update(TimePoint update_time, bool force_update) ReadableSize(rss), ReadableSize(difference)); - total_memory_tracker.setRSS(rss, free_memory_in_allocator_arenas); + MemoryTracker::setRSS(rss, free_memory_in_allocator_arenas); } } diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index cd2505df7f35..1c011880f8f5 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -203,7 +203,7 @@ class ConnectionGroup if (total_connections_in_group >= limits.warning_limit && total_connections_in_group >= mute_warning_until) { LOG_WARNING(log, "Too many active sessions in group {}, count {}, warning limit {}", type, total_connections_in_group, limits.warning_limit); - mute_warning_until = roundUp(total_connections_in_group, limits.warning_step); + mute_warning_until = roundUp(total_connections_in_group, HTTPConnectionPools::Limits::warning_step); } } diff --git a/src/Common/HashTable/HashMap.h b/src/Common/HashTable/HashMap.h index 5f4cb3968228..dc601bf13198 100644 --- a/src/Common/HashTable/HashMap.h +++ b/src/Common/HashTable/HashMap.h @@ -207,7 +207,7 @@ class HashMapTable : public HashTable void ALWAYS_INLINE mergeToViaEmplace(Self & that, Func && func) { DB::PrefetchingHelper prefetching; - size_t prefetch_look_ahead = prefetching.getInitialLookAheadValue(); + size_t prefetch_look_ahead = DB::PrefetchingHelper::getInitialLookAheadValue(); size_t i = 0; auto prefetch_it = advanceIterator(this->begin(), prefetch_look_ahead); @@ -216,10 +216,10 @@ class HashMapTable : public HashTable { if constexpr (prefetch) { - if (i == prefetching.iterationsToMeasure()) + if (i == DB::PrefetchingHelper::iterationsToMeasure()) { prefetch_look_ahead = prefetching.calcPrefetchLookAhead(); - prefetch_it = advanceIterator(prefetch_it, prefetch_look_ahead - prefetching.getInitialLookAheadValue()); + prefetch_it = advanceIterator(prefetch_it, prefetch_look_ahead - DB::PrefetchingHelper::getInitialLookAheadValue()); } if (prefetch_it != end) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 436b85ff30ba..891850ccb79f 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -366,7 +366,7 @@ String demangleAndCollapseNames(std::optional file, const char if (file.has_value()) { std::string_view file_copy = file.value(); - if (auto trim_pos = file_copy.find_last_of('/'); trim_pos != file_copy.npos) + if (auto trim_pos = file_copy.find_last_of('/'); trim_pos != std::string_view::npos) file_copy.remove_suffix(file_copy.size() - trim_pos); if (file_copy.ends_with("functional")) return "?"; diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index cf50d305e956..8719a9e093ab 100644 --- a/src/Common/ThreadStatus.cpp +++ 
b/src/Common/ThreadStatus.cpp @@ -96,7 +96,7 @@ ThreadStatus::ThreadStatus(bool check_current_thread_on_destruction_) stack_t altstack_description{}; altstack_description.ss_sp = alt_stack.getData(); altstack_description.ss_flags = 0; - altstack_description.ss_size = alt_stack.getSize(); + altstack_description.ss_size = ThreadStack::getSize(); if (0 != sigaltstack(&altstack_description, nullptr)) { diff --git a/src/Compression/CachedCompressedReadBuffer.cpp b/src/Compression/CachedCompressedReadBuffer.cpp index 0febfca75cc8..3476f436eeb2 100644 --- a/src/Compression/CachedCompressedReadBuffer.cpp +++ b/src/Compression/CachedCompressedReadBuffer.cpp @@ -38,7 +38,7 @@ void CachedCompressedReadBuffer::prefetch(Priority priority) bool CachedCompressedReadBuffer::nextImpl() { /// Let's check for the presence of a decompressed block in the cache, grab the ownership of this block, if it exists. - UInt128 key = cache->hash(path, file_pos); + UInt128 key = UncompressedCache::hash(path, file_pos); owned_cell = cache->getOrSet(key, [&]() { diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index d8acfb5fa01f..674e9afa8acc 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -68,7 +68,7 @@ std::pair createTableFromAST( ast_create_query.setDatabase(database_name); if (ast_create_query.select && ast_create_query.isView()) - ApplyWithSubqueryVisitor().visit(*ast_create_query.select); + ApplyWithSubqueryVisitor::visit(*ast_create_query.select); if (ast_create_query.as_table_function) { diff --git a/src/Databases/DatabaseOrdinary.cpp b/src/Databases/DatabaseOrdinary.cpp index 95bdcfc7dcef..90f777d7d1db 100644 --- a/src/Databases/DatabaseOrdinary.cpp +++ b/src/Databases/DatabaseOrdinary.cpp @@ -171,7 +171,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables auto ast = parseQueryFromMetadata(log, getContext(), full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false); if (ast) { - FunctionNameNormalizer().visit(ast.get()); + FunctionNameNormalizer::visit(ast.get()); auto * create_query = ast->as(); /// NOTE No concurrent writes are possible during database loading create_query->setDatabase(TSA_SUPPRESS_WARNING_FOR_READ(database_name)); diff --git a/src/Databases/TablesDependencyGraph.cpp b/src/Databases/TablesDependencyGraph.cpp index 4b05f19fe911..d227a3ac76b0 100644 --- a/src/Databases/TablesDependencyGraph.cpp +++ b/src/Databases/TablesDependencyGraph.cpp @@ -448,7 +448,7 @@ std::vector TablesDependencyGraph::getTables() const void TablesDependencyGraph::mergeWith(const TablesDependencyGraph & other) { for (const auto & other_node : other.nodes) - addDependencies(other_node->storage_id, other.getDependencies(*other_node)); + addDependencies(other_node->storage_id, TablesDependencyGraph::getDependencies(*other_node)); } diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index fc58ff525bda..e3b1e8a84e27 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -412,8 +412,7 @@ void FlatDictionary::blockToAttributes(const Block & block) { const auto keys_column = block.safeGetByPosition(0).column; - DictionaryKeysArenaHolder arena_holder; - DictionaryKeysExtractor keys_extractor({ keys_column }, arena_holder.getComplexKeyArena()); + DictionaryKeysExtractor keys_extractor({ keys_column }, DictionaryKeysArenaHolder::getComplexKeyArena()); size_t keys_size = keys_extractor.getKeysSize(); static constexpr size_t key_offset = 1; diff 
--git a/src/Dictionaries/PolygonDictionaryUtils.h b/src/Dictionaries/PolygonDictionaryUtils.h index 0acf0d23e5ee..0fd1fead456b 100644 --- a/src/Dictionaries/PolygonDictionaryUtils.h +++ b/src/Dictionaries/PolygonDictionaryUtils.h @@ -214,7 +214,7 @@ class GridRoot : public ICell static constexpr Coord kEps = 1e-4f; private: - std::unique_ptr> root = nullptr; + std::unique_ptr> root; Coord min_x = 0, min_y = 0; Coord max_x = 0, max_y = 0; const size_t k_min_intersections; diff --git a/src/Disks/DiskLocal.cpp b/src/Disks/DiskLocal.cpp index 1a8d46668e07..33f7ca1ec19f 100644 --- a/src/Disks/DiskLocal.cpp +++ b/src/Disks/DiskLocal.cpp @@ -581,7 +581,7 @@ try auto disk_ptr = std::static_pointer_cast(shared_from_this()); auto tmp_file = std::make_unique(disk_ptr); auto buf = std::make_unique(std::move(tmp_file)); - buf->write(data.data, data.PAGE_SIZE_IN_BYTES); + buf->write(data.data, DiskWriteCheckData::PAGE_SIZE_IN_BYTES); buf->finalize(); buf->sync(); } diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h index d3317e2dfcfc..4460a8bd7bd3 100644 --- a/src/Functions/FunctionsExternalDictionaries.h +++ b/src/Functions/FunctionsExternalDictionaries.h @@ -1139,7 +1139,7 @@ class FunctionDictGetHierarchy final : public IFunction getName()); auto dictionary = helper.getDictionary(arguments[0].column); - const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); + const auto & hierarchical_attribute = FunctionDictHelper::getDictionaryHierarchicalAttribute(dictionary); return std::make_shared(removeNullable(hierarchical_attribute.type)); } @@ -1150,7 +1150,7 @@ class FunctionDictGetHierarchy final : public IFunction return result_type->createColumn(); auto dictionary = helper.getDictionary(arguments[0].column); - const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); + const auto & hierarchical_attribute = FunctionDictHelper::getDictionaryHierarchicalAttribute(dictionary); auto key_column = ColumnWithTypeAndName{arguments[1].column, arguments[1].type, arguments[1].name}; auto key_column_casted = castColumnAccurate(key_column, removeNullable(hierarchical_attribute.type)); @@ -1205,7 +1205,7 @@ class FunctionDictIsIn final : public IFunction return result_type->createColumn(); auto dictionary = helper.getDictionary(arguments[0].column); - const auto & hierarchical_attribute = helper.getDictionaryHierarchicalAttribute(dictionary); + const auto & hierarchical_attribute = FunctionDictHelper::getDictionaryHierarchicalAttribute(dictionary); auto key_column = ColumnWithTypeAndName{arguments[1].column->convertToFullColumnIfConst(), arguments[1].type, arguments[2].name}; auto in_key_column = ColumnWithTypeAndName{arguments[2].column->convertToFullColumnIfConst(), arguments[2].type, arguments[2].name}; diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp index 4f47a46b10da..f251d11789fa 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsStorageBase.cpp @@ -23,7 +23,7 @@ ASTPtr normalizeCreateFunctionQuery(const IAST & create_function_query) auto & res = typeid_cast(*ptr); res.if_not_exists = false; res.or_replace = false; - FunctionNameNormalizer().visit(res.function_core.get()); + FunctionNameNormalizer::visit(res.function_core.get()); return ptr; } diff --git a/src/IO/Archives/LibArchiveWriter.h 
b/src/IO/Archives/LibArchiveWriter.h index f54a8ce23674..da566c82ff67 100644 --- a/src/IO/Archives/LibArchiveWriter.h +++ b/src/IO/Archives/LibArchiveWriter.h @@ -68,7 +68,7 @@ class LibArchiveWriter : public IArchiveWriter void startWritingFile(); void endWritingFile(); - std::unique_ptr stream_info TSA_GUARDED_BY(mutex) = nullptr; + std::unique_ptr stream_info TSA_GUARDED_BY(mutex); bool is_writing_file TSA_GUARDED_BY(mutex) = false; bool finalized TSA_GUARDED_BY(mutex) = false; mutable std::mutex mutex; diff --git a/src/IO/MMapReadBufferFromFileWithCache.cpp b/src/IO/MMapReadBufferFromFileWithCache.cpp index d53f3bc325db..68c0c7227ca8 100644 --- a/src/IO/MMapReadBufferFromFileWithCache.cpp +++ b/src/IO/MMapReadBufferFromFileWithCache.cpp @@ -26,7 +26,7 @@ void MMapReadBufferFromFileWithCache::init() MMapReadBufferFromFileWithCache::MMapReadBufferFromFileWithCache( MMappedFileCache & cache, const std::string & file_name, size_t offset, size_t length) { - mapped = cache.getOrSet(cache.hash(file_name, offset, length), [&] + mapped = cache.getOrSet(MMappedFileCache::hash(file_name, offset, length), [&] { return std::make_shared(file_name, offset, length); }); diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index fc1055390618..a9c861be13c1 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -822,7 +822,7 @@ inline ReturnType readDateTextImpl(ExtendedDayNum & date, ReadBuffer & buf, cons return false; /// When the parameter is out of rule or out of range, Date32 uses 1925-01-01 as the default value (-DateLUT::instance().getDayNumOffsetEpoch(), -16436) and Date uses 1970-01-01. - date = date_lut.makeDayNum(local_date.year(), local_date.month(), local_date.day(), -static_cast(date_lut.getDayNumOffsetEpoch())); + date = date_lut.makeDayNum(local_date.year(), local_date.month(), local_date.day(), -static_cast(DateLUTImpl::getDayNumOffsetEpoch())); return ReturnType(true); } @@ -1880,10 +1880,10 @@ struct PcgDeserializer assertChar(' ', buf); readText(state, buf); - if (multiplier != rng.multiplier()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect multiplier in pcg32: expected {}, got {}", rng.multiplier(), multiplier); - if (increment != rng.increment()) - throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect increment in pcg32: expected {}, got {}", rng.increment(), increment); + if (multiplier != pcg32_fast::multiplier()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect multiplier in pcg32: expected {}, got {}", pcg32_fast::multiplier(), multiplier); + if (increment != pcg32_fast::increment()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect increment in pcg32: expected {}, got {}", pcg32_fast::increment(), increment); rng.state_ = state; } diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index b42b4e6e9789..a30e2feb4397 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -1390,9 +1390,9 @@ struct PcgSerializer { static void serializePcg32(const pcg32_fast & rng, WriteBuffer & buf) { - writeText(rng.multiplier(), buf); + writeText(pcg32_fast::multiplier(), buf); writeChar(' ', buf); - writeText(rng.increment(), buf); + writeText(pcg32_fast::increment(), buf); writeChar(' ', buf); writeText(rng.state_, buf); } diff --git a/src/Interpreters/AsynchronousInsertQueue.cpp b/src/Interpreters/AsynchronousInsertQueue.cpp index c05d1b8f979b..6b9ca34c2d7f 100644 --- a/src/Interpreters/AsynchronousInsertQueue.cpp +++ b/src/Interpreters/AsynchronousInsertQueue.cpp @@ -294,7 +294,7 @@ void 
AsynchronousInsertQueue::preprocessInsertQuery(const ASTPtr & query, const InterpreterInsertQuery interpreter(query, query_context, query_context->getSettingsRef().insert_allow_materialized_columns); auto table = interpreter.getTable(insert_query); - auto sample_block = interpreter.getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); + auto sample_block = InterpreterInsertQuery::getSampleBlock(insert_query, table, table->getInMemoryMetadataPtr(), query_context); if (!FormatFactory::instance().isInputFormat(insert_query.format)) throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown input format {}", insert_query.format); diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 0cf138c14f6d..a70ff3c6c53e 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -112,7 +112,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr auto hash = subquery.getTreeHash(/*ignore_aliases=*/ true); const auto scalar_query_hash_str = toString(hash); - std::unique_ptr interpreter = nullptr; + std::unique_ptr interpreter; bool hit = false; bool is_local = false; diff --git a/src/Interpreters/InterpreterCreateFunctionQuery.cpp b/src/Interpreters/InterpreterCreateFunctionQuery.cpp index 18e9ba4a64b2..3f4a03c34973 100644 --- a/src/Interpreters/InterpreterCreateFunctionQuery.cpp +++ b/src/Interpreters/InterpreterCreateFunctionQuery.cpp @@ -21,7 +21,7 @@ namespace ErrorCodes BlockIO InterpreterCreateFunctionQuery::execute() { - FunctionNameNormalizer().visit(query_ptr.get()); + FunctionNameNormalizer::visit(query_ptr.get()); const auto updated_query_ptr = removeOnClusterClauseIfNeeded(query_ptr, getContext()); ASTCreateFunctionQuery & create_function_query = updated_query_ptr->as(); diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 07d23be78a7e..ee7749941453 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -35,7 +35,7 @@ InterpreterDeleteQuery::InterpreterDeleteQuery(const ASTPtr & query_ptr_, Contex BlockIO InterpreterDeleteQuery::execute() { - FunctionNameNormalizer().visit(query_ptr.get()); + FunctionNameNormalizer::visit(query_ptr.get()); const ASTDeleteQuery & delete_query = query_ptr->as(); auto table_id = getContext()->resolveStorageID(delete_query, Context::ResolveOrdinary); diff --git a/src/Interpreters/InterpreterDropFunctionQuery.cpp b/src/Interpreters/InterpreterDropFunctionQuery.cpp index 2661fd9058c2..7a273d4969b8 100644 --- a/src/Interpreters/InterpreterDropFunctionQuery.cpp +++ b/src/Interpreters/InterpreterDropFunctionQuery.cpp @@ -21,7 +21,7 @@ namespace ErrorCodes BlockIO InterpreterDropFunctionQuery::execute() { - FunctionNameNormalizer().visit(query_ptr.get()); + FunctionNameNormalizer::visit(query_ptr.get()); const auto updated_query_ptr = removeOnClusterClauseIfNeeded(query_ptr, getContext()); ASTDropFunctionQuery & drop_function_query = updated_query_ptr->as(); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index c47e3bdc49f6..75baefeffbac 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -412,8 +412,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (!options.is_subquery) { if (context->getSettingsRef().enable_global_with_statement) - 
ApplyWithAliasVisitor().visit(query_ptr); - ApplyWithSubqueryVisitor().visit(query_ptr); + ApplyWithAliasVisitor::visit(query_ptr); + ApplyWithSubqueryVisitor::visit(query_ptr); } query_info.query = query_ptr->clone(); @@ -609,7 +609,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (view) { query_info.is_parameterized_view = view->isParameterizedView(); - view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); + StorageView::replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( @@ -629,7 +629,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (view) { /// Restore original view name. Save rewritten subquery for future usage in StorageView. - query_info.view_query = view->restoreViewName(getSelectQuery(), view_table); + query_info.view_query = StorageView::restoreViewName(getSelectQuery(), view_table); view = nullptr; } @@ -2434,7 +2434,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc agg_count.create(place); SCOPE_EXIT_MEMORY_SAFE(agg_count.destroy(place)); - agg_count.set(place, *num_rows); + AggregateFunctionCount::set(place, *num_rows); auto column = ColumnAggregateFunction::create(func); column->insertFrom(place); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 5588fc55a642..914b3c3037d1 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1587,7 +1587,7 @@ void TreeRewriter::normalize( /// already normalized on initiator node, or not normalized and should remain unnormalized for /// compatibility. if (context_->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY && settings.normalize_function_names) - FunctionNameNormalizer().visit(query.get()); + FunctionNameNormalizer::visit(query.get()); if (settings.optimize_move_to_prewhere) { diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index b5c3e00e2997..4e1a2bcf5ee4 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -73,7 +73,7 @@ std::optional evaluateConstantExpressionImpl(c /// already normalized on initiator node, or not normalized and should remain unnormalized for /// compatibility. 
if (context->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY && context->getSettingsRef().normalize_function_names) - FunctionNameNormalizer().visit(ast.get()); + FunctionNameNormalizer::visit(ast.get()); auto syntax_result = TreeRewriter(context, no_throw).analyze(ast, source_columns); if (!syntax_result) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 326dd6833438..656b6cdaa6e6 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -169,7 +169,7 @@ class ActionNodeNameHelper { const auto & in_first_argument_node = function_node.getArguments().getNodes().at(0); const auto & in_second_argument_node = function_node.getArguments().getNodes().at(1); - in_function_second_argument_node_name = planner_context.createSetKey(in_first_argument_node->getResultType(), in_second_argument_node); + in_function_second_argument_node_name = PlannerContext::createSetKey(in_first_argument_node->getResultType(), in_second_argument_node); } WriteBufferFromOwnString buffer; diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index e9c3795176a7..adab31adb40c 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -305,7 +305,7 @@ bool applyTrivialCountIfPossible( AggregateDataPtr place = state.data(); agg_count.create(place); SCOPE_EXIT_MEMORY_SAFE(agg_count.destroy(place)); - agg_count.set(place, num_rows.value()); + AggregateFunctionCount::set(place, num_rows.value()); auto column = ColumnAggregateFunction::create(function_node.getAggregateFunction()); column->insertFrom(place); diff --git a/src/Processors/Executors/ExecutorTasks.cpp b/src/Processors/Executors/ExecutorTasks.cpp index ec1fc539884a..1039cf0e97a1 100644 --- a/src/Processors/Executors/ExecutorTasks.cpp +++ b/src/Processors/Executors/ExecutorTasks.cpp @@ -121,7 +121,7 @@ void ExecutorTasks::pushTasks(Queue & queue, Queue & async_queue, ExecutionThrea /// Take local task from queue if has one. 
if (!queue.empty() && !context.hasAsyncTasks() - && context.num_scheduled_local_tasks < context.max_scheduled_local_tasks) + && context.num_scheduled_local_tasks < ExecutionThreadContext::max_scheduled_local_tasks) { ++context.num_scheduled_local_tasks; context.setTask(queue.front()); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index f4607cad040a..bee42c3dddec 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1408,8 +1408,8 @@ static void buildIndexes( if (metadata_snapshot->hasPartitionKey()) { const auto & partition_key = metadata_snapshot->getPartitionKey(); - auto minmax_columns_names = data.getMinMaxColumnsNames(partition_key); - auto minmax_expression_actions = data.getMinMaxExpr(partition_key, ExpressionActionsSettings::fromContext(context)); + auto minmax_columns_names = MergeTreeData::getMinMaxColumnsNames(partition_key); + auto minmax_expression_actions = MergeTreeData::getMinMaxExpr(partition_key, ExpressionActionsSettings::fromContext(context)); indexes->minmax_idx_condition.emplace(filter_actions_dag, context, minmax_columns_names, minmax_expression_actions); indexes->partition_pruner.emplace(metadata_snapshot, filter_actions_dag, context, false /* strict */); diff --git a/src/Server/HTTP/ReadHeaders.cpp b/src/Server/HTTP/ReadHeaders.cpp index b70575010646..d6c7b8ddc0fe 100644 --- a/src/Server/HTTP/ReadHeaders.cpp +++ b/src/Server/HTTP/ReadHeaders.cpp @@ -77,7 +77,7 @@ void readHeaders( skipToNextLineOrEOF(in); Poco::trimRightInPlace(value); - headers.add(name, headers.decodeWord(value)); + headers.add(name, Poco::Net::MessageHeader::decodeWord(value)); ++fields; } } diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index eae5e1a8a478..281fc72dfc42 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -1143,7 +1143,7 @@ void AlterCommands::apply(StorageInMemoryMetadata & metadata, ContextPtr context { auto minmax_columns = metadata_copy.getColumnsRequiredForPartitionKey(); auto partition_key = metadata_copy.partition_key.expression_list_ast->clone(); - FunctionNameNormalizer().visit(partition_key.get()); + FunctionNameNormalizer::visit(partition_key.get()); auto primary_key_asts = metadata_copy.primary_key.expression_list_ast->children; metadata_copy.minmax_count_projection.emplace(ProjectionDescription::getMinMaxCountProjection( metadata_copy.columns, partition_key, minmax_columns, primary_key_asts, context)); diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index 91d58540c943..0434213c5580 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -177,7 +177,7 @@ class StorageFileLog final : public IStorage, WithContext }; std::shared_ptr task; - std::unique_ptr directory_watch = nullptr; + std::unique_ptr directory_watch; void loadFiles(); diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index d63b40e2b11e..2a697fa56547 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -172,7 +172,7 @@ KeyDescription KeyDescription::parse(const String & str, const ColumnsDescriptio ParserExpression parser; ASTPtr ast = parseQuery(parser, "(" + str + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); - FunctionNameNormalizer().visit(ast.get()); + FunctionNameNormalizer::visit(ast.get()); return getKeyFromAST(ast, columns, context); } 
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 570175f66147..8da46b39801e 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -79,8 +79,8 @@ void IMergeTreeDataPart::MinMaxIndex::load(const MergeTreeData & data, const Par auto metadata_snapshot = data.getInMemoryMetadataPtr(); const auto & partition_key = metadata_snapshot->getPartitionKey(); - auto minmax_column_names = data.getMinMaxColumnsNames(partition_key); - auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key); + auto minmax_column_names = MergeTreeData::getMinMaxColumnsNames(partition_key); + auto minmax_column_types = MergeTreeData::getMinMaxColumnsTypes(partition_key); size_t minmax_idx_size = minmax_column_types.size(); hyperrectangle.reserve(minmax_idx_size); @@ -112,8 +112,8 @@ IMergeTreeDataPart::MinMaxIndex::WrittenFiles IMergeTreeDataPart::MinMaxIndex::s auto metadata_snapshot = data.getInMemoryMetadataPtr(); const auto & partition_key = metadata_snapshot->getPartitionKey(); - auto minmax_column_names = data.getMinMaxColumnsNames(partition_key); - auto minmax_column_types = data.getMinMaxColumnsTypes(partition_key); + auto minmax_column_names = MergeTreeData::getMinMaxColumnsNames(partition_key); + auto minmax_column_types = MergeTreeData::getMinMaxColumnsTypes(partition_key); return store(minmax_column_names, minmax_column_types, part_storage, out_checksums); } @@ -204,7 +204,7 @@ void IMergeTreeDataPart::MinMaxIndex::appendFiles(const MergeTreeData & data, St { auto metadata_snapshot = data.getInMemoryMetadataPtr(); const auto & partition_key = metadata_snapshot->getPartitionKey(); - auto minmax_column_names = data.getMinMaxColumnsNames(partition_key); + auto minmax_column_names = MergeTreeData::getMinMaxColumnsNames(partition_key); size_t minmax_idx_size = minmax_column_names.size(); for (size_t i = 0; i < minmax_idx_size; ++i) { @@ -1213,7 +1213,7 @@ void IMergeTreeDataPart::appendFilesOfPartitionAndMinMaxIndex(Strings & files) c return; if (!parent_part) - partition.appendFiles(storage, files); + MergeTreePartition::appendFiles(storage, files); if (!parent_part) minmax_idx->appendFiles(storage, files); @@ -2061,7 +2061,7 @@ void IMergeTreeDataPart::checkConsistencyBase() const if (!isEmpty() && !parent_part) { - for (const String & col_name : storage.getMinMaxColumnsNames(partition_key)) + for (const String & col_name : MergeTreeData::getMinMaxColumnsNames(partition_key)) { if (!checksums.files.contains("minmax_" + escapeForFileName(col_name) + ".idx")) throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No minmax idx file checksum for column {}", col_name); @@ -2101,7 +2101,7 @@ void IMergeTreeDataPart::checkConsistencyBase() const if (!parent_part) { - for (const String & col_name : storage.getMinMaxColumnsNames(partition_key)) + for (const String & col_name : MergeTreeData::getMinMaxColumnsNames(partition_key)) check_file_not_empty("minmax_" + escapeForFileName(col_name) + ".idx"); } } diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 2d57ea40c9cb..a779311c22b6 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1964,8 +1964,8 @@ KeyCondition::Description KeyCondition::getDescription() const /// This means that logical NOT is applied to leaf. 
bool negate = false; - std::unique_ptr left = nullptr; - std::unique_ptr right = nullptr; + std::unique_ptr left; + std::unique_ptr right; }; /// The algorithm is the same as in KeyCondition::checkInHyperrectangle diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8faed72b198d..e5ace0e5969d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5362,7 +5362,7 @@ void MergeTreeData::restoreDataFromBackup(RestorerFromBackup & restorer, const S return; if (!restorer.isNonEmptyTableAllowed() && getTotalActiveSizeInBytes() && backup->hasFiles(data_path_in_backup)) - restorer.throwTableIsNotEmpty(getStorageID()); + RestorerFromBackup::throwTableIsNotEmpty(getStorageID()); restorePartsFromBackup(restorer, data_path_in_backup, partitions); } @@ -6687,7 +6687,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( auto * place = arena.alignedAlloc(size_of_state, align_of_state); func->create(place); if (const AggregateFunctionCount * agg_count = typeid_cast(func.get())) - agg_count->set(place, value.get()); + AggregateFunctionCount::set(place, value.get()); else { auto value_column = func->getArgumentTypes().front()->createColumnConst(1, value)->convertToFullColumnIfConst(); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 6471f510291b..bcc936c57396 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -513,11 +513,11 @@ void MergeTreeDataSelectExecutor::filterPartsByPartition( { chassert(minmax_idx_condition && partition_pruner); const auto & partition_key = metadata_snapshot->getPartitionKey(); - minmax_columns_types = data.getMinMaxColumnsTypes(partition_key); + minmax_columns_types = MergeTreeData::getMinMaxColumnsTypes(partition_key); if (settings.force_index_by_date && (minmax_idx_condition->alwaysUnknownOrTrue() && partition_pruner->isUseless())) { - auto minmax_columns_names = data.getMinMaxColumnsNames(partition_key); + auto minmax_columns_names = MergeTreeData::getMinMaxColumnsNames(partition_key); throw Exception(ErrorCodes::INDEX_NOT_USED, "Neither MinMax index by columns ({}) nor partition expr is used and setting 'force_index_by_date' is set", fmt::join(minmax_columns_names, ", ")); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index cadd94867eca..64d4b1fd7ffd 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -426,7 +426,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( column.type = block.getByName(column.name).type; auto minmax_idx = std::make_shared(); - minmax_idx->update(block, data.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); + minmax_idx->update(block, MergeTreeData::getMinMaxColumnsNames(metadata_snapshot->getPartitionKey())); MergeTreePartition partition(block_with_partition.partition); @@ -656,7 +656,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( /// Size of part would not be greater than block.bytes() + epsilon size_t expected_size = block.bytes(); // just check if there is enough space on parent volume - data.reserveSpace(expected_size, parent_part->getDataPartStorage()); + MergeTreeData::reserveSpace(expected_size, parent_part->getDataPartStorage()); part_type = 
data.choosePartFormatOnDisk(expected_size, block.rows()).part_type; auto new_data_part = parent_part->getProjectionPartBuilder(part_name, is_temp).withPartType(part_type).build(); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 3ead766cba91..82c015f33ed3 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -195,7 +195,7 @@ void MergeTreeSink::finishDelayedChunk() { auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, partition.elapsed_ns, counters_snapshot)); - storage.incrementInsertedPartsProfileEvent(part->getType()); + StorageMergeTree::incrementInsertedPartsProfileEvent(part->getType()); /// Initiate async merge - it will be done if it's good time for merge and if there are space in 'background_pool'. storage.background_operations_assignee.trigger(); diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index 7536eb45903f..3415b08cebb4 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -116,7 +116,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() /// Once we mutate part, we must reserve space on the same disk, because mutations can possibly create hardlinks. /// Can throw an exception. - reserved_space = storage.reserveSpace(estimated_space_for_result, source_part->getDataPartStorage()); + reserved_space = StorageReplicatedMergeTree::reserveSpace(estimated_space_for_result, source_part->getDataPartStorage()); future_mutated_part->updatePath(storage, reserved_space.get()); table_lock_holder = storage.lockForShare( diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 90e1cb0606e1..a971c4fda1c6 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -980,13 +980,13 @@ struct MutationContext QueryPipelineBuilder mutating_pipeline_builder; QueryPipeline mutating_pipeline; // in - std::unique_ptr mutating_executor{nullptr}; + std::unique_ptr mutating_executor; ProgressCallback progress_callback; Block updated_header; std::unique_ptr interpreter; - UInt64 watch_prev_elapsed{0}; - std::unique_ptr stage_progress{nullptr}; + UInt64 watch_prev_elapsed = 0; + std::unique_ptr stage_progress; MutationCommands commands_for_part; MutationCommands for_interpreter; @@ -998,12 +998,12 @@ struct MutationContext NameSet materialized_statistics; MergeTreeData::MutableDataPartPtr new_data_part; - IMergedBlockOutputStreamPtr out{nullptr}; + IMergedBlockOutputStreamPtr out; String mrk_extension; std::vector projections_to_build; - IMergeTreeDataPart::MinMaxIndexPtr minmax_idx{nullptr}; + IMergeTreeDataPart::MinMaxIndexPtr minmax_idx; std::set indices_to_recalc; std::set stats_to_recalc; @@ -1283,7 +1283,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block)) { if (ctx->minmax_idx) - ctx->minmax_idx->update(cur_block, ctx->data->getMinMaxColumnsNames(ctx->metadata_snapshot->getPartitionKey())); + ctx->minmax_idx->update(cur_block, MergeTreeData::getMinMaxColumnsNames(ctx->metadata_snapshot->getPartitionKey())); ctx->out->write(cur_block); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp 
b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 7fcf6b971bb5..e1f28c2a9515 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -289,7 +289,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts"); } - auto part_blocks = storage.writer.splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); + auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context, async_insert_info); using DelayedPartition = typename ReplicatedMergeTreeSinkImpl::DelayedChunk::Partition; using DelayedPartitions = std::vector; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 0ca7a4d74d93..268f2d66c96e 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -33,7 +33,7 @@ static String formattedASTNormalized(const ASTPtr & ast) if (!ast) return ""; auto ast_normalized = ast->clone(); - FunctionNameNormalizer().visit(ast_normalized.get()); + FunctionNameNormalizer::visit(ast_normalized.get()); WriteBufferFromOwnString buf; formatAST(*ast_normalized, buf, false, true); return buf.str(); diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 9a3c17923d87..d552a4b6fa5a 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -585,7 +585,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) auto minmax_columns = metadata.getColumnsRequiredForPartitionKey(); auto partition_key = metadata.partition_key.expression_list_ast->clone(); - FunctionNameNormalizer().visit(partition_key.get()); + FunctionNameNormalizer::visit(partition_key.get()); auto primary_key_asts = metadata.primary_key.expression_list_ast->children; metadata.minmax_count_projection.emplace(ProjectionDescription::getMinMaxCountProjection( columns, partition_key, minmax_columns, primary_key_asts, context)); @@ -694,7 +694,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) auto minmax_columns = metadata.getColumnsRequiredForPartitionKey(); auto partition_key = metadata.partition_key.expression_list_ast->clone(); - FunctionNameNormalizer().visit(partition_key.get()); + FunctionNameNormalizer::visit(partition_key.get()); auto primary_key_asts = metadata.primary_key.expression_list_ast->children; metadata.minmax_count_projection.emplace(ProjectionDescription::getMinMaxCountProjection( columns, partition_key, minmax_columns, primary_key_asts, context)); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 0d220f2fd5da..f747bbf6b28d 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -1741,7 +1741,7 @@ class StorageFileSink final : public SinkToStorage, WithContext void initialize() { - std::unique_ptr naked_buffer = nullptr; + std::unique_ptr naked_buffer; if (use_table_fd) { naked_buffer = std::make_unique(table_fd, DBMS_DEFAULT_BUFFER_SIZE); diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 588429284f01..945ee4f369f5 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -286,7 +286,7 @@ class StorageFileSource : public SourceWithKeyCondition, 
WithContext std::unique_ptr reader; std::shared_ptr archive_reader; - std::unique_ptr file_enumerator = nullptr; + std::unique_ptr file_enumerator; ColumnsDescription columns_description; NamesAndTypesList requested_columns; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index aad4fc36a1bd..c9f451b6bb17 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -436,7 +436,7 @@ CurrentlyMergingPartsTagger::CurrentlyMergingPartsTagger( /// if we mutate part, than we should reserve space on the same disk, because mutations possible can create hardlinks if (is_mutation) { - reserved_space = storage.tryReserveSpace(total_size, future_part->parts[0]->getDataPartStorage()); + reserved_space = StorageMergeTree::tryReserveSpace(total_size, future_part->parts[0]->getDataPartStorage()); } else { diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 8ca061db4ecf..c4b84a0ae8cc 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -10512,7 +10512,7 @@ void StorageReplicatedMergeTree::restoreDataFromBackup(RestorerFromBackup & rest } auto backup = restorer.getBackup(); if (!empty && backup->hasFiles(data_path_in_backup)) - restorer.throwTableIsNotEmpty(getStorageID()); + RestorerFromBackup::throwTableIsNotEmpty(getStorageID()); } restorePartsFromBackup(restorer, data_path_in_backup, partitions); diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 2d3aef312bf9..6cda0fca60bb 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -207,7 +207,7 @@ class StorageS3Source::DisclosedGlobIterator::Impl : WithContext , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, "ListObjects")) , file_progress_callback(file_progress_callback_) { - if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) + if (globbed_uri.bucket.find_first_of("*?{") != std::string::npos) throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{")); From 9626506585666a220b5d82fa276fac45e4bf3ccf Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Apr 2024 19:18:31 +0000 Subject: [PATCH 218/470] More fixes --- src/Coordination/FourLetterCommand.cpp | 2 +- .../UserDefinedSQLFunctionFactory.cpp | 2 +- src/Functions/decodeHTMLComponent.cpp | 2 +- src/Processors/Formats/Impl/NativeFormat.cpp | 2 +- src/Processors/Merges/Algorithms/Graphite.cpp | 2 +- src/Processors/QueryPlan/JoinStep.cpp | 2 +- src/Storages/Distributed/DistributedSink.cpp | 2 +- src/Storages/MergeTree/KeyCondition.cpp | 4 ++-- src/Storages/MergeTree/MergeFromLogEntryTask.cpp | 2 +- .../MergeTree/MergePlainMergeTreeTask.cpp | 2 +- .../MergeTreeIndexConditionBloomFilter.cpp | 2 +- src/Storages/MergeTree/MergeTreeMarksLoader.cpp | 2 +- src/Storages/MergeTree/MergeTreePartsMover.cpp | 2 +- src/Storages/MergeTree/MergeTreeSettings.cpp | 2 +- src/Storages/MergeTree/MergeTreeSink.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 2 +- .../ReplicatedMergeTreeTableMetadata.cpp | 2 +- src/Storages/StorageFuzzJSON.cpp | 16 ++++++++-------- src/Storages/TTLDescription.cpp | 2 +- 19 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/Coordination/FourLetterCommand.cpp b/src/Coordination/FourLetterCommand.cpp index d7fa5abe7421..25254e10441e 100644 --- 
a/src/Coordination/FourLetterCommand.cpp +++ b/src/Coordination/FourLetterCommand.cpp @@ -592,7 +592,7 @@ String RecalculateCommand::run() String CleanResourcesCommand::run() { - keeper_dispatcher.cleanResources(); + KeeperDispatcher::cleanResources(); return "ok"; } diff --git a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp index e37e4a23b63c..e22cd6d0022b 100644 --- a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp @@ -86,7 +86,7 @@ namespace auto & res = typeid_cast(*ptr); res.if_not_exists = false; res.or_replace = false; - FunctionNameNormalizer().visit(res.function_core.get()); + FunctionNameNormalizer::visit(res.function_core.get()); return ptr; } } diff --git a/src/Functions/decodeHTMLComponent.cpp b/src/Functions/decodeHTMLComponent.cpp index 2cd95127266c..cb6ba0b07e20 100644 --- a/src/Functions/decodeHTMLComponent.cpp +++ b/src/Functions/decodeHTMLComponent.cpp @@ -108,7 +108,7 @@ namespace // null terminate the sequence seq.push_back('\0'); // lookup the html sequence in the perfect hashmap. - const auto * res = hash.Lookup(seq.data(), strlen(seq.data())); + const auto * res = HTMLCharacterHash::Lookup(seq.data(), strlen(seq.data())); // reset so that it's reused in the next iteration seq.clear(); if (res) diff --git a/src/Processors/Formats/Impl/NativeFormat.cpp b/src/Processors/Formats/Impl/NativeFormat.cpp index 73ffc02bbc1c..a7a49ab6a8c6 100644 --- a/src/Processors/Formats/Impl/NativeFormat.cpp +++ b/src/Processors/Formats/Impl/NativeFormat.cpp @@ -82,7 +82,7 @@ class NativeOutputFormat final : public IOutputFormat std::string getContentType() const override { - return writer.getContentType(); + return NativeWriter::getContentType(); } protected: diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp index 817961b709c2..a75c2b877206 100644 --- a/src/Processors/Merges/Algorithms/Graphite.cpp +++ b/src/Processors/Merges/Algorithms/Graphite.cpp @@ -76,7 +76,7 @@ inline static const Patterns & selectPatternsForMetricType(const Graphite::Param if (params.patterns_typed) { std::string_view path_view = path; - if (path_view.find("?"sv) == path_view.npos) + if (path_view.find("?"sv) == std::string::npos) return params.patterns_plain; else return params.patterns_tagged; diff --git a/src/Processors/QueryPlan/JoinStep.cpp b/src/Processors/QueryPlan/JoinStep.cpp index 1931b1eb3a14..8fe2515e3233 100644 --- a/src/Processors/QueryPlan/JoinStep.cpp +++ b/src/Processors/QueryPlan/JoinStep.cpp @@ -31,7 +31,7 @@ std::vector> describeJoinActions(const JoinPtr & join) description.emplace_back("ASOF inequality", toString(table_join.getAsofInequality())); if (!table_join.getClauses().empty()) - description.emplace_back("Clauses", table_join.formatClauses(table_join.getClauses(), true /*short_format*/)); + description.emplace_back("Clauses", TableJoin::formatClauses(table_join.getClauses(), true /*short_format*/)); return description; } diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index ddbcc6d473f9..b89a8d7bcfd5 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -613,7 +613,7 @@ IColumn::Selector DistributedSink::createSelector(const Block & source_block) co const auto & key_column = current_block_with_sharding_key_expr.getByName(storage.getShardingKeyColumnName()); - return 
storage.createSelector(cluster, key_column); + return StorageDistributed::createSelector(cluster, key_column); } diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index a779311c22b6..2d57ea40c9cb 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1964,8 +1964,8 @@ KeyCondition::Description KeyCondition::getDescription() const /// This means that logical NOT is applied to leaf. bool negate = false; - std::unique_ptr left; - std::unique_ptr right; + std::unique_ptr left = nullptr; + std::unique_ptr right = nullptr; }; /// The algorithm is the same as in KeyCondition::checkInHyperrectangle diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 3d1c5db07b59..e8d55f75b08a 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -426,7 +426,7 @@ bool MergeFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWrite ProfileEvents::increment(ProfileEvents::ReplicatedPartMerges); write_part_log({}); - storage.incrementMergedPartsProfileEvent(part->getType()); + StorageReplicatedMergeTree::incrementMergedPartsProfileEvent(part->getType()); return true; } diff --git a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp index c218acce903c..866a63911c3c 100644 --- a/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MergePlainMergeTreeTask.cpp @@ -149,7 +149,7 @@ void MergePlainMergeTreeTask::finish() ThreadFuzzer::maybeInjectMemoryLimitException(); write_part_log({}); - storage.incrementMergedPartsProfileEvent(new_part->getType()); + StorageMergeTree::incrementMergedPartsProfileEvent(new_part->getType()); transfer_profile_counters_to_initial_query(); if (auto txn_ = txn_holder.getTransaction()) diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index f506230b5eaf..7ab90dac5b03 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -590,7 +590,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals( for (const auto & f : value_field.get()) { - if ((f.isNull() && !is_nullable) || f.isDecimal(f.getType())) + if ((f.isNull() && !is_nullable) || f.isDecimal(f.getType())) /// NOLINT(readability-static-accessed-through-instance) return false; auto converted = convertFieldToType(f, *actual_type); diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index 6798f97e4942..1e9a320fa953 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -210,7 +210,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksSync() if (mark_cache) { - auto key = mark_cache->hash(fs::path(data_part_storage->getFullPath()) / mrk_path); + auto key = MarkCache::hash(fs::path(data_part_storage->getFullPath()) / mrk_path); if (save_marks_in_cache) { auto callback = [this] { return loadMarksImpl(); }; diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index d32bc6d18262..1db70162bff3 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -158,7 +158,7 @@ bool MergeTreePartsMover::selectPartsForMove( { auto 
destination = data->getDestinationForMoveTTL(*ttl_entry); if (destination && !data->isPartInTTLDestination(*ttl_entry, *part)) - reservation = data->tryReserveSpace(part->getBytesOnDisk(), data->getDestinationForMoveTTL(*ttl_entry)); + reservation = MergeTreeData::tryReserveSpace(part->getBytesOnDisk(), data->getDestinationForMoveTTL(*ttl_entry)); } if (reservation) /// Found reservation by TTL rule. diff --git a/src/Storages/MergeTree/MergeTreeSettings.cpp b/src/Storages/MergeTree/MergeTreeSettings.cpp index b42da22239eb..5d6f08d3c530 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.cpp +++ b/src/Storages/MergeTree/MergeTreeSettings.cpp @@ -230,7 +230,7 @@ void MergeTreeColumnSettings::validate(const SettingsChanges & changes) "Setting {} is unknown or not supported at column level, supported settings: {}", change.name, fmt::join(allowed_column_level_settings, ", ")); - merge_tree_settings.checkCanSet(change.name, change.value); + MergeTreeSettings::checkCanSet(change.name, change.value); } } diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 82c015f33ed3..b7dede3cb002 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -63,7 +63,7 @@ void MergeTreeSink::consume(Chunk chunk) if (!storage_snapshot->object_columns.empty()) convertDynamicColumnsToTuples(block, storage_snapshot); - auto part_blocks = storage.writer.splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context); + auto part_blocks = MergeTreeDataWriter::splitBlockIntoParts(std::move(block), max_parts_per_block, metadata_snapshot, context); using DelayedPartitions = std::vector; DelayedPartitions partitions; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index e1f28c2a9515..8913e9f7e27b 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -449,7 +449,7 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithF int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; auto counters_snapshot = std::make_shared(partition.part_counters.getPartiallyAtomicSnapshot()); PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, partition.elapsed_ns, counters_snapshot), ExecutionStatus(error)); - storage.incrementInsertedPartsProfileEvent(part->getType()); + StorageReplicatedMergeTree::incrementInsertedPartsProfileEvent(part->getType()); } catch (...) 
{ diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 268f2d66c96e..287a4d20543c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -43,7 +43,7 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr { if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { - auto minmax_idx_column_names = data.getMinMaxColumnsNames(metadata_snapshot->getPartitionKey()); + auto minmax_idx_column_names = MergeTreeData::getMinMaxColumnsNames(metadata_snapshot->getPartitionKey()); date_column = minmax_idx_column_names[data.minmax_idx_date_column_pos]; } diff --git a/src/Storages/StorageFuzzJSON.cpp b/src/Storages/StorageFuzzJSON.cpp index 87790dd2fdc5..918f54b16722 100644 --- a/src/Storages/StorageFuzzJSON.cpp +++ b/src/Storages/StorageFuzzJSON.cpp @@ -364,7 +364,7 @@ JSONNode & fuzzSingleJSONNode(JSONNode & n, const StorageFuzzJSON::Configuration if (val.fixed) val.fixed = generateRandomFixedValue(config, rnd); - else if (val.array && val.array->size() < config.max_array_size && node_count + val.array->size() < config.value_number_limit) + else if (val.array && val.array->size() < config.max_array_size && node_count + val.array->size() < StorageFuzzJSON::Configuration::value_number_limit) { if (val.array->empty()) val.array->push_back(generateRandomJSONNode(config, rnd, /*with_key*/ false, depth)); @@ -377,7 +377,7 @@ JSONNode & fuzzSingleJSONNode(JSONNode & n, const StorageFuzzJSON::Configuration } ++node_count; } - else if (val.object && val.object->size() < config.max_object_size && node_count + val.object->size() < config.value_number_limit) + else if (val.object && val.object->size() < config.max_object_size && node_count + val.object->size() < StorageFuzzJSON::Configuration::value_number_limit) { val.object->push_back(generateRandomJSONNode(config, rnd, /*with_key*/ true, depth)); ++node_count; @@ -619,11 +619,11 @@ void StorageFuzzJSON::processNamedCollectionResult(Configuration & configuration { configuration.max_output_length = collection.get("max_output_length"); - if (configuration.max_output_length < 2 || configuration.max_output_length > configuration.output_length_limit) + if (configuration.max_output_length < 2 || configuration.max_output_length > StorageFuzzJSON::Configuration::output_length_limit) throw Exception( ErrorCodes::BAD_ARGUMENTS, "The value of the 'max_output_length' argument must be within the interval [2, {}.]", - configuration.output_length_limit); + StorageFuzzJSON::Configuration::output_length_limit); } if (collection.has("max_nesting_level")) @@ -638,11 +638,11 @@ void StorageFuzzJSON::processNamedCollectionResult(Configuration & configuration if (collection.has("max_string_value_length")) { auto max_string_value_length = collection.get("max_string_value_length"); - if (max_string_value_length > configuration.output_length_limit) + if (max_string_value_length > StorageFuzzJSON::Configuration::output_length_limit) throw Exception( ErrorCodes::BAD_ARGUMENTS, "The value of the 'max_string_value_length' argument must be at most {}.", - configuration.output_length_limit); + StorageFuzzJSON::Configuration::output_length_limit); configuration.max_string_value_length = std::min(max_string_value_length, configuration.max_output_length); } @@ -650,11 +650,11 @@ void StorageFuzzJSON::processNamedCollectionResult(Configuration & configuration if 
(collection.has("max_key_length")) { auto max_key_length = collection.get("max_key_length"); - if (max_key_length > configuration.output_length_limit) + if (max_key_length > StorageFuzzJSON::Configuration::output_length_limit) throw Exception( ErrorCodes::BAD_ARGUMENTS, "The value of the 'max_key_length' argument must be less or equal than {}.", - configuration.output_length_limit); + StorageFuzzJSON::Configuration::output_length_limit); configuration.max_key_length = std::min(max_key_length, configuration.max_output_length); configuration.min_key_length = std::min(configuration.min_key_length, configuration.max_key_length); } diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 3d1ce76dff17..6e7ea32ee59a 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -426,7 +426,7 @@ TTLTableDescription TTLTableDescription::parse(const String & str, const Columns ParserTTLExpressionList parser; ASTPtr ast = parseQuery(parser, str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS); - FunctionNameNormalizer().visit(ast.get()); + FunctionNameNormalizer::visit(ast.get()); return getTTLForTableFromAST(ast, columns, context, primary_key, context->getSettingsRef().allow_suspicious_ttl_expressions); } From 2578ceddadf0271eac6c48b059bd46b873a5ae5d Mon Sep 17 00:00:00 2001 From: Joseph Redfern Date: Wed, 3 Apr 2024 21:09:47 +0100 Subject: [PATCH 219/470] Correct typo in "use_invironment_credentials" configuration option `use_environment_credentials was incorrectly specified as `use_invironment_credentials` --- docs/en/operations/storing-data.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md index 9ffbb64c1ed2..2c642dd2f0b5 100644 --- a/docs/en/operations/storing-data.md +++ b/docs/en/operations/storing-data.md @@ -36,7 +36,7 @@ E.g. 
configuration option s3 https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 + 1 ``` @@ -47,7 +47,7 @@ is equal to configuration (from `24.1`): s3 local https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 + 1 ``` @@ -56,7 +56,7 @@ Configuration s3_plain https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 + 1 ``` @@ -67,7 +67,7 @@ is equal to s3 plain https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 + 1 ``` @@ -79,7 +79,7 @@ Example of full storage configuration will look like: s3 https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 + 1 @@ -105,7 +105,7 @@ Starting with 24.1 clickhouse version, it can also look like: s3 local https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 + 1 @@ -324,7 +324,7 @@ Configuration: s3_plain https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 + 1 ``` @@ -337,7 +337,7 @@ Configuration: azure plain https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/ - 1 + 1 ``` From d905b83369931400bbd87faeef138fc68ac17455 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Apr 2024 20:10:18 +0000 Subject: [PATCH 220/470] Fix something bad --- src/Dictionaries/FlatDictionary.cpp | 3 ++- src/Functions/decodeHTMLComponent.cpp | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Dictionaries/FlatDictionary.cpp b/src/Dictionaries/FlatDictionary.cpp index e3b1e8a84e27..7509af31face 100644 --- a/src/Dictionaries/FlatDictionary.cpp +++ b/src/Dictionaries/FlatDictionary.cpp @@ -412,7 +412,8 @@ void FlatDictionary::blockToAttributes(const Block & block) { const auto keys_column = block.safeGetByPosition(0).column; - DictionaryKeysExtractor keys_extractor({ keys_column }, DictionaryKeysArenaHolder::getComplexKeyArena()); + DictionaryKeysArenaHolder arena_holder; + DictionaryKeysExtractor keys_extractor({ keys_column }, arena_holder.getComplexKeyArena()); /// NOLINT(readability-static-accessed-through-instance) size_t keys_size = keys_extractor.getKeysSize(); static constexpr size_t key_offset = 1; diff --git a/src/Functions/decodeHTMLComponent.cpp b/src/Functions/decodeHTMLComponent.cpp index cb6ba0b07e20..4db3c43f9461 100644 --- a/src/Functions/decodeHTMLComponent.cpp +++ b/src/Functions/decodeHTMLComponent.cpp @@ -70,8 +70,7 @@ namespace const char * src_pos = src; const char * src_end = src + src_size; char * dst_pos = dst; - // perfect hashmap to lookup html character references - HTMLCharacterHash hash; + // to hold char seq for lookup, reuse it std::vector seq; while (true) From dda1a0b9f0d09777fb7d0cc79ca26d8a58f16476 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Apr 2024 20:44:02 +0000 Subject: [PATCH 221/470] Also enable modernize-use-override --- .clang-tidy | 1 - 1 file changed, 1 deletion(-) diff --git a/.clang-tidy b/.clang-tidy index 13c1b116eadb..c98bee71d1ac 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -96,7 +96,6 @@ Checks: [ '-modernize-use-default-member-init', '-modernize-use-emplace', '-modernize-use-nodiscard', - '-modernize-use-override', '-modernize-use-trailing-return-type', '-performance-inefficient-string-concatenation', From 2c2f2b95263d69014154f442cabb4973ecc35ead Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 3 Apr 2024 22:59:59 +0200 Subject: [PATCH 222/470] Missing include --- src/IO/HTTPCommon.cpp | 1 + 1 file changed, 1 insertion(+) diff --git 
a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 09f7724d6136..6e1c886b9b04 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include From 3c61a7f4bfc30fa23c8c9a0e90cddb12b5617cda Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 3 Apr 2024 23:34:31 +0200 Subject: [PATCH 223/470] Update storing-data.md From c6aed8b7938ee78030d5a23740a3bc191e85ca7f Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Wed, 3 Apr 2024 19:19:54 -0300 Subject: [PATCH 224/470] add optional port to regex --- src/IO/S3/URI.cpp | 2 +- src/IO/S3/URI.h | 2 +- src/IO/tests/gtest_s3_uri.cpp | 14 ++++++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/IO/S3/URI.cpp b/src/IO/S3/URI.cpp index 02c77518ab69..0d498c1d743a 100644 --- a/src/IO/S3/URI.cpp +++ b/src/IO/S3/URI.cpp @@ -40,7 +40,7 @@ URI::URI(const std::string & uri_) /// Case when AWS Private Link Interface is being used /// E.g. (bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com/bucket-name/key) /// https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html - static const RE2 aws_private_link_style_pattern(R"(bucket\.vpce\-([a-z0-9\-.]+)\.vpce.amazonaws.com)"); + static const RE2 aws_private_link_style_pattern(R"(bucket\.vpce\-([a-z0-9\-.]+)\.vpce.amazonaws.com(:\d{1,5})?)"); /// Case when bucket name and key represented in path of S3 URL. /// E.g. (https://s3.region.amazonaws.com/bucket-name/key) diff --git a/src/IO/S3/URI.h b/src/IO/S3/URI.h index 79f3da3fbbbc..06b7d03aa8ce 100644 --- a/src/IO/S3/URI.h +++ b/src/IO/S3/URI.h @@ -17,7 +17,7 @@ namespace DB::S3 * The following patterns are allowed: * s3://bucket/key * http(s)://endpoint/bucket/key - * TODO specify aws private link + * http(s)://bucket..s3..vpce.amazonaws.com<:port_number>/bucket_name/key */ struct URI { diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index 0a164b0dd612..9c1f7bd5219d 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -94,6 +94,20 @@ const TestCase TestCases[] = { "b/c/d/e/f/g/h/i/j/data.json", "", false}, + // Zonal + {S3::URI("https://bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w-us-east-1a.s3.us-east-1.vpce.amazonaws.com/root/nested/file.txt"), + "https://bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com", + "root", + "nested/file.txt", + "", + false}, + // Non standard port + {S3::URI("https://bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w-us-east-1a.s3.us-east-1.vpce.amazonaws.com:65535/root/nested/file.txt"), + "https://bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com:65535", + "root", + "nested/file.txt", + "", + false}, }; class S3UriTest : public testing::TestWithParam From 414f3aebef07c4a8976931cf7c2db72c91e5bed9 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 7 Mar 2024 11:59:41 +0300 Subject: [PATCH 225/470] Context getGlobalTemporaryVolume use shared lock --- src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 65fcd51529bd..df80a60d6d33 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -971,7 +971,7 @@ Strings Context::getWarnings() const /// TODO: remove, use `getTempDataOnDisk` VolumePtr Context::getGlobalTemporaryVolume() const { - std::lock_guard lock(shared->mutex); + SharedLockGuard lock(shared->mutex); /// Calling this method we 
just bypass the `temp_data_on_disk` and write to the file on the volume directly. /// Volume is the same for `root_temp_data_on_disk` (always set) and `temp_data_on_disk` (if it's set). if (shared->root_temp_data_on_disk) From bad60230647a234b34f9a66f04e6ef69c474d6dc Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 4 Apr 2024 10:52:11 +0200 Subject: [PATCH 226/470] Add arrayEnumeratedDenseRanked --- .../functions/array-functions.md | 59 ++++++++++++++++++- 1 file changed, 57 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 2ddce6d6f71f..7377174ace9e 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1451,21 +1451,76 @@ Result: └────────────────────────────────┘ ``` -## arrayEnumerateDense(arr) +## arrayEnumerateDense Returns an array of the same size as the source array, indicating where each element first appears in the source array. -Example: +**Syntax** + +```sql +arrayEnumerateDense(arr) +``` + +**Example** + +Query: ``` sql SELECT arrayEnumerateDense([10, 20, 10, 30]) ``` +Result: + ``` text ┌─arrayEnumerateDense([10, 20, 10, 30])─┐ │ [1,2,1,3] │ └───────────────────────────────────────┘ ``` +## arrayEnumerateDenseRanked + +Enumerates distinct values of the passed multidimensional array, looking inside at the specified depths. + +**Syntax** + +```sql +arrayEnumerateDenseRanked(clear_depth, arr, max_array_depth) +``` + +**Parameters** + +- `clear_depth`: Enumerate elements at the specified level separately. (The enumeration counter is reset for each new element). Positive [Integer](../data-types/int-uint.md) less than or equal to `max_arr_depth`. +- `arr`: N-dimensional array to enumerate. [Array](../data-types/array.md) +- `max_array_depth`: The maximum effective depth. Positive [Integer](../data-types/int-uint.md) less than or equal to the depth of `arr`. + +**Example** + +With `clear_depth`=1 and `max_array_depth`=1, the result is identical to what [arrayEnumerateDense](#arrayenumeratedense) would give. + +Query: + +``` sql +SELECT arrayEnumerateDenseRanked(1,[10, 20, 10, 30],1); +``` + +Result: + +``` text +[1,2,1,3] +``` + +`arrayEnumerateDenseRanked` can be used to enumerate multidimensional arrays. 
+ +Query: + +``` sql +SELECT arrayEnumerateDenseRanked(2,[[10, 20, 10, 30],[40, 50, 60, 70]],2); +``` + +Result: + +``` text +[[1,2,1,3],[4,5,6,7]] +``` ## arrayIntersect(arr) From b91d446630a6054abcbc01251fba3abf032a62b6 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 4 Apr 2024 10:57:29 +0200 Subject: [PATCH 227/470] Ping CI From d5229da1ec26918f8904eff0d91eabc8db89f1db Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 4 Apr 2024 09:09:03 +0000 Subject: [PATCH 228/470] Fix tidy build --- src/Functions/GatherUtils/Sources.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/GatherUtils/Sources.h b/src/Functions/GatherUtils/Sources.h index 01b633385596..4e3009a695da 100644 --- a/src/Functions/GatherUtils/Sources.h +++ b/src/Functions/GatherUtils/Sources.h @@ -144,7 +144,7 @@ struct NumericArraySource : public ArraySourceImpl> #pragma clang diagnostic ignored "-Wsuggest-override" #pragma clang diagnostic ignored "-Wsuggest-destructor-override" -/// NOLINTBEGIN(hicpp-use-override) +/// NOLINTBEGIN(hicpp-use-override, modernize-use-override) template struct ConstSource : public Base @@ -233,7 +233,7 @@ struct ConstSource : public Base } }; -/// NOLINTEND(hicpp-use-override) +/// NOLINTEND(hicpp-use-override, modernize-use-override) #pragma clang diagnostic pop From 5ce8ab2bd986d0c34abd7d6cf3cf028cad369cbb Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 4 Apr 2024 11:32:32 +0200 Subject: [PATCH 229/470] More logging --- src/Interpreters/Cache/FileCache.cpp | 10 ++++ src/Interpreters/Cache/FileCacheFactory.cpp | 5 +- src/Interpreters/Cache/FileCacheSettings.cpp | 58 ++++++++++++++++++++ src/Interpreters/Cache/FileCacheSettings.h | 3 + 4 files changed, 75 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 90671629e647..8ab46e66a76a 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1379,6 +1379,16 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, } } + { + auto cache_lock = lockCache(); + LOG_TRACE(log, "new max size: {}, old max size: {}, " + "new elements count: {}, old_elements_count: {}, " + "current max size: {}, current max elements: {}", + new_settings.max_size, actual_settings.max_size, + new_settings.max_elements, actual_settings.max_elements, + main_priority->getSizeLimit(cache_lock), main_priority->getElementsLimit(cache_lock)); + } + if (new_settings.max_size != actual_settings.max_size || new_settings.max_elements != actual_settings.max_elements) { diff --git a/src/Interpreters/Cache/FileCacheFactory.cpp b/src/Interpreters/Cache/FileCacheFactory.cpp index e1f144060580..a046c4c2b6a2 100644 --- a/src/Interpreters/Cache/FileCacheFactory.cpp +++ b/src/Interpreters/Cache/FileCacheFactory.cpp @@ -162,7 +162,10 @@ void FileCacheFactory::updateSettingsFromConfig(const Poco::Util::AbstractConfig continue; } - LOG_TRACE(log, "Will apply settings changes for cache: {}", cache_name); + LOG_TRACE(log, "Will apply settings changes for cache {}. 
" + "Settings changes: {} (new settings: {}, old_settings: {})", + cache_name, fmt::join(new_settings.getSettingsDiff(old_settings), ", "), + new_settings.toString(), old_settings.toString()); try { diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp index 8a48a2de68fa..fef17d0df28b 100644 --- a/src/Interpreters/Cache/FileCacheSettings.cpp +++ b/src/Interpreters/Cache/FileCacheSettings.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include namespace DB @@ -98,4 +99,61 @@ void FileCacheSettings::loadFromCollection(const NamedCollection & collection) loadImpl(std::move(collection_has), std::move(collection_get_uint), std::move(collection_get_string), std::move(collection_get_double)); } +std::string FileCacheSettings::toString() const +{ + WriteBufferFromOwnString res; + res << "base_path: " << base_path << "\n"; + res << "max_size: " << max_size << "\n"; + res << "max_elements: " << max_elements << "\n"; + res << "max_file_segment_size: " << max_file_segment_size << "\n"; + res << "cache_on_write_operations: " << cache_on_write_operations << "\n"; + res << "cache_hits_threshold: " << cache_hits_threshold << "\n"; + res << "enable_filesystem_query_cache_limit: " << enable_filesystem_query_cache_limit << "\n"; + res << "bypass_cache_threshold: " << bypass_cache_threshold << "\n"; + res << "boundary_alignment: " << boundary_alignment << "\n"; + res << "background_download_threads: " << background_download_threads << "\n"; + res << "background_download_queue_size_limit: " << background_download_queue_size_limit << "\n"; + res << "load_metadata_threads: " << load_metadata_threads << "\n"; + res << "write_cache_per_user_id_directory: " << write_cache_per_user_id_directory << "\n"; + res << "cache_policy: " << cache_policy << "\n"; + res << "slru_size_ratio: " << slru_size_ratio << "\n"; + return res.str(); +} + +std::vector FileCacheSettings::getSettingsDiff(const FileCacheSettings & other) const +{ + std::vector res; + if (base_path != other.base_path) + res.push_back("base_path"); + if (max_size != other.max_size) + res.push_back("max_size"); + if (max_elements != other.max_elements) + res.push_back("max_elements"); + if (max_file_segment_size != other.max_file_segment_size) + res.push_back("max_file_segment_size"); + if (cache_on_write_operations != other.cache_on_write_operations) + res.push_back("cache_on_write_operations"); + if (cache_hits_threshold != other.cache_hits_threshold) + res.push_back("cache_hits_threshold"); + if (enable_filesystem_query_cache_limit != other.enable_filesystem_query_cache_limit) + res.push_back("enable_filesystem_query_cache_limit"); + if (bypass_cache_threshold != other.bypass_cache_threshold) + res.push_back("bypass_cache_threshold"); + if (boundary_alignment != other.boundary_alignment) + res.push_back("boundary_alignment"); + if (background_download_threads != other.background_download_threads) + res.push_back("background_download_threads"); + if (background_download_queue_size_limit != other.background_download_queue_size_limit) + res.push_back("background_download_queue_size_limit"); + if (load_metadata_threads != other.load_metadata_threads) + res.push_back("load_metadata_threads"); + if (write_cache_per_user_id_directory != other.write_cache_per_user_id_directory) + res.push_back("write_cache_per_user_directory"); + if (cache_policy != other.cache_policy) + res.push_back("cache_policy"); + if (slru_size_ratio != other.slru_size_ratio) + res.push_back("slru_size_ratio"); + return res; +} + } 
diff --git a/src/Interpreters/Cache/FileCacheSettings.h b/src/Interpreters/Cache/FileCacheSettings.h index 14770b3f0054..7dab14ac8960 100644 --- a/src/Interpreters/Cache/FileCacheSettings.h +++ b/src/Interpreters/Cache/FileCacheSettings.h @@ -41,6 +41,9 @@ struct FileCacheSettings void loadFromConfig(const Poco::Util::AbstractConfiguration & config, const std::string & config_prefix); void loadFromCollection(const NamedCollection & collection); + std::string toString() const; + std::vector getSettingsDiff(const FileCacheSettings & other) const; + bool operator ==(const FileCacheSettings &) const = default; private: From e516bef844b397a8f9c041b914e85939b225446c Mon Sep 17 00:00:00 2001 From: vdimir Date: Thu, 4 Apr 2024 09:52:58 +0000 Subject: [PATCH 230/470] Annalyzer: limit maximal size of column in constant folding --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index f5474ddb662a..8f6c461d92d3 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6083,7 +6083,9 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi * Example: SELECT toTypeName(sum(number)) FROM numbers(10); */ if (column && isColumnConst(*column) && !typeid_cast(column.get())->getDataColumn().isDummy() && - (!hasAggregateFunctionNodes(node) && !hasFunctionNode(node, "arrayJoin"))) + !hasAggregateFunctionNodes(node) && !hasFunctionNode(node, "arrayJoin") && + /// Sanity check: do not convert large columns to constants + column->byteSize() < 1_MiB) { /// Replace function node with result constant node Field column_constant_value; From 58c79af80b38c1c3b3f64718690edafdad2156af Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 4 Apr 2024 12:22:11 +0200 Subject: [PATCH 231/470] Simpler --- src/Common/QueryProfiler.cpp | 9 ++++++++ src/Common/ThreadStatus.cpp | 23 +++++++++++++++++++ .../Standalone/ThreadStatusExt.cpp | 4 ---- src/Interpreters/ThreadStatusExt.cpp | 21 ----------------- .../test_trace_collector_serverwide/test.py | 3 +++ 5 files changed, 35 insertions(+), 25 deletions(-) diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index f985ec95e881..b616b7954050 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -264,7 +264,16 @@ QueryProfilerBase::QueryProfilerBase(UInt64 thread_id, int clock_t template void QueryProfilerBase::setPeriod(UInt32 period_) { +#if defined(SANITIZER) + UNUSED(period); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler disabled because they cannot work under sanitizers"); +#elif defined(__APPLE__) + UNUSED(period); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler cannot work on OSX"); +#else timer.set(period_); +#endif + } template diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index cf50d305e956..a3a7257b4724 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -124,6 +124,29 @@ ThreadStatus::ThreadStatus(bool check_current_thread_on_destruction_) #endif } +void ThreadStatus::initGlobalProfiler(UInt64 global_profiler_real_time_period, UInt64 global_profiler_cpu_time_period) +{ +#if !defined(SANITIZER) && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) && !defined(__APPLE__) + try + { + if (global_profiler_real_time_period > 0) + query_profiler_real = std::make_unique(thread_id, + /* period= */ static_cast(global_profiler_real_time_period)); 
+ + if (global_profiler_cpu_time_period > 0) + query_profiler_cpu = std::make_unique(thread_id, + /* period= */ static_cast(global_profiler_cpu_time_period)); + } + catch (...) + { + tryLogCurrentException("ThreadStatus", "Cannot initialize GlobalProfiler"); + } +#else + UNUSED(global_profiler_real_time_period); + UNUSED(global_profiler_cpu_time_period); +#endif +} + ThreadGroupPtr ThreadStatus::getThreadGroup() const { chassert(current_thread == this); diff --git a/src/Coordination/Standalone/ThreadStatusExt.cpp b/src/Coordination/Standalone/ThreadStatusExt.cpp index 2b89e2f024de..97f7287be8ca 100644 --- a/src/Coordination/Standalone/ThreadStatusExt.cpp +++ b/src/Coordination/Standalone/ThreadStatusExt.cpp @@ -11,8 +11,4 @@ void CurrentThread::attachToGroup(const ThreadGroupPtr &) { } -void ThreadStatus::initGlobalProfiler(UInt64, UInt64) -{ -} - } diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 4b9bd069bc6a..2b8e8bef6d4a 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -458,27 +458,6 @@ void ThreadStatus::resetPerformanceCountersLastUsage() taskstats->reset(); } - -void ThreadStatus::initGlobalProfiler(UInt64 global_profiler_real_time_period, UInt64 global_profiler_cpu_time_period) -{ - - try - { - if (global_profiler_real_time_period > 0) - query_profiler_real = std::make_unique(thread_id, - /* period= */ static_cast(global_profiler_real_time_period)); - - if (global_profiler_cpu_time_period > 0) - query_profiler_cpu = std::make_unique(thread_id, - /* period= */ static_cast(global_profiler_cpu_time_period)); - } - catch (...) - { - tryLogCurrentException("ThreadStatus", "Cannot initialize GlobalProfiler"); - } - -} - void ThreadStatus::initQueryProfiler() { if (internal_thread) diff --git a/tests/integration/test_trace_collector_serverwide/test.py b/tests/integration/test_trace_collector_serverwide/test.py index 88d235642b9b..9bd107ac3659 100644 --- a/tests/integration/test_trace_collector_serverwide/test.py +++ b/tests/integration/test_trace_collector_serverwide/test.py @@ -22,6 +22,9 @@ def start_cluster(): def test_global_thread_profiler(start_cluster): + if node1.is_built_with_sanitizer(): + return + node1.query( "CREATE TABLE t (key UInt32, value String) Engine = MergeTree() ORDER BY key" ) From 276246ee9740a7c737434c8bdb0c8b33e19ee321 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 4 Apr 2024 12:29:54 +0200 Subject: [PATCH 232/470] Introduce IAggregateFunction_fwd to reduce header dependencies --- src/AggregateFunctions/IAggregateFunction.h | 10 +- .../IAggregateFunction_fwd.h | 14 +++ src/Analyzer/ArrayJoinNode.cpp | 12 +-- src/Analyzer/ColumnNode.cpp | 10 +- src/Analyzer/JoinNode.cpp | 12 +-- src/Analyzer/WindowNode.cpp | 8 +- src/Columns/ColumnAggregateFunction.h | 8 +- src/DataTypes/DataTypeAggregateFunction.cpp | 25 +++++ src/DataTypes/DataTypeAggregateFunction.h | 17 ++-- .../DataTypeCustomSimpleAggregateFunction.cpp | 1 + .../DataTypeCustomSimpleAggregateFunction.h | 7 +- .../SerializationAggregateFunction.cpp | 1 + .../SerializationAggregateFunction.h | 2 +- src/Formats/ProtobufSerializer.cpp | 93 ++++++++++--------- src/Functions/FunctionBinaryArithmetic.h | 31 ++++--- src/Functions/FunctionsConversion.cpp | 1 + src/Functions/runningAccumulate.cpp | 7 +- src/Interpreters/AggregateDescription.cpp | 4 +- src/Interpreters/AggregateDescription.h | 5 +- src/Interpreters/AggregatedData.h | 2 +- src/Interpreters/AggregatedDataVariants.h | 7 +- 
src/Interpreters/AggregationUtils.cpp | 1 + src/Interpreters/HashJoin.cpp | 1 + src/Interpreters/JIT/compileFunction.cpp | 21 +++-- src/Interpreters/JIT/compileFunction.h | 8 +- .../RewriteFunctionToSubcolumnVisitor.cpp | 7 +- src/Interpreters/WindowDescription.cpp | 8 +- src/Interpreters/WindowDescription.h | 8 +- src/Parsers/ExpressionElementParsers.cpp | 5 +- .../Algorithms/AggregatingSortedAlgorithm.h | 3 +- src/Processors/Merges/Algorithms/Graphite.cpp | 24 ++++- src/Processors/Merges/Algorithms/Graphite.h | 22 +---- src/Processors/QueryPlan/WindowStep.cpp | 8 +- src/Processors/Transforms/WindowTransform.h | 2 + src/Storages/System/StorageSystemGraphite.cpp | 5 +- .../System/StorageSystemMySQLBinlogs.cpp | 10 +- .../System/StorageSystemRemoteDataPaths.cpp | 11 ++- 37 files changed, 234 insertions(+), 187 deletions(-) create mode 100644 src/AggregateFunctions/IAggregateFunction_fwd.h diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index 97e0e89aee98..ee227db6d9d5 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -1,17 +1,18 @@ #pragma once +#include #include #include #include #include #include #include +#include #include #include #include #include #include -#include #include "config.h" @@ -46,13 +47,6 @@ class IWindowFunction; using DataTypePtr = std::shared_ptr; using DataTypes = std::vector; -using AggregateDataPtr = char *; -using AggregateDataPtrs = std::vector; -using ConstAggregateDataPtr = const char *; - -class IAggregateFunction; -using AggregateFunctionPtr = std::shared_ptr; - struct AggregateFunctionProperties; /** Aggregate functions interface. diff --git a/src/AggregateFunctions/IAggregateFunction_fwd.h b/src/AggregateFunctions/IAggregateFunction_fwd.h new file mode 100644 index 000000000000..7c78e32c6528 --- /dev/null +++ b/src/AggregateFunctions/IAggregateFunction_fwd.h @@ -0,0 +1,14 @@ +#pragma once + +#include +#include + +namespace DB +{ +using AggregateDataPtr = char *; +using AggregateDataPtrs = std::vector; +using ConstAggregateDataPtr = const char *; + +class IAggregateFunction; +using AggregateFunctionPtr = std::shared_ptr; +} diff --git a/src/Analyzer/ArrayJoinNode.cpp b/src/Analyzer/ArrayJoinNode.cpp index ee6bd80150d1..5ae097b1c124 100644 --- a/src/Analyzer/ArrayJoinNode.cpp +++ b/src/Analyzer/ArrayJoinNode.cpp @@ -1,14 +1,12 @@ #include - +#include +#include +#include #include #include -#include - -#include #include - -#include -#include +#include +#include namespace DB { diff --git a/src/Analyzer/ColumnNode.cpp b/src/Analyzer/ColumnNode.cpp index 3d9f5d1640ef..fa8c59f50f3c 100644 --- a/src/Analyzer/ColumnNode.cpp +++ b/src/Analyzer/ColumnNode.cpp @@ -1,14 +1,12 @@ #include - -#include - +#include +#include #include #include -#include - #include +#include +#include -#include namespace DB { diff --git a/src/Analyzer/JoinNode.cpp b/src/Analyzer/JoinNode.cpp index 9b61c8b19d0d..53a003ec3c07 100644 --- a/src/Analyzer/JoinNode.cpp +++ b/src/Analyzer/JoinNode.cpp @@ -1,16 +1,14 @@ #include #include - +#include +#include #include #include -#include - -#include -#include #include +#include +#include #include - -#include +#include namespace DB { diff --git a/src/Analyzer/WindowNode.cpp b/src/Analyzer/WindowNode.cpp index 0fbe7c51bc7c..af830815fd55 100644 --- a/src/Analyzer/WindowNode.cpp +++ b/src/Analyzer/WindowNode.cpp @@ -1,11 +1,9 @@ #include - -#include - -#include #include - +#include #include +#include +#include namespace DB { diff --git 
a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index c5d854e208a0..a75b27e835c2 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -17,13 +18,6 @@ using ArenaPtr = std::shared_ptr; using ConstArenaPtr = std::shared_ptr; using ConstArenas = std::vector; -using AggregateDataPtr = char *; -using AggregateDataPtrs = std::vector; -using ConstAggregateDataPtr = const char *; - -class IAggregateFunction; -using AggregateFunctionPtr = std::shared_ptr; - class Context; using ContextPtr = std::shared_ptr; diff --git a/src/DataTypes/DataTypeAggregateFunction.cpp b/src/DataTypes/DataTypeAggregateFunction.cpp index 14a3c6a4248e..ef7d86d2a812 100644 --- a/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/src/DataTypes/DataTypeAggregateFunction.cpp @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -32,6 +33,11 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +String DataTypeAggregateFunction::getFunctionName() const +{ + return function->getName(); +} + String DataTypeAggregateFunction::doGetName() const { @@ -52,6 +58,25 @@ size_t DataTypeAggregateFunction::getVersion() const return function->getDefaultVersion(); } +DataTypePtr DataTypeAggregateFunction::getReturnType() const +{ + return function->getResultType(); +} + +DataTypePtr DataTypeAggregateFunction::getReturnTypeToPredict() const +{ + return function->getReturnTypeToPredict(); +} + +bool DataTypeAggregateFunction::isVersioned() const +{ + return function->isVersioned(); +} + +void DataTypeAggregateFunction::updateVersionFromRevision(size_t revision, bool if_empty) const +{ + setVersion(function->getVersionFromRevision(revision), if_empty); +} String DataTypeAggregateFunction::getNameImpl(bool with_version) const { diff --git a/src/DataTypes/DataTypeAggregateFunction.h b/src/DataTypes/DataTypeAggregateFunction.h index 7d1bb355ccf9..8b4b3d6ee4cf 100644 --- a/src/DataTypes/DataTypeAggregateFunction.h +++ b/src/DataTypes/DataTypeAggregateFunction.h @@ -1,7 +1,7 @@ #pragma once -#include - +#include +#include #include @@ -39,7 +39,7 @@ class DataTypeAggregateFunction final : public IDataType { } - String getFunctionName() const { return function->getName(); } + String getFunctionName() const; AggregateFunctionPtr getFunction() const { return function; } String doGetName() const override; @@ -51,8 +51,8 @@ class DataTypeAggregateFunction final : public IDataType bool canBeInsideNullable() const override { return false; } - DataTypePtr getReturnType() const { return function->getResultType(); } - DataTypePtr getReturnTypeToPredict() const { return function->getReturnTypeToPredict(); } + DataTypePtr getReturnType() const; + DataTypePtr getReturnTypeToPredict() const; DataTypes getArgumentsDataTypes() const { return argument_types; } MutableColumnPtr createColumn() const override; @@ -69,7 +69,7 @@ class DataTypeAggregateFunction final : public IDataType SerializationPtr doGetDefaultSerialization() const override; bool supportsSparseSerialization() const override { return false; } - bool isVersioned() const { return function->isVersioned(); } + bool isVersioned() const; /// Version is not empty only if it was parsed from AST or implicitly cast to 0 or version according /// to server revision. 
@@ -84,10 +84,7 @@ class DataTypeAggregateFunction final : public IDataType version = version_; } - void updateVersionFromRevision(size_t revision, bool if_empty) const - { - setVersion(function->getVersionFromRevision(revision), if_empty); - } + void updateVersionFromRevision(size_t revision, bool if_empty) const; }; void setVersionToAggregateFunctions(DataTypePtr & type, bool if_empty, std::optional revision = std::nullopt); diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp index ee9870eb0efd..cae9622bcb93 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.cpp @@ -7,6 +7,7 @@ #include #include +#include #include #include #include diff --git a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h index 926dfd9cc828..bdabb465fe56 100644 --- a/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h +++ b/src/DataTypes/DataTypeCustomSimpleAggregateFunction.h @@ -1,13 +1,18 @@ #pragma once +#include +#include #include -#include #include namespace DB { +class IDataType; +using DataTypePtr = std::shared_ptr; +using DataTypes = std::vector; + /** The type SimpleAggregateFunction(fct, type) is meant to be used in an AggregatingMergeTree. It behaves like a standard * data type but when rows are merged, an aggregation function is applied. * diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp index 2ac23d52e28f..640d2c419d49 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.cpp +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/DataTypes/Serializations/SerializationAggregateFunction.h b/src/DataTypes/Serializations/SerializationAggregateFunction.h index 4212298bbc14..c45fc79f7143 100644 --- a/src/DataTypes/Serializations/SerializationAggregateFunction.h +++ b/src/DataTypes/Serializations/SerializationAggregateFunction.h @@ -1,6 +1,6 @@ #pragma once -#include +#include #include diff --git a/src/Formats/ProtobufSerializer.cpp b/src/Formats/ProtobufSerializer.cpp index c0d0713e2542..f2f1d985cc9c 100644 --- a/src/Formats/ProtobufSerializer.cpp +++ b/src/Formats/ProtobufSerializer.cpp @@ -1,51 +1,54 @@ #include #if USE_PROTOBUF -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include -# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include + +# include +# include +# include +# include +# include +# include +# include +# include + namespace DB { diff --git a/src/Functions/FunctionBinaryArithmetic.h 
b/src/Functions/FunctionBinaryArithmetic.h index 79e5ee442c20..89ff63995b18 100644 --- a/src/Functions/FunctionBinaryArithmetic.h +++ b/src/Functions/FunctionBinaryArithmetic.h @@ -5,54 +5,55 @@ // sanitizer/asan_interface.h #include #include -#include +#include #include +#include #include #include #include #include #include #include +#include +#include +#include #include #include +#include #include #include #include #include #include +#include #include -#include +#include #include -#include +#include #include #include +#include #include #include +#include #include #include #include #include #include #include +#include #include #include +#include #include +#include +#include +#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #if USE_EMBEDDED_COMPILER # include diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index 0f624a2fa2e5..9f994055afc0 100644 --- a/src/Functions/FunctionsConversion.cpp +++ b/src/Functions/FunctionsConversion.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include diff --git a/src/Functions/runningAccumulate.cpp b/src/Functions/runningAccumulate.cpp index b0ba10c40492..793e79cdf461 100644 --- a/src/Functions/runningAccumulate.cpp +++ b/src/Functions/runningAccumulate.cpp @@ -1,8 +1,9 @@ -#include -#include -#include +#include #include #include +#include +#include +#include #include #include #include diff --git a/src/Interpreters/AggregateDescription.cpp b/src/Interpreters/AggregateDescription.cpp index 787e0a503f85..d4c09995b56e 100644 --- a/src/Interpreters/AggregateDescription.cpp +++ b/src/Interpreters/AggregateDescription.cpp @@ -1,7 +1,7 @@ +#include +#include #include #include -#include - #include diff --git a/src/Interpreters/AggregateDescription.h b/src/Interpreters/AggregateDescription.h index 8c3302a8b0b3..0f1c0ce67ae6 100644 --- a/src/Interpreters/AggregateDescription.h +++ b/src/Interpreters/AggregateDescription.h @@ -1,13 +1,16 @@ #pragma once -#include +#include #include +#include #include #include namespace DB { +class WriteBuffer; + namespace JSONBuilder { class JSONMap; } struct AggregateDescription diff --git a/src/Interpreters/AggregatedData.h b/src/Interpreters/AggregatedData.h index 6cd6b190801e..4b581c682cab 100644 --- a/src/Interpreters/AggregatedData.h +++ b/src/Interpreters/AggregatedData.h @@ -1,5 +1,5 @@ #pragma once -#include +#include #include #include diff --git a/src/Interpreters/AggregatedDataVariants.h b/src/Interpreters/AggregatedDataVariants.h index 8b82c5d9842b..9f7185db9fca 100644 --- a/src/Interpreters/AggregatedDataVariants.h +++ b/src/Interpreters/AggregatedDataVariants.h @@ -1,11 +1,12 @@ #pragma once -#include -#include -#include #include #include #include +#include +#include + + namespace DB { class Arena; diff --git a/src/Interpreters/AggregationUtils.cpp b/src/Interpreters/AggregationUtils.cpp index 125a9e4f6b83..132ce93786aa 100644 --- a/src/Interpreters/AggregationUtils.cpp +++ b/src/Interpreters/AggregationUtils.cpp @@ -1,3 +1,4 @@ +#include #include namespace DB diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 12a906526f6c..73498b39ead3 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/JIT/compileFunction.cpp b/src/Interpreters/JIT/compileFunction.cpp index f50a122f9a28..31d3920ccfdd 100644 --- 
a/src/Interpreters/JIT/compileFunction.cpp +++ b/src/Interpreters/JIT/compileFunction.cpp @@ -2,16 +2,17 @@ #if USE_EMBEDDED_COMPILER -#include -#include -#include - -#include -#include -#include -#include -#include -#include +# include +# include +# include +# include +# include +# include +# include + +# include +# include +# include namespace { diff --git a/src/Interpreters/JIT/compileFunction.h b/src/Interpreters/JIT/compileFunction.h index 84abfa0925a1..551e4d0bb114 100644 --- a/src/Interpreters/JIT/compileFunction.h +++ b/src/Interpreters/JIT/compileFunction.h @@ -4,10 +4,10 @@ #if USE_EMBEDDED_COMPILER -#include -#include -#include -#include +# include +# include +# include +# include namespace DB diff --git a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp index 0717abd47821..f02021997529 100644 --- a/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp +++ b/src/Interpreters/RewriteFunctionToSubcolumnVisitor.cpp @@ -1,9 +1,10 @@ -#include -#include #include +#include +#include +#include #include #include -#include +#include namespace DB { diff --git a/src/Interpreters/WindowDescription.cpp b/src/Interpreters/WindowDescription.cpp index 8a7a5024d692..31a881001e3b 100644 --- a/src/Interpreters/WindowDescription.cpp +++ b/src/Interpreters/WindowDescription.cpp @@ -1,10 +1,10 @@ -#include - +#include #include -#include -#include #include +#include #include +#include +#include namespace DB diff --git a/src/Interpreters/WindowDescription.h b/src/Interpreters/WindowDescription.h index d14908fe9937..05269c9d2c38 100644 --- a/src/Interpreters/WindowDescription.h +++ b/src/Interpreters/WindowDescription.h @@ -1,12 +1,12 @@ #pragma once +#include #include -#include -#include -#include -#include #include +#include #include +#include +#include namespace DB { diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 67f4a306292d..d5f8baf547e0 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -5,9 +5,10 @@ #include #include -#include -#include #include +#include +#include +#include #include "Parsers/CommonParsers.h" #include diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h index aa2215731516..db8ee66ab2b5 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.h @@ -1,9 +1,10 @@ #pragma once +#include #include -#include #include #include +#include namespace DB { diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp index 817961b709c2..e3e94b8dd27d 100644 --- a/src/Processors/Merges/Algorithms/Graphite.cpp +++ b/src/Processors/Merges/Algorithms/Graphite.cpp @@ -1,16 +1,17 @@ -#include -#include #include +#include #include #include +#include #include +#include +#include #include #include #include #include -#include #include @@ -61,6 +62,23 @@ RuleType ruleType(const String & s) throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "invalid rule type: {}", s); } +void Pattern::updateHash(SipHash & hash) const +{ + hash.update(rule_type); + hash.update(regexp_str); + if (function) + { + hash.update(function->getName()); + for (const auto & p : function->getParameters()) + hash.update(toString(p)); + } + for (const auto & r : retentions) + { + hash.update(r.age); + hash.update(r.precision); + } +} + static const 
Graphite::Pattern undef_pattern = { /// empty pattern for selectPatternForPath .rule_type = RuleTypeAll, diff --git a/src/Processors/Merges/Algorithms/Graphite.h b/src/Processors/Merges/Algorithms/Graphite.h index 04bb4548c146..ce3331053d14 100644 --- a/src/Processors/Merges/Algorithms/Graphite.h +++ b/src/Processors/Merges/Algorithms/Graphite.h @@ -1,8 +1,9 @@ #pragma once -#include +#include +#include #include -#include +#include /** Intended for implementation of "rollup" - aggregation (rounding) of older data * for a table with Graphite data (Graphite is the system for time series monitoring). @@ -123,22 +124,7 @@ struct Pattern AggregateFunctionPtr function; Retentions retentions; /// Must be ordered by 'age' descending. enum { TypeUndef, TypeRetention, TypeAggregation, TypeAll } type = TypeAll; /// The type of defined pattern, filled automatically - void updateHash(SipHash & hash) const - { - hash.update(rule_type); - hash.update(regexp_str); - if (function) - { - hash.update(function->getName()); - for (const auto & p : function->getParameters()) - hash.update(toString(p)); - } - for (const auto & r : retentions) - { - hash.update(r.age); - hash.update(r.precision); - } - } + void updateHash(SipHash & hash) const; }; bool operator==(const Pattern & a, const Pattern & b); diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp index bb4f429d6268..3d1faf7c0797 100644 --- a/src/Processors/QueryPlan/WindowStep.cpp +++ b/src/Processors/QueryPlan/WindowStep.cpp @@ -1,10 +1,10 @@ +#include +#include +#include #include - -#include #include +#include #include -#include -#include #include namespace DB diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h index 347c2516230f..43fa6b28019e 100644 --- a/src/Processors/Transforms/WindowTransform.h +++ b/src/Processors/Transforms/WindowTransform.h @@ -21,6 +21,8 @@ using ExpressionActionsPtr = std::shared_ptr; class Arena; +class IWindowFunction; + // Runtime data for computing one window function. 
struct WindowFunctionWorkspace { diff --git a/src/Storages/System/StorageSystemGraphite.cpp b/src/Storages/System/StorageSystemGraphite.cpp index d8b760e1302d..ef13c3c24da0 100644 --- a/src/Storages/System/StorageSystemGraphite.cpp +++ b/src/Storages/System/StorageSystemGraphite.cpp @@ -1,7 +1,8 @@ -#include -#include +#include #include #include +#include +#include namespace DB diff --git a/src/Storages/System/StorageSystemMySQLBinlogs.cpp b/src/Storages/System/StorageSystemMySQLBinlogs.cpp index 32648d22ee8c..846fe3547d00 100644 --- a/src/Storages/System/StorageSystemMySQLBinlogs.cpp +++ b/src/Storages/System/StorageSystemMySQLBinlogs.cpp @@ -1,11 +1,11 @@ -#include - -#include -#include #include -#include +#include #include +#include +#include #include +#include +#include namespace DB diff --git a/src/Storages/System/StorageSystemRemoteDataPaths.cpp b/src/Storages/System/StorageSystemRemoteDataPaths.cpp index 0ca76430ceb4..f54fa220e830 100644 --- a/src/Storages/System/StorageSystemRemoteDataPaths.cpp +++ b/src/Storages/System/StorageSystemRemoteDataPaths.cpp @@ -1,14 +1,15 @@ #include "StorageSystemRemoteDataPaths.h" -#include +#include +#include +#include #include +#include #include +#include #include #include -#include -#include -#include #include -#include +#include namespace fs = std::filesystem; From 33aee0f599867da294cfc5327cc4ab932e761066 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 4 Apr 2024 13:00:51 +0200 Subject: [PATCH 233/470] Analyzer: Fix name resolution from parent scopes --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 8 +++++- ...alyzer_resolve_from_parent_scope.reference | 1 + ...033_analyzer_resolve_from_parent_scope.sql | 27 +++++++++++++++++++ 3 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.reference create mode 100644 tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.sql diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index f5474ddb662a..91832f6060d6 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -3993,9 +3993,15 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierInParentScopes(const } else if (resolved_identifier->as()) { - lookup_result.resolved_identifier = resolved_identifier; return lookup_result; } + else if (auto * resolved_function = resolved_identifier->as()) + { + /// Special case: scalar subquery was executed and replaced by __getScalar function. + /// Handle it as a constant. + if (resolved_function->getFunctionName() == "__getScalar") + return lookup_result; + } throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Resolve identifier '{}' from parent scope only supported for constants and CTE. Actual {} node type {}. 
In scope {}", diff --git a/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.reference b/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.reference new file mode 100644 index 000000000000..f599e28b8ab0 --- /dev/null +++ b/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.reference @@ -0,0 +1 @@ +10 diff --git a/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.sql b/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.sql new file mode 100644 index 000000000000..22f103c9bd5d --- /dev/null +++ b/tests/queries/0_stateless/03033_analyzer_resolve_from_parent_scope.sql @@ -0,0 +1,27 @@ +CREATE TABLE vecs_Float32 (v Array(Float32)) ENGINE=Memory; +INSERT INTO vecs_Float32 +SELECT v FROM ( + SELECT + number AS n, + [ + rand(n*10), rand(n*10+1), rand(n*10+2), rand(n*10+3), rand(n*10+4), rand(n*10+5), rand(n*10+6), rand(n*10+7), rand(n*10+8), rand(n*10+9), + rand(n*10+10), rand(n*10+11), rand(n*10+12), rand(n*10+13), rand(n*10+14), rand(n*10+15), rand(n*10+16), rand(n*10+17), rand(n*10+18), rand(n*10+19), + rand(n*10+20), rand(n*10+21), rand(n*10+22), rand(n*10+23), rand(n*10+24), rand(n*10+25), rand(n*10+26), rand(n*10+27), rand(n*10+28), rand(n*10+29), + rand(n*10+30), rand(n*10+31), rand(n*10+32), rand(n*10+33), rand(n*10+34), rand(n*10+35), rand(n*10+36), rand(n*10+37), rand(n*10+38), rand(n*10+39), + rand(n*10+40), rand(n*10+41), rand(n*10+42), rand(n*10+43), rand(n*10+44), rand(n*10+45), rand(n*10+46), rand(n*10+47), rand(n*10+48), rand(n*10+49), + rand(n*10+50), rand(n*10+51), rand(n*10+52), rand(n*10+53), rand(n*10+54), rand(n*10+55), rand(n*10+56), rand(n*10+57), rand(n*10+58), rand(n*10+59), + rand(n*10+60), rand(n*10+61), rand(n*10+62), rand(n*10+63), rand(n*10+64), rand(n*10+65), rand(n*10+66), rand(n*10+67), rand(n*10+68), rand(n*10+69), + rand(n*10+70), rand(n*10+71), rand(n*10+72), rand(n*10+73), rand(n*10+74), rand(n*10+75), rand(n*10+76), rand(n*10+77), rand(n*10+78), rand(n*10+79), + rand(n*10+80), rand(n*10+81), rand(n*10+82), rand(n*10+83), rand(n*10+84), rand(n*10+85), rand(n*10+86), rand(n*10+87), rand(n*10+88), rand(n*10+89), + rand(n*10+90), rand(n*10+91), rand(n*10+92), rand(n*10+93), rand(n*10+94), rand(n*10+95), rand(n*10+96), rand(n*10+97), rand(n*10+98), rand(n*10+99), + rand(n*10+100), rand(n*10+101), rand(n*10+102), rand(n*10+103), rand(n*10+104), rand(n*10+105), rand(n*10+106), rand(n*10+107), rand(n*10+108), rand(n*10+109), + rand(n*10+110), rand(n*10+111), rand(n*10+112), rand(n*10+113), rand(n*10+114), rand(n*10+115), rand(n*10+116), rand(n*10+117), rand(n*10+118), rand(n*10+119), + rand(n*10+120), rand(n*10+121), rand(n*10+122), rand(n*10+123), rand(n*10+124), rand(n*10+125), rand(n*10+126), rand(n*10+127), rand(n*10+128), rand(n*10+129), + rand(n*10+130), rand(n*10+131), rand(n*10+132), rand(n*10+133), rand(n*10+134), rand(n*10+135), rand(n*10+136), rand(n*10+137), rand(n*10+138), rand(n*10+139), + rand(n*10+140), rand(n*10+141), rand(n*10+142), rand(n*10+143), rand(n*10+144), rand(n*10+145), rand(n*10+146), rand(n*10+147), rand(n*10+148), rand(n*10+149) + ] AS v + FROM system.numbers + LIMIT 10 +); + +WITH (SELECT v FROM vecs_Float32 limit 1) AS a SELECT count(dp) FROM (SELECT dotProduct(a, v) AS dp FROM vecs_Float32); From 5d677936098c380c6f86f280c214b576f998175b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A1nos=20Benjamin=20Antal?= Date: Thu, 4 Apr 2024 11:41:28 +0000 Subject: [PATCH 234/470] Fix docs about default value of `output_format_pretty_row_numbers` --- 
docs/en/interfaces/formats.md | 4 ++-- docs/en/operations/settings/settings-formats.md | 2 +- docs/ru/operations/settings/settings.md | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index b6235fd11825..ddbbb365e8ad 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1487,7 +1487,7 @@ Differs from [PrettySpaceNoEscapes](#prettyspacenoescapes) in that up to 10,000 - [output_format_pretty_max_value_width](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_max_value_width) - Maximum width of value to display in Pretty formats. If greater - it will be cut. Default value - `10000`. - [output_format_pretty_color](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_color) - use ANSI escape sequences to paint colors in Pretty formats. Default value - `true`. - [output_format_pretty_grid_charset](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_grid_charset) - Charset for printing grid borders. Available charsets: ASCII, UTF-8. Default value - `UTF-8`. -- [output_format_pretty_row_numbers](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_row_numbers) - Add row numbers before each row for pretty output format. Default value - `false`. +- [output_format_pretty_row_numbers](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_row_numbers) - Add row numbers before each row for pretty output format. Default value - `true`. ## RowBinary {#rowbinary} @@ -2465,7 +2465,7 @@ Result: ## Npy {#data-format-npy} -This function is designed to load a NumPy array from a .npy file into ClickHouse. The NumPy file format is a binary format used for efficiently storing arrays of numerical data. During import, ClickHouse treats top level dimension as an array of rows with single column. Supported Npy data types and their corresponding type in ClickHouse: +This function is designed to load a NumPy array from a .npy file into ClickHouse. The NumPy file format is a binary format used for efficiently storing arrays of numerical data. During import, ClickHouse treats top level dimension as an array of rows with single column. Supported Npy data types and their corresponding type in ClickHouse: | Npy type | ClickHouse type | |:--------:|:---------------:| | b1 | UInt8 | diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 831c70941147..f455fcba840a 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -1642,7 +1642,7 @@ Possible values: - 0 — Output without row numbers. - 1 — Output with row numbers. -Default value: `0`. +Default value: `1`. **Example** diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index a56afda641b4..f9456e34a56f 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -2776,7 +2776,7 @@ SELECT range(number) FROM system.numbers LIMIT 5 FORMAT PrettyCompactNoEscapes; - 0 — номера строк не выводятся. - 1 — номера строк выводятся. -Значение по умолчанию: `0`. +Значение по умолчанию: `1`. **Пример** @@ -2798,7 +2798,7 @@ SELECT TOP 3 name, value FROM system.settings; ``` ### output_format_pretty_color {#output_format_pretty_color} -Включает/выключает управляющие последовательности ANSI в форматах Pretty. +Включает/выключает управляющие последовательности ANSI в форматах Pretty. 
Возможные значения: @@ -4123,7 +4123,7 @@ SELECT sum(number) FROM numbers(10000000000) SETTINGS partial_result_on_first_ca ## session_timezone {#session_timezone} Задаёт значение часового пояса (session_timezone) по умолчанию для текущей сессии вместо [часового пояса сервера](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone). То есть, все значения DateTime/DateTime64, для которых явно не задан часовой пояс, будут интерпретированы как относящиеся к указанной зоне. -При значении настройки `''` (пустая строка), будет совпадать с часовым поясом сервера. +При значении настройки `''` (пустая строка), будет совпадать с часовым поясом сервера. Функции `timeZone()` and `serverTimezone()` возвращают часовой пояс текущей сессии и сервера соответственно. From 62f9be052c3a1046fb492a313ad77df80e532009 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 4 Apr 2024 13:41:30 +0200 Subject: [PATCH 235/470] Fix test --- src/Interpreters/Cache/FileCache.cpp | 10 ------- src/Interpreters/Cache/FileCacheFactory.cpp | 8 +++--- src/Interpreters/Cache/FileCacheSettings.cpp | 30 ++++++++++---------- tests/config/config.d/storage_conf.xml | 2 +- 4 files changed, 20 insertions(+), 30 deletions(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 8ab46e66a76a..90671629e647 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1379,16 +1379,6 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, } } - { - auto cache_lock = lockCache(); - LOG_TRACE(log, "new max size: {}, old max size: {}, " - "new elements count: {}, old_elements_count: {}, " - "current max size: {}, current max elements: {}", - new_settings.max_size, actual_settings.max_size, - new_settings.max_elements, actual_settings.max_elements, - main_priority->getSizeLimit(cache_lock), main_priority->getElementsLimit(cache_lock)); - } - if (new_settings.max_size != actual_settings.max_size || new_settings.max_elements != actual_settings.max_elements) { diff --git a/src/Interpreters/Cache/FileCacheFactory.cpp b/src/Interpreters/Cache/FileCacheFactory.cpp index a046c4c2b6a2..747b31bff644 100644 --- a/src/Interpreters/Cache/FileCacheFactory.cpp +++ b/src/Interpreters/Cache/FileCacheFactory.cpp @@ -162,10 +162,10 @@ void FileCacheFactory::updateSettingsFromConfig(const Poco::Util::AbstractConfig continue; } - LOG_TRACE(log, "Will apply settings changes for cache {}. " - "Settings changes: {} (new settings: {}, old_settings: {})", - cache_name, fmt::join(new_settings.getSettingsDiff(old_settings), ", "), - new_settings.toString(), old_settings.toString()); + // LOG_TRACE(log, "Will apply settings changes for cache {}. 
" + // "Settings changes: {} (new settings: {}, old_settings: {})", + // cache_name, fmt::join(new_settings.getSettingsDiff(old_settings), ", "), + // new_settings.toString(), old_settings.toString()); try { diff --git a/src/Interpreters/Cache/FileCacheSettings.cpp b/src/Interpreters/Cache/FileCacheSettings.cpp index fef17d0df28b..ff5f48503b7e 100644 --- a/src/Interpreters/Cache/FileCacheSettings.cpp +++ b/src/Interpreters/Cache/FileCacheSettings.cpp @@ -102,21 +102,21 @@ void FileCacheSettings::loadFromCollection(const NamedCollection & collection) std::string FileCacheSettings::toString() const { WriteBufferFromOwnString res; - res << "base_path: " << base_path << "\n"; - res << "max_size: " << max_size << "\n"; - res << "max_elements: " << max_elements << "\n"; - res << "max_file_segment_size: " << max_file_segment_size << "\n"; - res << "cache_on_write_operations: " << cache_on_write_operations << "\n"; - res << "cache_hits_threshold: " << cache_hits_threshold << "\n"; - res << "enable_filesystem_query_cache_limit: " << enable_filesystem_query_cache_limit << "\n"; - res << "bypass_cache_threshold: " << bypass_cache_threshold << "\n"; - res << "boundary_alignment: " << boundary_alignment << "\n"; - res << "background_download_threads: " << background_download_threads << "\n"; - res << "background_download_queue_size_limit: " << background_download_queue_size_limit << "\n"; - res << "load_metadata_threads: " << load_metadata_threads << "\n"; - res << "write_cache_per_user_id_directory: " << write_cache_per_user_id_directory << "\n"; - res << "cache_policy: " << cache_policy << "\n"; - res << "slru_size_ratio: " << slru_size_ratio << "\n"; + res << "base_path: " << base_path << ", "; + res << "max_size: " << max_size << ", "; + res << "max_elements: " << max_elements << ", "; + res << "max_file_segment_size: " << max_file_segment_size << ", "; + res << "cache_on_write_operations: " << cache_on_write_operations << ", "; + res << "cache_hits_threshold: " << cache_hits_threshold << ", "; + res << "enable_filesystem_query_cache_limit: " << enable_filesystem_query_cache_limit << ", "; + res << "bypass_cache_threshold: " << bypass_cache_threshold << ", "; + res << "boundary_alignment: " << boundary_alignment << ", "; + res << "background_download_threads: " << background_download_threads << ", "; + res << "background_download_queue_size_limit: " << background_download_queue_size_limit << ", "; + res << "load_metadata_threads: " << load_metadata_threads << ", "; + res << "write_cache_per_user_id_directory: " << write_cache_per_user_id_directory << ", "; + res << "cache_policy: " << cache_policy << ", "; + res << "slru_size_ratio: " << slru_size_ratio << ", "; return res.str(); } diff --git a/tests/config/config.d/storage_conf.xml b/tests/config/config.d/storage_conf.xml index 00d8cb3aea56..d40854247cd0 100644 --- a/tests/config/config.d/storage_conf.xml +++ b/tests/config/config.d/storage_conf.xml @@ -19,7 +19,7 @@ cache s3_disk s3_cache/ - 64Mi + 104857600 1 100 LRU From 7e608f567b4794c7b28007c1d454ec5db5ec9657 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 4 Apr 2024 13:55:52 +0200 Subject: [PATCH 236/470] Fix argMax with nullable non native numeric column --- src/AggregateFunctions/SingleValueData.cpp | 2 +- ...rgMinMax_numeric_non_extreme_bug.reference | 12 +++++++++ ...3035_argMinMax_numeric_non_extreme_bug.sql | 26 +++++++++++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 
tests/queries/0_stateless/03035_argMinMax_numeric_non_extreme_bug.reference create mode 100644 tests/queries/0_stateless/03035_argMinMax_numeric_non_extreme_bug.sql diff --git a/src/AggregateFunctions/SingleValueData.cpp b/src/AggregateFunctions/SingleValueData.cpp index 72eaf36e254e..a14caf00f730 100644 --- a/src/AggregateFunctions/SingleValueData.cpp +++ b/src/AggregateFunctions/SingleValueData.cpp @@ -579,7 +579,7 @@ std::optional SingleValueDataFixed::getGreatestIndexNotNullIf( return std::nullopt; for (size_t i = index + 1; i < row_end; i++) - if ((!if_map || if_map[i] != 0) && (!null_map || null_map[i] == 0) && (vec[i] < vec[index])) + if ((!if_map || if_map[i] != 0) && (!null_map || null_map[i] == 0) && (vec[i] > vec[index])) index = i; return {index}; } diff --git a/tests/queries/0_stateless/03035_argMinMax_numeric_non_extreme_bug.reference b/tests/queries/0_stateless/03035_argMinMax_numeric_non_extreme_bug.reference new file mode 100644 index 000000000000..be07c950fea2 --- /dev/null +++ b/tests/queries/0_stateless/03035_argMinMax_numeric_non_extreme_bug.reference @@ -0,0 +1,12 @@ +Row 1: +────── +max(time): 2021-01-01 00:00:59.000 +max(toNullable(time)): 2021-01-01 00:00:59.000 +min(time): 2021-01-01 00:00:00.000 +min(toNullable(time)): 2021-01-01 00:00:00.000 +argMax(value, time): -1 +argMax(value, toNullable(time)): -1 +argMin(value, time): 0 +argMin(value, toNullable(time)): 0 +argMinIf(value, toNullable(time), notEquals(time, '2021-01-01 00:00:00.000')): 1 +argMaxIf(value, toNullable(time), notEquals(time, '2021-01-01 00:00:59.000')): -2 diff --git a/tests/queries/0_stateless/03035_argMinMax_numeric_non_extreme_bug.sql b/tests/queries/0_stateless/03035_argMinMax_numeric_non_extreme_bug.sql new file mode 100644 index 000000000000..deb580b90404 --- /dev/null +++ b/tests/queries/0_stateless/03035_argMinMax_numeric_non_extreme_bug.sql @@ -0,0 +1,26 @@ +CREATE TABLE IF NOT EXISTS test +( + `value` Float64 CODEC(Delta, LZ4), + `uuid` LowCardinality(String), + `time` DateTime64(3, 'UTC') CODEC(DoubleDelta, LZ4) +) +ENGINE = MergeTree() +ORDER BY uuid; + + +INSERT INTO test (uuid, time, value) +VALUES ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:00.000',0), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:09.000',1), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:10.000',2), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:19.000',3), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:20.000',2), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:29.000',1), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:30.000',0), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:39.000',-1), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:40.000',-2), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:49.000',-3), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:50.000',-2), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:00:59.000',-1), ('a1000000-0000-0000-0000-0000000000a1','2021-01-01 00:01:00.000',0); + +SELECT + max(time), + max(toNullable(time)), + min(time), + min(toNullable(time)), + argMax(value, time), + argMax(value, toNullable(time)), + argMin(value, time), + argMin(value, toNullable(time)), + argMinIf(value, toNullable(time), time != '2021-01-01 00:00:00.000'), + argMaxIf(value, toNullable(time), time != '2021-01-01 00:00:59.000'), +FROM test +WHERE (time >= fromUnixTimestamp64Milli(1609459200000, 'UTC')) AND (time < fromUnixTimestamp64Milli(1609459260000, 'UTC')) FORMAT 
Vertical; From 193aaea2b3acb9802d14c7047ded6924df57aa26 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 4 Apr 2024 14:13:22 +0200 Subject: [PATCH 237/470] Add arrayEnumerateUniqRanked and arrayEnumerateDenseRanked functions --- .../functions/array-functions.md | 85 +++++++++++++++++-- 1 file changed, 80 insertions(+), 5 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 7377174ace9e..8b7680d72cd0 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -939,6 +939,66 @@ SELECT arrayEnumerateUniq([1, 1, 1, 2, 2, 2], [1, 1, 2, 1, 1, 2]) AS res This is necessary when using ARRAY JOIN with a nested data structure and further aggregation across multiple elements in this structure. +## arrayEnumerateUniqRanked + +Returns an array the same size as the source array, indicating for each element what its position is among elements with the same value. It allows for enumeration of a multidimensional array with the ability to specify how deep to look inside the array. + +**Syntax** + +```sql +arrayEnumerateUniqRanked(clear_depth, arr, max_array_depth) +``` + +**Parameters** + +- `clear_depth`: Enumerate elements at the specified level separately. Positive [Integer](../data-types/int-uint.md) less than or equal to `max_arr_depth`. +- `arr`: N-dimensional array to enumerate. [Array](../data-types/array.md) +- `max_array_depth`: The maximum effective depth. Positive [Integer](../data-types/int-uint.md) less than or equal to the depth of `arr`. + +**Example** + +With `clear_depth=1` and `max_array_depth=1`, the result of `arrayEnumerateUniqRanked` is identical to that which [`arrayEnumerateUniq`](#arrayenumerateuniqarr) would give for the same array. + +Query: + +``` sql +SELECT arrayEnumerateUniqRanked(1, [1,2,1], 1); +``` + +Result: + +``` text +[1,1,2] +``` + +In this example, `arrayEnumerateUniqRanked` is used to obtain an array indicating, for each element of the multidimensional array, what its position is among elements of the same value. For the first row of the passed array,`[1,2,3]`, the corresponding result is `[1,1,1]`, indicating that this is the first time `1`,`2` and `3` are encountered. For the second row of the provided array,`[2,2,1]`, the corresponding result is `[2,3,3]`, indicating that the number `2` is encountered for a second and third time, and `1` is encountered for the second time. Likewise, for the third row of the provided array `[3]` the corresponding result is `[2]` indicating that the number `3` is encountered for the second time. + +Query: + +``` sql +SELECT arrayEnumerateUniqRanked(1, [[1,2,3],[2,2,1],[3]], 2); +``` + +Result: + +``` text +[[1,1,1],[2,3,2],[2]] +``` + +Changing `clear_depth=2`, results in elements being enumerated seperately for each row. + +Query: + +``` sql +SELECT arrayEnumerateUniqRanked(2, [[1,2,3],[2,2,1],[3]], 2); +``` + +Result: + +``` text +[[1,1,1],[1,2,1],[1]] +``` + ## arrayPopBack Removes the last item from the array. @@ -1478,7 +1538,7 @@ Result: ``` ## arrayEnumerateDenseRanked -Enumerates distinct values of the passed multidimensional array, looking inside at the specified depths. +Returns an array the same size as the source array, indicating where each element first appears in the source array. It allows for enumeration of a multidimensional array with the ability to specify how deep to look inside the array. 
**Syntax** @@ -1488,7 +1548,7 @@ arrayEnumerateDenseRanked(clear_depth, arr, max_array_depth) **Parameters** -- `clear_depth`: Enumerate elements at the specified level separately. (The enumeration counter is reset for each new element). Positive [Integer](../data-types/int-uint.md) less than or equal to `max_arr_depth`. +- `clear_depth`: Enumerate elements at the specified level separately. Positive [Integer](../data-types/int-uint.md) less than or equal to `max_arr_depth`. - `arr`: N-dimensional array to enumerate. [Array](../data-types/array.md) - `max_array_depth`: The maximum effective depth. Positive [Integer](../data-types/int-uint.md) less than or equal to the depth of `arr`. @@ -1508,18 +1568,33 @@ Result: [1,2,1,3] ``` -`arrayEnumerateDenseRanked` can be used to enumerate multidimensional arrays. +In this example, `arrayEnumerateDenseRanked` is used to obtain an array indicating, for each element of the multidimensional array, what its position is among elements of the same value. For the first row of the passed array,`[10,10,30,20]`, the corresponding first row of the result is `[1,1,2,3]`, indicating that `10` is the first element encountered in position 1 and 2, `30` the second element encountered in position 3 and `20` is the third element encountered in position 4. For the second row, `[40, 50, 10, 30]`, the corresponding second row of the result is `[4,5,1,2]`, indicating that `40` and `50` are the fourth and fifth numbers encountered in position 1 and 2 of that row, that another `10` (the first encountered number) is in position 3 and `30` (the second number encountered) is in the last position. + + +Query: + +``` sql +SELECT arrayEnumerateDenseRanked(1,[[10,10,30,20],[40,50,10,30]],2); +``` + +Result: + +``` text +[[1,1,2,3],[4,5,1,2]] +``` + +Changing `clear_depth=2` results in the enumeration occuring separetely for each row anew. 
Query: ``` sql -SELECT arrayEnumerateDenseRanked(2,[[10, 20, 10, 30],[40, 50, 60, 70]],2); +SELECT arrayEnumerateDenseRanked(2,[[10,10,30,20],[40,50,10,30]],2); ``` Result: ``` text -[[1,2,1,3],[4,5,6,7]] +[[1,1,2,3],[1,2,3,4]] ``` ## arrayIntersect(arr) From a0925e6bc4efcbb0d9c3e8e8475810a566c63fa1 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 4 Apr 2024 14:19:19 +0200 Subject: [PATCH 238/470] empty commit From b7c0501ac292f64992a06a4b880520174cf3cf00 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Thu, 4 Apr 2024 09:26:37 -0300 Subject: [PATCH 239/470] fix ut --- src/IO/tests/gtest_s3_uri.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index 9c1f7bd5219d..175550accccd 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -103,7 +103,7 @@ const TestCase TestCases[] = { false}, // Non standard port {S3::URI("https://bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w-us-east-1a.s3.us-east-1.vpce.amazonaws.com:65535/root/nested/file.txt"), - "https://bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com:65535", + "https://bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w-us-east-1a.s3.us-east-1.vpce.amazonaws.com:65535", "root", "nested/file.txt", "", From fb07a2f5ed9b3249990d9ef3664bf479a4b0a42d Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 4 Apr 2024 14:53:24 +0200 Subject: [PATCH 240/470] Add arrayFirstOrNull function --- .../functions/array-functions.md | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 8b7680d72cd0..f3c893cbd6df 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -2109,14 +2109,54 @@ Note that the `arrayAll` is a [higher-order function](../../sql-reference/functi Returns the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. +## arrayFirstOrNull + +Returns the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0, otherwise it returns `NULL`. + +**Syntax** + +```sql +arrayFirstOrNull(func, arr1, …) +``` + +**Parameters** + +- `func`: lambda function. +- `arr1`: array to operate on. [Array](../) + +**Returned value** + +- The first element in the passed array. +- Otherwise, returns `NULL` + +**Implementation details** + Note that the `arrayFirst` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. +**Example** + +Query: + +```sql +SELECT arrayFirstOrNull(x -> x >= 2, emptyArrayUInt8()); +``` + +Result: + +```response +\N +``` + ## arrayLast(func, arr1, …) Returns the last element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. Note that the `arrayLast` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. +## arrayLastOrNull + + + ## arrayFirstIndex(func, arr1, …) Returns the index of the first element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. 
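A minimal sketch, assuming the semantics described in the patch above, of how the newly documented `arrayFirstOrNull` differs from the existing `arrayFirst` when no element satisfies the predicate; the results noted in the comments are expectations based on that documented behaviour (default value vs. `NULL`), not captured query output.

```sql
-- arrayFirst falls back to the default value of the element type (0 for numbers)
-- when nothing matches, while arrayFirstOrNull is documented above to return NULL.
SELECT
    arrayFirst(x -> x > 10, [1, 2, 3])       AS first_default,  -- expected: 0
    arrayFirstOrNull(x -> x > 10, [1, 2, 3]) AS first_or_null;  -- expected: NULL
```

The same contrast is expected to hold for `arrayLast` and the `arrayLastOrNull` heading introduced above, which a later patch in this series fills in.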
From 9e1d8b8aaa438d7280c10f966102fab2aa1ae46b Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Thu, 4 Apr 2024 15:22:09 +0200 Subject: [PATCH 241/470] Fix mortonEncode with no rows --- src/Functions/mortonEncode.cpp | 3 +++ .../queries/0_stateless/03035_morton_encode_no_rows.reference | 2 ++ tests/queries/0_stateless/03035_morton_encode_no_rows.sql | 2 ++ 3 files changed, 7 insertions(+) create mode 100644 tests/queries/0_stateless/03035_morton_encode_no_rows.reference create mode 100644 tests/queries/0_stateless/03035_morton_encode_no_rows.sql diff --git a/src/Functions/mortonEncode.cpp b/src/Functions/mortonEncode.cpp index fee14c7784b5..3b95c114b145 100644 --- a/src/Functions/mortonEncode.cpp +++ b/src/Functions/mortonEncode.cpp @@ -321,6 +321,9 @@ class FunctionMortonEncode: public TargetSpecific::Default::FunctionMortonEncode ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { + if (input_rows_count == 0) + return ColumnUInt64::create(); + return selector.selectAndExecute(arguments, result_type, input_rows_count); } diff --git a/tests/queries/0_stateless/03035_morton_encode_no_rows.reference b/tests/queries/0_stateless/03035_morton_encode_no_rows.reference new file mode 100644 index 000000000000..dc8bb19a028d --- /dev/null +++ b/tests/queries/0_stateless/03035_morton_encode_no_rows.reference @@ -0,0 +1,2 @@ +4294967286 +4294967286 diff --git a/tests/queries/0_stateless/03035_morton_encode_no_rows.sql b/tests/queries/0_stateless/03035_morton_encode_no_rows.sql new file mode 100644 index 000000000000..2663b1ac2126 --- /dev/null +++ b/tests/queries/0_stateless/03035_morton_encode_no_rows.sql @@ -0,0 +1,2 @@ +SELECT mortonEncode(materialize((1, 1)), 65534, 65533); +SELECT mortonEncode((1, 1), 65534, 65533); From 01fecadabf959bc71377291d5a7cb8eb9ae9048c Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Thu, 4 Apr 2024 15:35:51 +0200 Subject: [PATCH 242/470] Mark format RowBinaryWithDefaults as not suitable for output --- docs/en/interfaces/formats.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index b6235fd11825..37127b3ace47 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -79,7 +79,7 @@ The supported formats are: | [RowBinary](#rowbinary) | ✔ | ✔ | | [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ | | [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | -| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | ✔ | +| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | ✗ | | [Native](#native) | ✔ | ✔ | | [Null](#null) | ✗ | ✔ | | [XML](#xml) | ✗ | ✔ | From 368dcf03ecc9426497aaaf739e9efa5fb43e1f63 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Thu, 4 Apr 2024 15:37:38 +0200 Subject: [PATCH 243/470] Run new analyzer tests with enabled allow_experimental_analyzer --- tests/queries/0_stateless/03033_cte_numbers_memory.sql | 1 + tests/queries/0_stateless/03033_with_fill_interpolate.sql | 1 + tests/queries/0_stateless/03034_normalized_ast.sql | 1 + tests/queries/0_stateless/03035_alias_column_bug_distributed.sql | 1 + tests/queries/0_stateless/03036_with_numbers.sql | 1 + tests/queries/0_stateless/03037_union_view.sql | 1 + tests/queries/0_stateless/03038_ambiguous_column.sql | 1 + .../0_stateless/03039_unknown_identifier_window_function.sql | 1 + tests/queries/0_stateless/03040_alias_column_join.sql | 1 + 
tests/queries/0_stateless/03040_array_sum_and_join.sql | 1 + tests/queries/0_stateless/03041_analyzer_gigachad_join.sql | 1 + tests/queries/0_stateless/03041_select_with_query_result.sql | 1 + tests/queries/0_stateless/03042_analyzer_alias_join.sql | 1 + tests/queries/0_stateless/03042_not_found_column_c1.sql | 1 + .../queries/0_stateless/03043_group_array_result_is_expected.sql | 1 + tests/queries/0_stateless/03044_analyzer_alias_join.sql | 1 + .../0_stateless/03044_array_join_columns_in_nested_table.sql | 1 + tests/queries/0_stateless/03045_analyzer_alias_join_with_if.sql | 1 + .../0_stateless/03045_unknown_identifier_alias_substitution.sql | 1 + tests/queries/0_stateless/03046_column_in_block_array_join.sql | 1 + tests/queries/0_stateless/03047_analyzer_alias_join.sql | 1 + .../0_stateless/03047_group_by_field_identified_aggregation.sql | 1 + .../queries/0_stateless/03048_not_found_column_xxx_in_block.sql | 1 + tests/queries/0_stateless/03049_analyzer_group_by_alias.sql | 1 + .../0_stateless/03049_unknown_identifier_materialized_column.sql | 1 + tests/queries/0_stateless/03050_select_one_one_one.sql | 1 + tests/queries/0_stateless/03051_many_ctes.sql | 1 + tests/queries/0_stateless/03052_query_hash_includes_aliases.sql | 1 + tests/queries/0_stateless/03053_analyzer_join_alias.sql | 1 + tests/queries/0_stateless/03054_analyzer_join_alias.sql | 1 + .../queries/0_stateless/03055_analyzer_subquery_group_array.sql | 1 + .../queries/0_stateless/03056_analyzer_double_subquery_alias.sql | 1 + tests/queries/0_stateless/03057_analyzer_subquery_alias_join.sql | 1 + tests/queries/0_stateless/03058_analyzer_ambiguous_columns.sql | 1 + .../0_stateless/03059_analyzer_join_engine_missing_column.sql | 1 + tests/queries/0_stateless/03060_analyzer_regular_view_alias.sql | 1 + .../0_stateless/03061_analyzer_alias_as_right_key_in_join.sql | 1 + .../0_stateless/03062_analyzer_join_engine_missing_column.sql | 1 + .../03063_analyzer_multi_join_wrong_table_specifier.sql | 1 + tests/queries/0_stateless/03064_analyzer_named_subqueries.sql | 1 + .../0_stateless/03065_analyzer_cross_join_and_array_join.sql | 1 + .../queries/0_stateless/03066_analyzer_global_with_statement.sql | 1 + tests/queries/0_stateless/03067_analyzer_complex_alias_join.sql | 1 + tests/queries/0_stateless/03068_analyzer_distributed_join.sql | 1 + .../0_stateless/03069_analyzer_with_alias_in_array_join.sql | 1 + .../queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.sql | 1 + .../03071_analyzer_array_join_forbid_non_existing_columns.sql | 1 + .../0_stateless/03072_analyzer_missing_columns_from_subquery.sql | 1 + .../queries/0_stateless/03073_analyzer_alias_as_column_name.sql | 1 + .../queries/0_stateless/03074_analyzer_alias_column_in_view.sql | 1 + tests/queries/0_stateless/03075_analyzer_subquery_alias.sql | 1 + .../queries/0_stateless/03076_analyzer_multiple_joins_alias.sql | 1 + .../0_stateless/03077_analyzer_multi_scalar_subquery_aliases.sql | 1 + .../0_stateless/03078_analyzer_multi_scalar_subquery_aliases.sql | 1 + .../03079_analyzer_numeric_literals_as_column_names.sql | 1 + ...080_analyzer_prefer_column_name_to_alias__virtual_columns.sql | 1 + tests/queries/0_stateless/03080_incorrect_join_with_merge.sql | 1 + tests/queries/0_stateless/03081_analyzer_agg_func_CTE.sql | 1 + .../0_stateless/03082_analyzer_left_join_correct_column.sql | 1 + tests/queries/0_stateless/03084_analyzer_join_column_alias.sql | 1 + .../queries/0_stateless/03085_analyzer_alias_column_group_by.sql | 1 + 
.../0_stateless/03086_analyzer_window_func_part_of_group_by.sql | 1 + tests/queries/0_stateless/03087_analyzer_subquery_with_alias.sql | 1 + .../0_stateless/03088_analyzer_ambiguous_column_multi_call.sql | 1 + tests/queries/0_stateless/03089_analyzer_alias_replacement.sql | 1 + 65 files changed, 65 insertions(+) diff --git a/tests/queries/0_stateless/03033_cte_numbers_memory.sql b/tests/queries/0_stateless/03033_cte_numbers_memory.sql index 0e3ee9abd652..66b11cbfaa5f 100644 --- a/tests/queries/0_stateless/03033_cte_numbers_memory.sql +++ b/tests/queries/0_stateless/03033_cte_numbers_memory.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/61238 +SET allow_experimental_analyzer=1; WITH (SELECT number FROM system.numbers LIMIT 1) as w1, diff --git a/tests/queries/0_stateless/03033_with_fill_interpolate.sql b/tests/queries/0_stateless/03033_with_fill_interpolate.sql index 816633af757b..0ec0050a9221 100644 --- a/tests/queries/0_stateless/03033_with_fill_interpolate.sql +++ b/tests/queries/0_stateless/03033_with_fill_interpolate.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/55794 +SET allow_experimental_analyzer=1; DROP TABLE IF EXISTS 03033_example_table; CREATE TABLE 03033_example_table diff --git a/tests/queries/0_stateless/03034_normalized_ast.sql b/tests/queries/0_stateless/03034_normalized_ast.sql index dd109eb5113e..385af4e2c34c 100644 --- a/tests/queries/0_stateless/03034_normalized_ast.sql +++ b/tests/queries/0_stateless/03034_normalized_ast.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/49472 +SET allow_experimental_analyzer=1; SELECT concat(database, table) AS name, count() diff --git a/tests/queries/0_stateless/03035_alias_column_bug_distributed.sql b/tests/queries/0_stateless/03035_alias_column_bug_distributed.sql index 3a7b4890bf01..74463743b011 100644 --- a/tests/queries/0_stateless/03035_alias_column_bug_distributed.sql +++ b/tests/queries/0_stateless/03035_alias_column_bug_distributed.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/44414 +SET allow_experimental_analyzer=1; DROP TABLE IF EXISTS alias_bug; DROP TABLE IF EXISTS alias_bug_dist; CREATE TABLE alias_bug diff --git a/tests/queries/0_stateless/03036_with_numbers.sql b/tests/queries/0_stateless/03036_with_numbers.sql index 5e08bb6e0652..3463ce826e2d 100644 --- a/tests/queries/0_stateless/03036_with_numbers.sql +++ b/tests/queries/0_stateless/03036_with_numbers.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/13843 +SET allow_experimental_analyzer=1; WITH 10 AS n SELECT * FROM numbers(n); diff --git a/tests/queries/0_stateless/03037_union_view.sql b/tests/queries/0_stateless/03037_union_view.sql index fb8aa7df9542..3ea81b829bab 100644 --- a/tests/queries/0_stateless/03037_union_view.sql +++ b/tests/queries/0_stateless/03037_union_view.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/55803 +SET allow_experimental_analyzer=1; DROP TABLE IF EXISTS broken_table; DROP TABLE IF EXISTS broken_view; diff --git a/tests/queries/0_stateless/03038_ambiguous_column.sql b/tests/queries/0_stateless/03038_ambiguous_column.sql index 69c8e52d7341..9df3cd9bc9bd 100644 --- a/tests/queries/0_stateless/03038_ambiguous_column.sql +++ b/tests/queries/0_stateless/03038_ambiguous_column.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/48308 +SET allow_experimental_analyzer=1; DROP TABLE IF EXISTS 03038_table; CREATE TABLE 03038_table diff --git 
a/tests/queries/0_stateless/03039_unknown_identifier_window_function.sql b/tests/queries/0_stateless/03039_unknown_identifier_window_function.sql index ca3bb521eba2..640d217d2f96 100644 --- a/tests/queries/0_stateless/03039_unknown_identifier_window_function.sql +++ b/tests/queries/0_stateless/03039_unknown_identifier_window_function.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/45535 +SET allow_experimental_analyzer=1; SELECT *, diff --git a/tests/queries/0_stateless/03040_alias_column_join.sql b/tests/queries/0_stateless/03040_alias_column_join.sql index f4ea2e5914df..54f579c0feb1 100644 --- a/tests/queries/0_stateless/03040_alias_column_join.sql +++ b/tests/queries/0_stateless/03040_alias_column_join.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/44365 +SET allow_experimental_analyzer=1; DROP TABLE IF EXISTS 03040_test; CREATE TABLE 03040_test diff --git a/tests/queries/0_stateless/03040_array_sum_and_join.sql b/tests/queries/0_stateless/03040_array_sum_and_join.sql index 0084f0e4c7bb..9aeddc9f7652 100644 --- a/tests/queries/0_stateless/03040_array_sum_and_join.sql +++ b/tests/queries/0_stateless/03040_array_sum_and_join.sql @@ -1,3 +1,4 @@ +SET allow_experimental_analyzer=1; select t.1 as cnt, t.2 as name, diff --git a/tests/queries/0_stateless/03041_analyzer_gigachad_join.sql b/tests/queries/0_stateless/03041_analyzer_gigachad_join.sql index 462e63b121b6..7906e65f8b8b 100644 --- a/tests/queries/0_stateless/03041_analyzer_gigachad_join.sql +++ b/tests/queries/0_stateless/03041_analyzer_gigachad_join.sql @@ -1,3 +1,4 @@ +SET allow_experimental_analyzer=1; CREATE TABLE IF NOT EXISTS first engine = MergeTree PARTITION BY (inn, toYYYYMM(received)) ORDER BY (inn, sessionId) AS SELECT now() AS received, '123456789' AS inn, '42' AS sessionId; diff --git a/tests/queries/0_stateless/03041_select_with_query_result.sql b/tests/queries/0_stateless/03041_select_with_query_result.sql index 3edf51d635e7..061223b43e13 100644 --- a/tests/queries/0_stateless/03041_select_with_query_result.sql +++ b/tests/queries/0_stateless/03041_select_with_query_result.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/44153 +SET allow_experimental_analyzer=1; DROP TABLE IF EXISTS parent; DROP TABLE IF EXISTS join_table_1; DROP TABLE IF EXISTS join_table_2; diff --git a/tests/queries/0_stateless/03042_analyzer_alias_join.sql b/tests/queries/0_stateless/03042_analyzer_alias_join.sql index f3341fd314a8..dac3b6a4983e 100644 --- a/tests/queries/0_stateless/03042_analyzer_alias_join.sql +++ b/tests/queries/0_stateless/03042_analyzer_alias_join.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/14978 +SET allow_experimental_analyzer=1; CREATE TABLE test1(id UInt64, t1value UInt64) ENGINE=MergeTree ORDER BY tuple(); CREATE TABLE test2(id UInt64, t2value String) ENGINE=MergeTree ORDER BY tuple(); diff --git a/tests/queries/0_stateless/03042_not_found_column_c1.sql b/tests/queries/0_stateless/03042_not_found_column_c1.sql index 8ce7dcd9d4f8..b4dce2af4895 100644 --- a/tests/queries/0_stateless/03042_not_found_column_c1.sql +++ b/tests/queries/0_stateless/03042_not_found_column_c1.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/42399 +SET allow_experimental_analyzer=1; CREATE TABLE IF NOT EXISTS t0 (c0 Int32) ENGINE = Memory() ; CREATE TABLE t1 (c0 Int32, c1 Int32, c2 Int32) ENGINE = Memory() ; diff --git a/tests/queries/0_stateless/03043_group_array_result_is_expected.sql 
b/tests/queries/0_stateless/03043_group_array_result_is_expected.sql index df77ca666471..5311927ae3cf 100644 --- a/tests/queries/0_stateless/03043_group_array_result_is_expected.sql +++ b/tests/queries/0_stateless/03043_group_array_result_is_expected.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/27115 +SET allow_experimental_analyzer=1; drop table if exists fill_ex; create table fill_ex ( diff --git a/tests/queries/0_stateless/03044_analyzer_alias_join.sql b/tests/queries/0_stateless/03044_analyzer_alias_join.sql index 5202b57a7b11..3ab8edb005f3 100644 --- a/tests/queries/0_stateless/03044_analyzer_alias_join.sql +++ b/tests/queries/0_stateless/03044_analyzer_alias_join.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/17319 +SET allow_experimental_analyzer=1; CREATE TEMPORARY TABLE hits (date Date, data Float64) engine=Memory(); SELECT diff --git a/tests/queries/0_stateless/03044_array_join_columns_in_nested_table.sql b/tests/queries/0_stateless/03044_array_join_columns_in_nested_table.sql index f3ec80b8a94c..0cf057632023 100644 --- a/tests/queries/0_stateless/03044_array_join_columns_in_nested_table.sql +++ b/tests/queries/0_stateless/03044_array_join_columns_in_nested_table.sql @@ -1,2 +1,3 @@ -- https://github.com/ClickHouse/ClickHouse/issues/11813 +SET allow_experimental_analyzer=1; select 1 from (select 1 x) l join (select 1 y, [1] a) r on l.x = r.y array join r.a; diff --git a/tests/queries/0_stateless/03045_analyzer_alias_join_with_if.sql b/tests/queries/0_stateless/03045_analyzer_alias_join_with_if.sql index a0546f57736f..ee8756b94603 100644 --- a/tests/queries/0_stateless/03045_analyzer_alias_join_with_if.sql +++ b/tests/queries/0_stateless/03045_analyzer_alias_join_with_if.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/13210 +SET allow_experimental_analyzer=1; CREATE TABLE test_a_table ( name String, a_col String diff --git a/tests/queries/0_stateless/03045_unknown_identifier_alias_substitution.sql b/tests/queries/0_stateless/03045_unknown_identifier_alias_substitution.sql index cadcbdc0ce54..d97dfc880b3d 100644 --- a/tests/queries/0_stateless/03045_unknown_identifier_alias_substitution.sql +++ b/tests/queries/0_stateless/03045_unknown_identifier_alias_substitution.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/23053 +SET allow_experimental_analyzer=1; DROP TABLE IF EXISTS repl_tbl; CREATE TEMPORARY TABLE repl_tbl diff --git a/tests/queries/0_stateless/03046_column_in_block_array_join.sql b/tests/queries/0_stateless/03046_column_in_block_array_join.sql index 9a2bb19d81e0..c6b4613af3f2 100644 --- a/tests/queries/0_stateless/03046_column_in_block_array_join.sql +++ b/tests/queries/0_stateless/03046_column_in_block_array_join.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/37729 +SET allow_experimental_analyzer=1; DROP TABLE IF EXISTS nested_test; DROP TABLE IF EXISTS join_test; diff --git a/tests/queries/0_stateless/03047_analyzer_alias_join.sql b/tests/queries/0_stateless/03047_analyzer_alias_join.sql index ef8c067bb72d..7d44c92b6f12 100644 --- a/tests/queries/0_stateless/03047_analyzer_alias_join.sql +++ b/tests/queries/0_stateless/03047_analyzer_alias_join.sql @@ -1,3 +1,4 @@ +SET allow_experimental_analyzer=1; SELECT 1 AS value, * diff --git a/tests/queries/0_stateless/03047_group_by_field_identified_aggregation.sql b/tests/queries/0_stateless/03047_group_by_field_identified_aggregation.sql index e1363ea4ddac..cfaf1df44bd1 100644 --- 
a/tests/queries/0_stateless/03047_group_by_field_identified_aggregation.sql +++ b/tests/queries/0_stateless/03047_group_by_field_identified_aggregation.sql @@ -1,3 +1,4 @@ -- https://github.com/ClickHouse/ClickHouse/issues/32639 +SET allow_experimental_analyzer=1; SELECT 0 AND id ? 1 : 2 AS a, sum(id) FROM (SELECT 1 AS id) GROUP BY a; diff --git a/tests/queries/0_stateless/03048_not_found_column_xxx_in_block.sql b/tests/queries/0_stateless/03048_not_found_column_xxx_in_block.sql index 25f88050eb12..42fd581e1420 100644 --- a/tests/queries/0_stateless/03048_not_found_column_xxx_in_block.sql +++ b/tests/queries/0_stateless/03048_not_found_column_xxx_in_block.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/41964 +SET allow_experimental_analyzer=1; DROP TABLE IF EXISTS ab_12_aaa; DROP TABLE IF EXISTS ab_12_bbb; diff --git a/tests/queries/0_stateless/03049_analyzer_group_by_alias.sql b/tests/queries/0_stateless/03049_analyzer_group_by_alias.sql index 67df48e0cad6..d25babe6788b 100644 --- a/tests/queries/0_stateless/03049_analyzer_group_by_alias.sql +++ b/tests/queries/0_stateless/03049_analyzer_group_by_alias.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/7520 +SET allow_experimental_analyzer=1; CREATE TABLE test (`a` UInt32, `b` UInt32) ENGINE = Memory; INSERT INTO test VALUES (1,2), (1,3), (2,4); diff --git a/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql b/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql index 2f12799addb3..a1c858a329c4 100644 --- a/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql +++ b/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/54317 +SET allow_experimental_analyzer=1; DROP DATABASE IF EXISTS 03049_database; DROP TABLE IF EXISTS 03049_database.l; DROP TABLE IF EXISTS 03049_database.r; diff --git a/tests/queries/0_stateless/03050_select_one_one_one.sql b/tests/queries/0_stateless/03050_select_one_one_one.sql index eee973fe9365..28a55e0c4715 100644 --- a/tests/queries/0_stateless/03050_select_one_one_one.sql +++ b/tests/queries/0_stateless/03050_select_one_one_one.sql @@ -1,3 +1,4 @@ -- https://github.com/ClickHouse/ClickHouse/issues/36973 +SET allow_experimental_analyzer=1; SELECT 1, 1, 1; SELECT * FROM (SELECT 1, 1, 1); diff --git a/tests/queries/0_stateless/03051_many_ctes.sql b/tests/queries/0_stateless/03051_many_ctes.sql index 412a1e6b544c..d4e613bd279e 100644 --- a/tests/queries/0_stateless/03051_many_ctes.sql +++ b/tests/queries/0_stateless/03051_many_ctes.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/40955 +SET allow_experimental_analyzer=1; WITH toInt64(2) AS new_x SELECT new_x AS x FROM (SELECT 1 AS x) t; WITH toInt64(2) AS new_x SELECT * replace(new_x as x) FROM (SELECT 1 AS x) t; SELECT 2 AS x FROM (SELECT 1 AS x) t; diff --git a/tests/queries/0_stateless/03052_query_hash_includes_aliases.sql b/tests/queries/0_stateless/03052_query_hash_includes_aliases.sql index e76108c78429..24e9ab0f36e9 100644 --- a/tests/queries/0_stateless/03052_query_hash_includes_aliases.sql +++ b/tests/queries/0_stateless/03052_query_hash_includes_aliases.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/pull/40065 +SET allow_experimental_analyzer=1; SELECT ( diff --git a/tests/queries/0_stateless/03053_analyzer_join_alias.sql b/tests/queries/0_stateless/03053_analyzer_join_alias.sql index 7e11cc7c810e..ef51ec730261 100644 --- 
a/tests/queries/0_stateless/03053_analyzer_join_alias.sql +++ b/tests/queries/0_stateless/03053_analyzer_join_alias.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/23104 +SET allow_experimental_analyzer=1; DROP DATABASE IF EXISTS test_03053; CREATE DATABASE test_03053; diff --git a/tests/queries/0_stateless/03054_analyzer_join_alias.sql b/tests/queries/0_stateless/03054_analyzer_join_alias.sql index 0bf93258aa62..e124aa33a9b3 100644 --- a/tests/queries/0_stateless/03054_analyzer_join_alias.sql +++ b/tests/queries/0_stateless/03054_analyzer_join_alias.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/21584 +SET allow_experimental_analyzer=1; SELECT count() FROM ( diff --git a/tests/queries/0_stateless/03055_analyzer_subquery_group_array.sql b/tests/queries/0_stateless/03055_analyzer_subquery_group_array.sql index 071d8f8e1c81..25b6dcb3564a 100644 --- a/tests/queries/0_stateless/03055_analyzer_subquery_group_array.sql +++ b/tests/queries/0_stateless/03055_analyzer_subquery_group_array.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/23344 +SET allow_experimental_analyzer=1; SELECT logTrace(repeat('Hello', 100)), ignore(*) FROM ( SELECT ignore((SELECT groupArrayState(([number], [number])) FROM numbers(19000))) diff --git a/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql b/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql index b86ae97c8bfd..de471c1a0911 100644 --- a/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql +++ b/tests/queries/0_stateless/03056_analyzer_double_subquery_alias.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/22627 +SET allow_experimental_analyzer=1; WITH x AS ( diff --git a/tests/queries/0_stateless/03057_analyzer_subquery_alias_join.sql b/tests/queries/0_stateless/03057_analyzer_subquery_alias_join.sql index 13852471dca5..2217af327fa2 100644 --- a/tests/queries/0_stateless/03057_analyzer_subquery_alias_join.sql +++ b/tests/queries/0_stateless/03057_analyzer_subquery_alias_join.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/10276 +SET allow_experimental_analyzer=1; SELECT sum(x.n) as n, sum(z.n) as n2 diff --git a/tests/queries/0_stateless/03058_analyzer_ambiguous_columns.sql b/tests/queries/0_stateless/03058_analyzer_ambiguous_columns.sql index 47df6e76a389..3cce77f02403 100644 --- a/tests/queries/0_stateless/03058_analyzer_ambiguous_columns.sql +++ b/tests/queries/0_stateless/03058_analyzer_ambiguous_columns.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/4567 +SET allow_experimental_analyzer=1; DROP TABLE IF EXISTS fact; DROP TABLE IF EXISTS animals; DROP TABLE IF EXISTS colors; diff --git a/tests/queries/0_stateless/03059_analyzer_join_engine_missing_column.sql b/tests/queries/0_stateless/03059_analyzer_join_engine_missing_column.sql index 76150335f7ef..27782462075e 100644 --- a/tests/queries/0_stateless/03059_analyzer_join_engine_missing_column.sql +++ b/tests/queries/0_stateless/03059_analyzer_join_engine_missing_column.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/17710 +SET allow_experimental_analyzer=1; CREATE TABLE id_val(id UInt32, val UInt32) ENGINE = Memory; CREATE TABLE id_val_join0(id UInt32, val UInt8) ENGINE = Join(ANY, LEFT, id) SETTINGS join_use_nulls = 0; CREATE TABLE id_val_join1(id UInt32, val UInt8) ENGINE = Join(ANY, LEFT, id) SETTINGS join_use_nulls = 1; diff --git a/tests/queries/0_stateless/03060_analyzer_regular_view_alias.sql 
b/tests/queries/0_stateless/03060_analyzer_regular_view_alias.sql index ba0257d7b3bc..f8cd8690ee50 100644 --- a/tests/queries/0_stateless/03060_analyzer_regular_view_alias.sql +++ b/tests/queries/0_stateless/03060_analyzer_regular_view_alias.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/11068 +SET allow_experimental_analyzer=1; create table vt(datetime_value DateTime, value Float64) Engine=Memory; create view computed_datum_hours as diff --git a/tests/queries/0_stateless/03061_analyzer_alias_as_right_key_in_join.sql b/tests/queries/0_stateless/03061_analyzer_alias_as_right_key_in_join.sql index e223909a5a8e..6fee6d1f73d3 100644 --- a/tests/queries/0_stateless/03061_analyzer_alias_as_right_key_in_join.sql +++ b/tests/queries/0_stateless/03061_analyzer_alias_as_right_key_in_join.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/24395 +SET allow_experimental_analyzer=1; CREATE TABLE xxxx_yyy (key UInt32, key_b ALIAS key) ENGINE=MergeTree() ORDER BY key; INSERT INTO xxxx_yyy SELECT number FROM numbers(10); diff --git a/tests/queries/0_stateless/03062_analyzer_join_engine_missing_column.sql b/tests/queries/0_stateless/03062_analyzer_join_engine_missing_column.sql index 6c24ef6f66d0..9748175e4d4a 100644 --- a/tests/queries/0_stateless/03062_analyzer_join_engine_missing_column.sql +++ b/tests/queries/0_stateless/03062_analyzer_join_engine_missing_column.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/23416 +SET allow_experimental_analyzer=1; create table test (TOPIC String, PARTITION UInt64, OFFSET UInt64, ID UInt64) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_03062', 'r2') ORDER BY (TOPIC, PARTITION, OFFSET); create table test_join (TOPIC String, PARTITION UInt64, OFFSET UInt64) ENGINE = Join(ANY, LEFT, `TOPIC`, `PARTITION`) SETTINGS join_any_take_last_row = 1; diff --git a/tests/queries/0_stateless/03063_analyzer_multi_join_wrong_table_specifier.sql b/tests/queries/0_stateless/03063_analyzer_multi_join_wrong_table_specifier.sql index c2c29b688cdb..7eab1fa846a6 100644 --- a/tests/queries/0_stateless/03063_analyzer_multi_join_wrong_table_specifier.sql +++ b/tests/queries/0_stateless/03063_analyzer_multi_join_wrong_table_specifier.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/23162 +SET allow_experimental_analyzer=1; CREATE TABLE t1 ( k Int64, x Int64) ENGINE = Memory; CREATE TABLE t2( x Int64 ) ENGINE = Memory; diff --git a/tests/queries/0_stateless/03064_analyzer_named_subqueries.sql b/tests/queries/0_stateless/03064_analyzer_named_subqueries.sql index ef8aca2fefac..59ebb9d9af3c 100644 --- a/tests/queries/0_stateless/03064_analyzer_named_subqueries.sql +++ b/tests/queries/0_stateless/03064_analyzer_named_subqueries.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/25655 +SET allow_experimental_analyzer=1; SELECT sum(t.b) / 1 a, sum(t.a) diff --git a/tests/queries/0_stateless/03065_analyzer_cross_join_and_array_join.sql b/tests/queries/0_stateless/03065_analyzer_cross_join_and_array_join.sql index c270a0f45041..7e6befe181ef 100644 --- a/tests/queries/0_stateless/03065_analyzer_cross_join_and_array_join.sql +++ b/tests/queries/0_stateless/03065_analyzer_cross_join_and_array_join.sql @@ -1,2 +1,3 @@ -- https://github.com/ClickHouse/ClickHouse/issues/11757 +SET allow_experimental_analyzer=1; select * from (select [1, 2] a) aa cross join (select [3, 4] b) bb array join aa.a, bb.b; diff --git a/tests/queries/0_stateless/03066_analyzer_global_with_statement.sql 
b/tests/queries/0_stateless/03066_analyzer_global_with_statement.sql index 338eb30e6ffd..8983be242c38 100644 --- a/tests/queries/0_stateless/03066_analyzer_global_with_statement.sql +++ b/tests/queries/0_stateless/03066_analyzer_global_with_statement.sql @@ -1,3 +1,4 @@ +SET allow_experimental_analyzer=1; WITH 0 AS test SELECT * FROM diff --git a/tests/queries/0_stateless/03067_analyzer_complex_alias_join.sql b/tests/queries/0_stateless/03067_analyzer_complex_alias_join.sql index 7d1264a61162..052a9eaf734b 100644 --- a/tests/queries/0_stateless/03067_analyzer_complex_alias_join.sql +++ b/tests/queries/0_stateless/03067_analyzer_complex_alias_join.sql @@ -1,3 +1,4 @@ +SET allow_experimental_analyzer=1; with d as (select 'key'::Varchar(255) c, 'x'::Varchar(255) s) SELECT r1, c as r2 FROM ( diff --git a/tests/queries/0_stateless/03068_analyzer_distributed_join.sql b/tests/queries/0_stateless/03068_analyzer_distributed_join.sql index 714a64a01b67..82f58e9a7500 100644 --- a/tests/queries/0_stateless/03068_analyzer_distributed_join.sql +++ b/tests/queries/0_stateless/03068_analyzer_distributed_join.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/6571 +SET allow_experimental_analyzer=1; CREATE TABLE LINEITEM_shard ON CLUSTER test_shard_localhost ( L_ORDERKEY UInt64, diff --git a/tests/queries/0_stateless/03069_analyzer_with_alias_in_array_join.sql b/tests/queries/0_stateless/03069_analyzer_with_alias_in_array_join.sql index 84ad0b4e199c..09d2985fe601 100644 --- a/tests/queries/0_stateless/03069_analyzer_with_alias_in_array_join.sql +++ b/tests/queries/0_stateless/03069_analyzer_with_alias_in_array_join.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/4432 +SET allow_experimental_analyzer=1; WITH [1, 2] AS zz SELECT x FROM system.one diff --git a/tests/queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.sql b/tests/queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.sql index 672c4f53e5fe..7aadab2ca736 100644 --- a/tests/queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.sql +++ b/tests/queries/0_stateless/03070_analyzer_CTE_scalar_as_numbers.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/8259 +SET allow_experimental_analyzer=1; with (select 25) as something select *, something diff --git a/tests/queries/0_stateless/03071_analyzer_array_join_forbid_non_existing_columns.sql b/tests/queries/0_stateless/03071_analyzer_array_join_forbid_non_existing_columns.sql index af81e3c28190..e2eb758d6495 100644 --- a/tests/queries/0_stateless/03071_analyzer_array_join_forbid_non_existing_columns.sql +++ b/tests/queries/0_stateless/03071_analyzer_array_join_forbid_non_existing_columns.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/9233 +SET allow_experimental_analyzer=1; SELECT * FROM ( diff --git a/tests/queries/0_stateless/03072_analyzer_missing_columns_from_subquery.sql b/tests/queries/0_stateless/03072_analyzer_missing_columns_from_subquery.sql index 68ff81413b76..e2846033913f 100644 --- a/tests/queries/0_stateless/03072_analyzer_missing_columns_from_subquery.sql +++ b/tests/queries/0_stateless/03072_analyzer_missing_columns_from_subquery.sql @@ -1,2 +1,3 @@ -- https://github.com/ClickHouse/ClickHouse/issues/14699 +SET allow_experimental_analyzer=1; select * from (select number from numbers(1)) where not ignore(*); diff --git a/tests/queries/0_stateless/03073_analyzer_alias_as_column_name.sql b/tests/queries/0_stateless/03073_analyzer_alias_as_column_name.sql index 0e7d2eb95c71..5599324c62b1 100644 --- 
a/tests/queries/0_stateless/03073_analyzer_alias_as_column_name.sql +++ b/tests/queries/0_stateless/03073_analyzer_alias_as_column_name.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/27068 +SET allow_experimental_analyzer=1; CREATE TABLE test ( id String, create_time DateTime ) ENGINE = MergeTree ORDER BY id; insert into test values(1,'1970-02-01 00:00:00'); diff --git a/tests/queries/0_stateless/03074_analyzer_alias_column_in_view.sql b/tests/queries/0_stateless/03074_analyzer_alias_column_in_view.sql index 6f9704217881..4df5f6f48e6f 100644 --- a/tests/queries/0_stateless/03074_analyzer_alias_column_in_view.sql +++ b/tests/queries/0_stateless/03074_analyzer_alias_column_in_view.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/28687 +SET allow_experimental_analyzer=1; create view alias (dummy int, n alias dummy) as select * from system.one; select n from alias; diff --git a/tests/queries/0_stateless/03075_analyzer_subquery_alias.sql b/tests/queries/0_stateless/03075_analyzer_subquery_alias.sql index 897b189b1330..416815e761b5 100644 --- a/tests/queries/0_stateless/03075_analyzer_subquery_alias.sql +++ b/tests/queries/0_stateless/03075_analyzer_subquery_alias.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/28777 +SET allow_experimental_analyzer=1; SELECT sum(q0.a2) AS a1, sum(q0.a1) AS a9 diff --git a/tests/queries/0_stateless/03076_analyzer_multiple_joins_alias.sql b/tests/queries/0_stateless/03076_analyzer_multiple_joins_alias.sql index 8b8b76a5be1c..7ac9fe6b4464 100644 --- a/tests/queries/0_stateless/03076_analyzer_multiple_joins_alias.sql +++ b/tests/queries/0_stateless/03076_analyzer_multiple_joins_alias.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/29734 +SET allow_experimental_analyzer=1; SELECT * FROM ( diff --git a/tests/queries/0_stateless/03077_analyzer_multi_scalar_subquery_aliases.sql b/tests/queries/0_stateless/03077_analyzer_multi_scalar_subquery_aliases.sql index 3d558bdd602d..5a181023c57b 100644 --- a/tests/queries/0_stateless/03077_analyzer_multi_scalar_subquery_aliases.sql +++ b/tests/queries/0_stateless/03077_analyzer_multi_scalar_subquery_aliases.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/33825 +SET allow_experimental_analyzer=1; CREATE TABLE t1 (i Int64, j Int64) ENGINE = Memory; INSERT INTO t1 SELECT number, number FROM system.numbers LIMIT 10; SELECT diff --git a/tests/queries/0_stateless/03078_analyzer_multi_scalar_subquery_aliases.sql b/tests/queries/0_stateless/03078_analyzer_multi_scalar_subquery_aliases.sql index ded6bfbe4e3a..d91a9ed106dd 100644 --- a/tests/queries/0_stateless/03078_analyzer_multi_scalar_subquery_aliases.sql +++ b/tests/queries/0_stateless/03078_analyzer_multi_scalar_subquery_aliases.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/33825 +SET allow_experimental_analyzer=1; CREATE TABLE t2 (first_column Int64, second_column Int64) ENGINE = Memory; INSERT INTO t2 SELECT number, number FROM system.numbers LIMIT 10; diff --git a/tests/queries/0_stateless/03079_analyzer_numeric_literals_as_column_names.sql b/tests/queries/0_stateless/03079_analyzer_numeric_literals_as_column_names.sql index e6bcad345064..955d3b49a004 100644 --- a/tests/queries/0_stateless/03079_analyzer_numeric_literals_as_column_names.sql +++ b/tests/queries/0_stateless/03079_analyzer_numeric_literals_as_column_names.sql @@ -1,3 +1,4 @@ +SET allow_experimental_analyzer=1; CREATE TABLE testdata (`1` String) ENGINE=MergeTree ORDER BY tuple(); 
INSERT INTO testdata VALUES ('testdata'); diff --git a/tests/queries/0_stateless/03080_analyzer_prefer_column_name_to_alias__virtual_columns.sql b/tests/queries/0_stateless/03080_analyzer_prefer_column_name_to_alias__virtual_columns.sql index 1fe19cdad2a5..01ab868f9eab 100644 --- a/tests/queries/0_stateless/03080_analyzer_prefer_column_name_to_alias__virtual_columns.sql +++ b/tests/queries/0_stateless/03080_analyzer_prefer_column_name_to_alias__virtual_columns.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/35652 +SET allow_experimental_analyzer=1; CREATE TABLE test ( id UInt64 ) diff --git a/tests/queries/0_stateless/03080_incorrect_join_with_merge.sql b/tests/queries/0_stateless/03080_incorrect_join_with_merge.sql index ae8e40f6d56e..4985d3abfb65 100644 --- a/tests/queries/0_stateless/03080_incorrect_join_with_merge.sql +++ b/tests/queries/0_stateless/03080_incorrect_join_with_merge.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/29838 +SET allow_experimental_analyzer=1; CREATE TABLE first_table_lr ( diff --git a/tests/queries/0_stateless/03081_analyzer_agg_func_CTE.sql b/tests/queries/0_stateless/03081_analyzer_agg_func_CTE.sql index 81dbbb3b62dd..e6a540dc5df7 100644 --- a/tests/queries/0_stateless/03081_analyzer_agg_func_CTE.sql +++ b/tests/queries/0_stateless/03081_analyzer_agg_func_CTE.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/36189 +SET allow_experimental_analyzer=1; CREATE TABLE test ( `dt` Date, diff --git a/tests/queries/0_stateless/03082_analyzer_left_join_correct_column.sql b/tests/queries/0_stateless/03082_analyzer_left_join_correct_column.sql index 93702fee5501..8f17248ed0d6 100644 --- a/tests/queries/0_stateless/03082_analyzer_left_join_correct_column.sql +++ b/tests/queries/0_stateless/03082_analyzer_left_join_correct_column.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/39634 +SET allow_experimental_analyzer=1; CREATE TABLE test1 ( `pk` String, diff --git a/tests/queries/0_stateless/03084_analyzer_join_column_alias.sql b/tests/queries/0_stateless/03084_analyzer_join_column_alias.sql index caf65823532a..8337c0ce9878 100644 --- a/tests/queries/0_stateless/03084_analyzer_join_column_alias.sql +++ b/tests/queries/0_stateless/03084_analyzer_join_column_alias.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/47432 +SET allow_experimental_analyzer=1; create or replace table t1 engine = MergeTree() order by tuple() diff --git a/tests/queries/0_stateless/03085_analyzer_alias_column_group_by.sql b/tests/queries/0_stateless/03085_analyzer_alias_column_group_by.sql index f4eaa5d9710f..fd67194b08b5 100644 --- a/tests/queries/0_stateless/03085_analyzer_alias_column_group_by.sql +++ b/tests/queries/0_stateless/03085_analyzer_alias_column_group_by.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/54910 +SET allow_experimental_analyzer=1; SELECT toTypeName(stat_standard_id) AS stat_standard_id_1, count(1) AS value FROM ( SELECT 'string value' AS stat_standard_id ) GROUP BY stat_standard_id_1 LIMIT 1 diff --git a/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.sql b/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.sql index 55a60873a5a9..ac03019de201 100644 --- a/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.sql +++ b/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/57321 +SET 
allow_experimental_analyzer=1; SELECT ver, max(ver) OVER () AS ver_max diff --git a/tests/queries/0_stateless/03087_analyzer_subquery_with_alias.sql b/tests/queries/0_stateless/03087_analyzer_subquery_with_alias.sql index 98aca76fe494..6546e50c99e0 100644 --- a/tests/queries/0_stateless/03087_analyzer_subquery_with_alias.sql +++ b/tests/queries/0_stateless/03087_analyzer_subquery_with_alias.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/59154 +SET allow_experimental_analyzer=1; SELECT * FROM ( diff --git a/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql b/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql index 84afdb295c24..09425d2e5036 100644 --- a/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql +++ b/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/61014 +SET allow_experimental_analyzer=1; create database test_03088; create table test_03088.a (i int) engine = Log(); diff --git a/tests/queries/0_stateless/03089_analyzer_alias_replacement.sql b/tests/queries/0_stateless/03089_analyzer_alias_replacement.sql index 00a3795eab8b..069da5fdd65b 100644 --- a/tests/queries/0_stateless/03089_analyzer_alias_replacement.sql +++ b/tests/queries/0_stateless/03089_analyzer_alias_replacement.sql @@ -1,4 +1,5 @@ -- https://github.com/ClickHouse/ClickHouse/issues/61950 +SET allow_experimental_analyzer=1; with dummy + 1 as dummy select dummy from system.one; From dd64145bc82420220914f45e097fee24f2cb321e Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 4 Apr 2024 15:49:03 +0200 Subject: [PATCH 244/470] Add arrayLastOrNull function --- .../functions/array-functions.md | 60 ++++++++++++++++++- 1 file changed, 59 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index f3c893cbd6df..836e2f7a7e90 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -2131,12 +2131,24 @@ arrayFirstOrNull(func, arr1, …) **Implementation details** -Note that the `arrayFirst` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. +Note that the `arrayFirstOrNull` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). You must pass a lambda function to it as the first argument, and it can’t be omitted. **Example** Query: +```sql +SELECT arrayFirstOrNull(x -> x >= 2, [1, 2, 3]); +``` + +Result: + +```response +2 +``` + +Query: + ```sql SELECT arrayFirstOrNull(x -> x >= 2, emptyArrayUInt8()); ``` @@ -2155,7 +2167,53 @@ Note that the `arrayLast` is a [higher-order function](../../sql-reference/funct ## arrayLastOrNull +Returns the last element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0, otherwise returns `NULL`. +**Syntax** + +```sql +arrayLastOrNull(func, arr1, …) +``` + +**Parameters** + +- `func`: lambda function. +- `arr1`: array to operate on. [Array](../) + +**Returned value** + +- The last element in the passed array. +- Otherwise, returns `NULL` + +**Implementation details** + +Note that the `arrayLastOrNull` is a [higher-order function](../../sql-reference/functions/index.md#higher-order-functions). 
You must pass a lambda function to it as the first argument, and it can’t be omitted. + +**Example** + +Query: + +```sql +SELECT arrayLastOrNull(x -> x >= 2, [1, 2, 3]); +``` + +Result: + +```response +3 +``` + +Query: + +```sql +SELECT arrayLastOrNull(x -> x >= 2, emptyArrayUInt8()); +``` + +Result: + +```response +\N +``` ## arrayFirstIndex(func, arr1, …) From cd1e96a8a10c026214a07bc6bf251c008236fa3c Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Thu, 4 Apr 2024 16:22:17 +0200 Subject: [PATCH 245/470] Close: https://github.com/ClickHouse/ClickHouse/issues/55647 --- ...analyzer_multiple_using_statements.reference | 1 + ...03090_analyzer_multiple_using_statements.sql | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 tests/queries/0_stateless/03090_analyzer_multiple_using_statements.reference create mode 100644 tests/queries/0_stateless/03090_analyzer_multiple_using_statements.sql diff --git a/tests/queries/0_stateless/03090_analyzer_multiple_using_statements.reference b/tests/queries/0_stateless/03090_analyzer_multiple_using_statements.reference new file mode 100644 index 000000000000..573541ac9702 --- /dev/null +++ b/tests/queries/0_stateless/03090_analyzer_multiple_using_statements.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/03090_analyzer_multiple_using_statements.sql b/tests/queries/0_stateless/03090_analyzer_multiple_using_statements.sql new file mode 100644 index 000000000000..c35f33782ff2 --- /dev/null +++ b/tests/queries/0_stateless/03090_analyzer_multiple_using_statements.sql @@ -0,0 +1,17 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/55647 +SET allow_experimental_analyzer=1; + +SELECT +* +FROM ( + SELECT * + FROM system.one +) a +JOIN ( + SELECT * + FROM system.one +) b USING dummy +JOIN ( + SELECT * + FROM system.one +) c USING dummy From 3e3f8ba0f025c33218029bf565ae61b472718883 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 4 Apr 2024 16:22:29 +0200 Subject: [PATCH 246/470] Add arrayPartialShuffle function --- .../functions/array-functions.md | 58 +++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 836e2f7a7e90..3314571d833c 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1414,6 +1414,64 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; Same as `arrayReverseSort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in descending order. Remaining elements `(limit..N]` shall contain elements in unspecified order. +## arrayPartialShuffle + +Returns an array of the same size as the original array where elements in range [1..limit] are a random subset of the original array. Remaining (limit..n] shall contain the elements not in [1..limit] range in undefined order. Value of limit shall be in range [1..n]. Values outside of that range are equivalent to performing full arrayShuffle. + +**Syntax** + +```sql +arrayPartialShuffle(arr, limit, seed) +``` + +**Parameters** + +- `arr`: The array to partially shuffle. [Array](../data-types/array.md) +- `limit` (optional): Specifies how many times to limit element swaps to. []() +- `seed` (optional): seed to be used with random number generation. If not provided a random one is used. []() + +**Returned value** + +- Array with elements shuffled. 
+ +**Implementation details** + +:::note +This function will not materialize constants. +::: + +**Examples** + +In this example, `arrayPartialShuffle` is used without the `limit` and `seed` parameters. + +Query: + +```sql +SELECT arrayPartialShuffle([1, 2, 3, 4], 0); +SELECT arrayPartialShuffle([1, 2, 3, 4]); +``` + +Note: When using [ClickHouse Fiddle](https://fiddle.clickhouse.com/), the exact response may differ due to random nature of the function. + +Result: +```response +[3,1,2,4] +[4,1,3,2] +``` + +In this example, the `arrayPartialShuffle` function is provided a `limit` and a `seed`. + +Query: + +```sql +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 5, 0xbad_cafe); +``` + +Result: +```response +[10,9,4,2,5,6,7,8,3,1] +``` + ## arrayUniq(arr, …) If one argument is passed, it counts the number of different elements in the array. From 32d124e9033bd3785eb1b301f908bf8733ff433d Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Thu, 4 Apr 2024 16:30:42 +0200 Subject: [PATCH 247/470] Close: https://github.com/ClickHouse/ClickHouse/issues/61947 --- ...able_name_in_different_databases.reference | 4 +++ ...same_table_name_in_different_databases.sql | 28 +++++++++++++++++++ ...able_name_in_different_databases.reference | 1 + ...same_table_name_in_different_databases.sql | 18 ++++++++++++ 4 files changed, 51 insertions(+) create mode 100644 tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.reference create mode 100644 tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.sql create mode 100644 tests/queries/0_stateless/03092_analyzer_same_table_name_in_different_databases.reference create mode 100644 tests/queries/0_stateless/03092_analyzer_same_table_name_in_different_databases.sql diff --git a/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.reference b/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.reference new file mode 100644 index 000000000000..ce45f6636b2e --- /dev/null +++ b/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.reference @@ -0,0 +1,4 @@ +1 0 + +using asterisk 1 0 +using field name 1 0 diff --git a/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.sql b/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.sql new file mode 100644 index 000000000000..2185b5f450ae --- /dev/null +++ b/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.sql @@ -0,0 +1,28 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/61947 +SET allow_experimental_analyzer=1; + +DROP DATABASE IF EXISTS d1; +DROP DATABASE IF EXISTS d2; + +CREATE DATABASE d1; +CREATE DATABASE d2; +CREATE TABLE d1.`1-1` (field Int8) ENGINE = Memory; +CREATE TABLE d2.`1-1` (field Int8) ENGINE = Memory; +CREATE TABLE d2.`2-1` (field Int8) ENGINE = Memory; + +INSERT INTO d1.`1-1` VALUES (1); + +SELECT * +FROM d1.`1-1` +LEFT JOIN d2.`1-1` ON d1.`1-1`.field = d2.`1-1`.field; + +SELECT ''; + +SELECT 'using asterisk', d1.`1-1`.*, d2.`1-1`.* +FROM d1.`1-1` +LEFT JOIN d2.`1-1` USING field +UNION ALL +SELECT 'using field name', d1.`1-1`.field, d2.`1-1`.field +FROM d1.`1-1` +LEFT JOIN d2.`1-1` USING field +ORDER BY *; diff --git a/tests/queries/0_stateless/03092_analyzer_same_table_name_in_different_databases.reference b/tests/queries/0_stateless/03092_analyzer_same_table_name_in_different_databases.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ 
b/tests/queries/0_stateless/03092_analyzer_same_table_name_in_different_databases.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03092_analyzer_same_table_name_in_different_databases.sql b/tests/queries/0_stateless/03092_analyzer_same_table_name_in_different_databases.sql new file mode 100644 index 000000000000..03ad9c97d94d --- /dev/null +++ b/tests/queries/0_stateless/03092_analyzer_same_table_name_in_different_databases.sql @@ -0,0 +1,18 @@ +-- https://github.com/ClickHouse/ClickHouse/issues/61947 +SET allow_experimental_analyzer=1; + +DROP DATABASE IF EXISTS d3; +DROP DATABASE IF EXISTS d4; + +CREATE DATABASE d3; +CREATE DATABASE d4; +CREATE TABLE d3.`1-1` (field Int8) ENGINE = Memory; +CREATE TABLE d4.`2-1` (field Int8) ENGINE = Memory; +CREATE TABLE d4.`3-1` (field Int8) ENGINE = Memory; + +INSERT INTO d3.`1-1` VALUES (1); + +SELECT d3.`1-1`.* +FROM d3.`1-1` +LEFT JOIN d4.`2-1` ON d3.`1-1`.field = d4.`2-1`.field +LEFT JOIN d4.`3-1` ON d4.`2-1`.field = d4.`3-1`.field; From 34a21199451d99cef8190a80e9ca498b0bfde175 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 4 Apr 2024 16:32:29 +0200 Subject: [PATCH 248/470] Public visibility --- contrib/nuraft-cmake/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/nuraft-cmake/CMakeLists.txt b/contrib/nuraft-cmake/CMakeLists.txt index 140fb11c6d0e..a57b7ac6225b 100644 --- a/contrib/nuraft-cmake/CMakeLists.txt +++ b/contrib/nuraft-cmake/CMakeLists.txt @@ -52,7 +52,7 @@ else() endif() target_link_libraries (_nuraft PRIVATE clickhouse_common_io) -target_compile_definitions(_nuraft PRIVATE USE_CLICKHOUSE_THREADS=1) +target_compile_definitions(_nuraft PUBLIC USE_CLICKHOUSE_THREADS=1) MESSAGE(STATUS "Will use clickhouse threads for NuRaft") target_include_directories (_nuraft SYSTEM PRIVATE "${LIBRARY_DIR}/include/libnuraft") From cf03ced5a5d92fd1d6e5cec9071a0301a9def21d Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 4 Apr 2024 16:34:18 +0200 Subject: [PATCH 249/470] Added arrayShuffle function --- .../functions/array-functions.md | 66 +++++++++++++++++-- 1 file changed, 61 insertions(+), 5 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 3314571d833c..7d33f3d9949e 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1414,9 +1414,65 @@ SELECT arrayReverseSort((x, y) -> -y, [4, 3, 5], [1, 2, 3]) AS res; Same as `arrayReverseSort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in descending order. Remaining elements `(limit..N]` shall contain elements in unspecified order. +## arrayShuffle + +Returns an array of the same size as the original array containing the elements in shuffled order. +Elements are being reordered in such a way that each possible permutation of those elements has equal probability of appearance. + +**Syntax** + +```sql +arrayShuffle(arr, seed) +``` + +**Parameters** + +- `arr`: The array to partially shuffle. [Array](../data-types/array.md) +- `seed` (optional): seed to be used with random number generation. If not provided a random one is used. [UInt or Int](../data-types/int-uint.md) + +- Array with elements shuffled. + +**Implementation details** + +:::note +This function will not materialize constants. 
+::: + +**Examples** + +In this example, `arrayShuffle` is used with providing a `seed` and will use its own randomly generated one. + +Query: + +```sql +SELECT arrayShuffle([1, 2, 3, 4]); +``` + +Note: when using [ClickHouse Fiddle](https://fiddle.clickhouse.com/), the exact response may differ due to random nature of the function. + +Result: + +```response +[1,4,2,3] +``` + +In this example, `arrayShuffle` is provided a `seed` and will produce stable results. + +Query: + +```sql +SELECT arrayShuffle([1, 2, 3, 4], 41); +``` + +Result: + +```response +[3,2,1,4] +``` + ## arrayPartialShuffle -Returns an array of the same size as the original array where elements in range [1..limit] are a random subset of the original array. Remaining (limit..n] shall contain the elements not in [1..limit] range in undefined order. Value of limit shall be in range [1..n]. Values outside of that range are equivalent to performing full arrayShuffle. +Returns an array of the same size as the original array where elements in range [1..limit] are a random subset of the original array. Remaining (limit..n] shall contain the elements not in [1..limit] range in undefined order. Value of limit shall be in range [1..n]. Values outside of that range are equivalent to performing full [arrayShuffle](#arrayShuffle). **Syntax** @@ -1427,12 +1483,12 @@ arrayPartialShuffle(arr, limit, seed) **Parameters** - `arr`: The array to partially shuffle. [Array](../data-types/array.md) -- `limit` (optional): Specifies how many times to limit element swaps to. []() -- `seed` (optional): seed to be used with random number generation. If not provided a random one is used. []() +- `limit` (optional): Specifies how many times to limit element swaps to. [UInt or Int](../data-types/int-uint.md) +- `seed` (optional): seed to be used with random number generation. If not provided a random one is used. [UInt or Int](../data-types/int-uint.md) **Returned value** -- Array with elements shuffled. +- Array with elements partially shuffled. **Implementation details** @@ -1451,7 +1507,7 @@ SELECT arrayPartialShuffle([1, 2, 3, 4], 0); SELECT arrayPartialShuffle([1, 2, 3, 4]); ``` -Note: When using [ClickHouse Fiddle](https://fiddle.clickhouse.com/), the exact response may differ due to random nature of the function. +Note: when using [ClickHouse Fiddle](https://fiddle.clickhouse.com/), the exact response may differ due to random nature of the function. 
Result: ```response From d9e7c0a662b609ae747a729e19e992b8591f7cd8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 4 Apr 2024 16:36:12 +0200 Subject: [PATCH 250/470] Add comment --- contrib/nuraft-cmake/CMakeLists.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/contrib/nuraft-cmake/CMakeLists.txt b/contrib/nuraft-cmake/CMakeLists.txt index a57b7ac6225b..736e91e359d7 100644 --- a/contrib/nuraft-cmake/CMakeLists.txt +++ b/contrib/nuraft-cmake/CMakeLists.txt @@ -52,6 +52,8 @@ else() endif() target_link_libraries (_nuraft PRIVATE clickhouse_common_io) +# We must have it PUBLIC here because some headers which depend on it directly +# included in clickhouse target_compile_definitions(_nuraft PUBLIC USE_CLICKHOUSE_THREADS=1) MESSAGE(STATUS "Will use clickhouse threads for NuRaft") From 58e6bd82cff7c18e19ce52ea97c993456aba60df Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Thu, 4 Apr 2024 16:37:49 +0200 Subject: [PATCH 251/470] Add missing ORDER BY --- .../0_stateless/03086_analyzer_window_func_part_of_group_by.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.sql b/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.sql index ac03019de201..31747328d1fc 100644 --- a/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.sql +++ b/tests/queries/0_stateless/03086_analyzer_window_func_part_of_group_by.sql @@ -10,3 +10,4 @@ FROM SELECT 2 AS ver ) GROUP BY ver +ORDER BY ver; From 85d98e1a421cecb8be9051331f8fdd46bfd557ea Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 4 Apr 2024 16:51:46 +0200 Subject: [PATCH 252/470] Modernize code a little --- src/Common/QueryProfiler.cpp | 16 ++-------------- src/Common/ThreadStatus.cpp | 2 +- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/src/Common/QueryProfiler.cpp b/src/Common/QueryProfiler.cpp index b616b7954050..d9ff3a86e4ad 100644 --- a/src/Common/QueryProfiler.cpp +++ b/src/Common/QueryProfiler.cpp @@ -210,23 +210,13 @@ void Timer::cleanup() #endif template -QueryProfilerBase::QueryProfilerBase(UInt64 thread_id, int clock_type, UInt32 period, int pause_signal_) +QueryProfilerBase::QueryProfilerBase([[maybe_unused]] UInt64 thread_id, [[maybe_unused]] int clock_type, [[maybe_unused]] UInt32 period, [[maybe_unused]] int pause_signal_) : log(getLogger("QueryProfiler")) , pause_signal(pause_signal_) { #if defined(SANITIZER) - UNUSED(thread_id); - UNUSED(clock_type); - UNUSED(period); - UNUSED(pause_signal); - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler disabled because they cannot work under sanitizers"); #elif defined(__APPLE__) - UNUSED(thread_id); - UNUSED(clock_type); - UNUSED(period); - UNUSED(pause_signal); - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler cannot work on OSX"); #else /// Sanity check. 
@@ -262,13 +252,11 @@ QueryProfilerBase::QueryProfilerBase(UInt64 thread_id, int clock_t template -void QueryProfilerBase::setPeriod(UInt32 period_) +void QueryProfilerBase::setPeriod([[maybe_unused]] UInt32 period_) { #if defined(SANITIZER) - UNUSED(period); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler disabled because they cannot work under sanitizers"); #elif defined(__APPLE__) - UNUSED(period); throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler cannot work on OSX"); #else timer.set(period_); diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index a3a7257b4724..d4a40360a3b3 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -124,7 +124,7 @@ ThreadStatus::ThreadStatus(bool check_current_thread_on_destruction_) #endif } -void ThreadStatus::initGlobalProfiler(UInt64 global_profiler_real_time_period, UInt64 global_profiler_cpu_time_period) +void ThreadStatus::initGlobalProfiler([[maybe_unused]] UInt64 global_profiler_real_time_period, [[maybe_unused]] UInt64 global_profiler_cpu_time_period) { #if !defined(SANITIZER) && !defined(CLICKHOUSE_KEEPER_STANDALONE_BUILD) && !defined(__APPLE__) try From ff054b5ab304a866e7fb95628082f582216cb67a Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 4 Apr 2024 17:14:55 +0200 Subject: [PATCH 253/470] With better name --- contrib/NuRaft | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/NuRaft b/contrib/NuRaft index 717657cd94da..cb5dc3c906e8 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit 717657cd94da01e86733e58f8d3f0ca0d8748712 +Subproject commit cb5dc3c906e80f253e9ce9535807caef827cc2e0 From 51aff6d63c4c55a8d18470103a24f004fde0ee67 Mon Sep 17 00:00:00 2001 From: Misz606 <113922942+Misz606@users.noreply.github.com> Date: Thu, 4 Apr 2024 12:09:05 -0400 Subject: [PATCH 254/470] Update README.md Localy should be spelled locally. --- tests/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/README.md b/tests/README.md index a1fc0f530f2e..7df5da8d0784 100644 --- a/tests/README.md +++ b/tests/README.md @@ -1 +1 @@ -Find CI documents and instructions on running CI checks localy [here](https://clickhouse.com/docs/en/development/continuous-integration). \ No newline at end of file +Find CI documents and instructions on running CI checks locally [here](https://clickhouse.com/docs/en/development/continuous-integration). From ed942eff760002306828121e6d9990e79309acda Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 4 Apr 2024 18:11:15 +0200 Subject: [PATCH 255/470] Fix BACKUP and RESTORE of a materialized view in an Ordinary database. --- src/Backups/BackupEntriesCollector.cpp | 11 +++++------ src/Backups/BackupUtils.cpp | 11 +++++++++++ src/Backups/BackupUtils.h | 5 +++++ src/Backups/RestorerFromBackup.cpp | 18 ++++++++++-------- src/Backups/RestorerFromBackup.h | 4 ++-- 5 files changed, 33 insertions(+), 16 deletions(-) diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index c71ce195388c..d05d22f52aa3 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -567,17 +567,16 @@ std::vector> BackupEntriesCollector::findTablesInD checkIsQueryCancelled(); - auto filter_by_table_name = [my_database_info = &database_info](const String & table_name) + auto filter_by_table_name = [&](const String & table_name) { - /// We skip inner tables of materialized views. 
- if (table_name.starts_with(".inner_id.")) + if (isInnerTableShouldBeSkippedForBackup(database_name, table_name)) return false; - if (my_database_info->tables.contains(table_name)) + if (database_info.tables.contains(table_name)) return true; - if (my_database_info->all_tables) - return !my_database_info->except_table_names.contains(table_name); + if (database_info.all_tables) + return !database_info.except_table_names.contains(table_name); return false; }; diff --git a/src/Backups/BackupUtils.cpp b/src/Backups/BackupUtils.cpp index 6efca053f059..1564587dc67a 100644 --- a/src/Backups/BackupUtils.cpp +++ b/src/Backups/BackupUtils.cpp @@ -120,4 +120,15 @@ bool compareRestoredDatabaseDef(const IAST & restored_database_create_query, con return compareRestoredTableDef(restored_database_create_query, create_query_from_backup, global_context); } +bool isInnerTableShouldBeSkippedForBackup(const QualifiedTableName & table_name) +{ + return isInnerTableShouldBeSkippedForBackup(table_name.database, table_name.table); +} + +bool isInnerTableShouldBeSkippedForBackup(const String & /* database_name */, const String & table_name) +{ + /// We skip inner tables of materialized views. + return table_name.starts_with(".inner.") || table_name.starts_with(".inner_id."); +} + } diff --git a/src/Backups/BackupUtils.h b/src/Backups/BackupUtils.h index 7976de818e25..48fe2dee7205 100644 --- a/src/Backups/BackupUtils.h +++ b/src/Backups/BackupUtils.h @@ -9,6 +9,7 @@ namespace DB class IBackup; class AccessRightsElements; class DDLRenamingMap; +struct QualifiedTableName; /// Initializes a DDLRenamingMap from a BACKUP or RESTORE query. DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & elements); @@ -20,4 +21,8 @@ AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & bool compareRestoredTableDef(const IAST & restored_table_create_query, const IAST & create_query_from_backup, const ContextPtr & global_context); bool compareRestoredDatabaseDef(const IAST & restored_database_create_query, const IAST & create_query_from_backup, const ContextPtr & global_context); +/// Returns true if this table should be skipped while making a backup because it's an inner table. 
+bool isInnerTableShouldBeSkippedForBackup(const QualifiedTableName & table_name); +bool isInnerTableShouldBeSkippedForBackup(const String & database_name, const String & table_name); + } diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index ed1d5b8a103d..fcbe26caa534 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -343,12 +343,12 @@ void RestorerFromBackup::findDatabasesAndTablesInBackup() { case ASTBackupQuery::ElementType::TABLE: { - findTableInBackup({element.database_name, element.table_name}, element.partitions); + findTableInBackup({element.database_name, element.table_name}, /* skip_if_inner_table= */ false, element.partitions); break; } case ASTBackupQuery::ElementType::TEMPORARY_TABLE: { - findTableInBackup({DatabaseCatalog::TEMPORARY_DATABASE, element.table_name}, element.partitions); + findTableInBackup({DatabaseCatalog::TEMPORARY_DATABASE, element.table_name}, /* skip_if_inner_table= */ false, element.partitions); break; } case ASTBackupQuery::ElementType::DATABASE: @@ -367,14 +367,14 @@ void RestorerFromBackup::findDatabasesAndTablesInBackup() LOG_INFO(log, "Will restore {} databases and {} tables", getNumDatabases(), getNumTables()); } -void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name_in_backup, const std::optional & partitions) +void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name_in_backup, bool skip_if_inner_table, const std::optional & partitions) { schedule( - [this, table_name_in_backup, partitions]() { findTableInBackupImpl(table_name_in_backup, partitions); }, + [this, table_name_in_backup, skip_if_inner_table, partitions]() { findTableInBackupImpl(table_name_in_backup, skip_if_inner_table, partitions); }, "Restore_FindTbl"); } -void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_name_in_backup, const std::optional & partitions) +void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_name_in_backup, bool skip_if_inner_table, const std::optional & partitions) { bool is_temporary_table = (table_name_in_backup.database == DatabaseCatalog::TEMPORARY_DATABASE); @@ -419,6 +419,10 @@ void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_ = *root_path_in_use / "data" / escapeForFileName(table_name_in_backup.database) / escapeForFileName(table_name_in_backup.table); } + QualifiedTableName table_name = renaming_map.getNewTableName(table_name_in_backup); + if (skip_if_inner_table && isInnerTableShouldBeSkippedForBackup(table_name)) + return; + auto read_buffer = backup->readFile(*metadata_path); String create_query_str; readStringUntilEOF(create_query_str, *read_buffer); @@ -429,8 +433,6 @@ void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_ renameDatabaseAndTableNameInCreateQuery(create_table_query, renaming_map, context->getGlobalContext()); String create_table_query_str = serializeAST(*create_table_query); - QualifiedTableName table_name = renaming_map.getNewTableName(table_name_in_backup); - bool is_predefined_table = DatabaseCatalog::instance().isPredefinedTable(StorageID{table_name.database, table_name.table}); auto table_dependencies = getDependenciesFromCreateQuery(context, table_name, create_table_query); bool table_has_data = backup->hasFiles(data_path_in_backup); @@ -565,7 +567,7 @@ void RestorerFromBackup::findDatabaseInBackupImpl(const String & database_name_i if (except_table_names.contains({database_name_in_backup, 
table_name_in_backup})) continue; - findTableInBackup({database_name_in_backup, table_name_in_backup}, /* partitions= */ {}); + findTableInBackup({database_name_in_backup, table_name_in_backup}, /* skip_if_inner_table= */ true, /* partitions= */ {}); } } diff --git a/src/Backups/RestorerFromBackup.h b/src/Backups/RestorerFromBackup.h index 238569ac7554..7b36eea0ba07 100644 --- a/src/Backups/RestorerFromBackup.h +++ b/src/Backups/RestorerFromBackup.h @@ -92,8 +92,8 @@ class RestorerFromBackup : private boost::noncopyable void findRootPathsInBackup(); void findDatabasesAndTablesInBackup(); - void findTableInBackup(const QualifiedTableName & table_name_in_backup, const std::optional & partitions); - void findTableInBackupImpl(const QualifiedTableName & table_name_in_backup, const std::optional & partitions); + void findTableInBackup(const QualifiedTableName & table_name_in_backup, bool skip_if_inner_table, const std::optional & partitions); + void findTableInBackupImpl(const QualifiedTableName & table_name_in_backup, bool skip_if_inner_table, const std::optional & partitions); void findDatabaseInBackup(const String & database_name_in_backup, const std::set & except_table_names); void findDatabaseInBackupImpl(const String & database_name_in_backup, const std::set & except_table_names); void findEverythingInBackup(const std::set & except_database_names, const std::set & except_table_names); From e890c27227a1d170668365ebe2f4b90b50dd2a14 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 4 Apr 2024 18:59:40 +0200 Subject: [PATCH 256/470] Fix note section of arrayDotProduct displays incorrectly --- docs/en/sql-reference/functions/array-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 7d33f3d9949e..7a8be0b259aa 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -789,7 +789,7 @@ Alias: `scalarProduct` - `vector1`: First vector. [Array](../data-types/array.md) or [Tuple](../data-types/tuple.md) of numeric values. - `vector2`: Second vector. [Array](../data-types/array.md) or [Tuple](../data-types/tuple.md) of numeric values. -Note::: +:::note The sizes of the two vectors must be equal. Arrays and Tuples may also contain mixed element types. 
::: From eede7c3acf5e1fcc113349b4014aaf5b981ce285 Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 4 Apr 2024 19:01:31 +0200 Subject: [PATCH 257/470] Try fix stress test --- src/Interpreters/Cache/EvictionCandidates.cpp | 2 ++ src/Interpreters/Cache/FileCache.cpp | 31 ++++++++++++++----- src/Interpreters/Cache/Metadata.cpp | 1 - .../Cache/SLRUFileCachePriority.cpp | 10 +++++- 4 files changed, 35 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/Cache/EvictionCandidates.cpp b/src/Interpreters/Cache/EvictionCandidates.cpp index 5fa2b337e648..da9e3efbffe0 100644 --- a/src/Interpreters/Cache/EvictionCandidates.cpp +++ b/src/Interpreters/Cache/EvictionCandidates.cpp @@ -63,6 +63,8 @@ void EvictionCandidates::add( void EvictionCandidates::removeQueueEntries(const CachePriorityGuard::Lock & lock) { + auto log = getLogger("EvictionCandidates"); + LOG_TEST(log, "Will remove {} eviction candidates", size()); for (const auto & [key, key_candidates] : candidates) { for (const auto & candidate : key_candidates.candidates) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 90671629e647..5e97c0110f42 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1383,6 +1383,7 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, || new_settings.max_elements != actual_settings.max_elements) { std::optional eviction_candidates; + bool modified_size_limits = false; { cache_is_being_resized.store(true, std::memory_order_relaxed); SCOPE_EXIT({ @@ -1397,8 +1398,21 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, eviction_candidates->removeQueueEntries(cache_lock); - main_priority->modifySizeLimits( - new_settings.max_size, new_settings.max_elements, new_settings.slru_size_ratio, cache_lock); + modified_size_limits = main_priority->getSize(cache_lock) <= new_settings.max_size + && main_priority->getElementsCount(cache_lock) <= new_settings.max_elements; + + if (modified_size_limits) + { + main_priority->modifySizeLimits( + new_settings.max_size, new_settings.max_elements, new_settings.slru_size_ratio, cache_lock); + } + else + { + LOG_WARNING(log, "Unable to modify size limit from {} to {}, " + "elements limit from {} to {}", + actual_settings.max_size, new_settings.max_size, + actual_settings.max_elements, new_settings.max_elements); + } } try @@ -1412,12 +1426,15 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, throw; } - LOG_INFO(log, "Changed max_size from {} to {}, max_elements from {} to {}", - actual_settings.max_size, new_settings.max_size, - actual_settings.max_elements, new_settings.max_elements); + if (modified_size_limits) + { + LOG_INFO(log, "Changed max_size from {} to {}, max_elements from {} to {}", + actual_settings.max_size, new_settings.max_size, + actual_settings.max_elements, new_settings.max_elements); - actual_settings.max_size = new_settings.max_size; - actual_settings.max_elements = new_settings.max_elements; + actual_settings.max_size = new_settings.max_size; + actual_settings.max_elements = new_settings.max_elements; + } } if (new_settings.max_file_segment_size != actual_settings.max_file_segment_size) diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 26611f023797..631c1aa2ae67 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -941,7 +941,6 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl( 
file_segment->detach(segment_lock, *this); - // if (!remove_only_metadata) { try { diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp index 79ca489cea27..ff583c440c8c 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp @@ -262,6 +262,9 @@ EvictionCandidates SLRUFileCachePriority::collectCandidatesForEviction( auto res = probationary_queue.collectCandidatesForEviction( desired_probationary_size, desired_probationary_elements_num, max_candidates_to_evict, stat, lock); + LOG_TEST(log, "Collected {} eviction candidates from probationary queue (size: {})", + res.size(), stat.total_stat.releasable_size); + chassert(!max_candidates_to_evict || res.size() <= max_candidates_to_evict); chassert(res.size() == stat.total_stat.releasable_count); @@ -271,10 +274,15 @@ EvictionCandidates SLRUFileCachePriority::collectCandidatesForEviction( const auto desired_protected_size = getRatio(max_size, size_ratio); const auto desired_protected_elements_num = getRatio(max_elements, size_ratio); + FileCacheReserveStat protected_stat; auto res_add = protected_queue.collectCandidatesForEviction( desired_protected_size, desired_protected_elements_num, - max_candidates_to_evict ? max_candidates_to_evict - res.size() : 0, stat, lock); + max_candidates_to_evict ? max_candidates_to_evict - res.size() : 0, protected_stat, lock); + + LOG_TEST(log, "Collected {} eviction candidates from protected queue (size: {})", + res_add.size(), protected_stat.total_stat.releasable_size); + stat += protected_stat; res.insert(std::move(res_add), lock); return res; } From bd82a2dd9722b810e1c997bfdf17223313d684a5 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 4 Apr 2024 18:11:56 +0200 Subject: [PATCH 258/470] Add test. --- .../02864_restore_table_with_broken_part.sh | 35 ++++++++------- ...03001_backup_matview_after_modify_query.sh | 3 +- ...ith_matview_inner_table_metadata.reference | 4 ++ ...ackup_with_matview_inner_table_metadata.sh | 40 ++++++++++++++++++ ...ckup_with_matview_inner_table_metadata.zip | Bin 0 -> 8272 bytes 5 files changed, 65 insertions(+), 17 deletions(-) create mode 100644 tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.reference create mode 100755 tests/queries/0_stateless/03001_restore_from_old_backup_with_matview_inner_table_metadata.sh create mode 100644 tests/queries/0_stateless/backups/old_backup_with_matview_inner_table_metadata.zip diff --git a/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh b/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh index d3252b29eb79..fe26784dab4d 100755 --- a/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh +++ b/tests/queries/0_stateless/02864_restore_table_with_broken_part.sh @@ -5,33 +5,38 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -# Copy backups/with_broken_part.zip into the disk named "backups". -SRC_BACKUP_DIR=$CURDIR/backups -SRC_BACKUP_FILENAME=with_broken_part.zip +# Copies a test predefined backup from "/tests/queries/0_stateless/backups/" folder to the "backups" disk, +# returns the path to the backup relative to that disk. 
+function install_test_backup() +{ + local test_backup_filename="$1" + local test_backup_path="$CURDIR/backups/${test_backup_filename}" -BACKUPS_DISK=backups -BACKUPS_DIR=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='$BACKUPS_DISK'") + local backups_disk_root=$($CLICKHOUSE_CLIENT --query "SELECT path FROM system.disks WHERE name='backups'") -if [ -z "$BACKUPS_DIR" ]; then - echo Disk \'$BACKUPS_DISK\' not found - exit 1 -fi + if [ -z "${backups_disk_root}" ]; then + echo Disk \'${backups_disk_root}\' not found + exit 1 + fi -BACKUP_FILENAME=$CLICKHOUSE_DATABASE/${SRC_BACKUP_FILENAME} -BACKUP_NAME="Disk('$BACKUPS_DISK', '$BACKUP_FILENAME')" + local install_path=${backups_disk_root}/${CLICKHOUSE_DATABASE}/${test_backup_filename} + mkdir -p "$(dirname "${install_path}")" + ln -s "${test_backup_path}" "${install_path}" -mkdir -p "$(dirname "$BACKUPS_DIR/$BACKUP_FILENAME")" -ln -s "$SRC_BACKUP_DIR/$SRC_BACKUP_FILENAME" "$BACKUPS_DIR/$BACKUP_FILENAME" + echo "${CLICKHOUSE_DATABASE}/${test_backup_filename}" +} + +backup_name="$(install_test_backup with_broken_part.zip)" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS tbl" # First try to restore with the setting `restore_broken_parts_as_detached` set to false. -$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM $BACKUP_NAME" 2>&1 | tr -d \\n | grep "data.bin doesn't exist" | grep "while restoring part all_2_2_0" > /dev/null && echo "OK" || echo "FAILED" +$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM Disk('backups', '${backup_name}')" 2>&1 | tr -d \\n | grep "data.bin doesn't exist" | grep "while restoring part all_2_2_0" > /dev/null && echo "OK" || echo "FAILED" $CLICKHOUSE_CLIENT --query "DROP TABLE IF EXISTS tbl" # Then try to restore with the setting `restore_broken_parts_as_detached` set to true. 
-$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM $BACKUP_NAME SETTINGS restore_broken_parts_as_detached = true" 2>/dev/null | awk -F '\t' '{print $2}' +$CLICKHOUSE_CLIENT --query "RESTORE TABLE default.tbl AS tbl FROM Disk('backups', '${backup_name}') SETTINGS restore_broken_parts_as_detached = true" 2>/dev/null | awk -F '\t' '{print $2}' $CLICKHOUSE_CLIENT --multiquery <>UBH5!~R5r<$ zM7B!*_a)xTPyPIFpA+()?|Gi{eBbYL&Pi7bpMVyIM?wN43k^MOE9#2oCV;^R8DTJD z7z`$8Wohl?<<^_r?ZPBQ7H*DqT@Yc2N>IyDf7{ zGRMw&e+L^}B&v2`QUAyf{KaAS_ai~gE5{4^npg})yM6ivzw>%6KE0k)niwHrUT6Dj zV*2t=@A9k75<+RV78U);KNneQ%0-t1b^FW8d%k+lO5Y(;jvZ^k3zbPL>K$er5X8nM z;Q6YKl~m=IeZrs%-y^@DeiE3NB$DTQqhvU#p*L+hu-v%bI7m?5%lGlwM=R)JtASgi zdF3)a{jMXw8k)k&WlZOWU3pjzmkg6Jsxg^x3!2n?)K?&_uNl(fvOCjX*3;!<4p+72 zZ)X{Ztg3}aIMz_>&xLVN8ee)=9V$+xn#5{$!Mf0dpGYcL%z~%hKgv`SrLN2aTd6aW zRPn}h6TgO~X1A&9GJSO0O_?k#r}G{nBd-2qweXv-eLb%7u0c6;^697(sjoGg#qiie zo1^ok_H{doW}9ix7Ig?1DqXXWNNkh~-d~%LXLI4LiH;f{vktbWb(p*4y*Er`C`a4* zj8mAbEzw99wHnK%1!|{d(+0h);a;ony39$(ojNB+6U+>Glq5u*^Au`szjJ21abM|> zetkod=Kb;y(Rp?}-K8%-5BOT$iWf4u;C)pYc1Vi#o}8P3o_dAVlO#6T{L#kJRS)x# z`B`^t<@98M@*x2u^TH|*&WD6kj~PbkBZY1S3SomJFGk#)DvUpH;#o26`6mlS?Q@wH zc~it+3O-k+iu!13T9tXz5+M7HHRDaI9W##S&U7kGf6yA&Jr_~O?)~ZWjhe^i{#Rnc zelZ5|{uEf~KTY_MY=3Ui!bmE|U`Sh0XX`=Eq3c!&aov-j^?1wGu4fOdLHS z4oa4%zcYSFSKqU*!|{@y4K4!}H zK-x(urABx15l6&L70T1ph2djmsALA3Y)yOZN=jdkNo?qoc}~?E@zs{X4OSs<9DVQ1 zveL_-Bu!10a;fERPHGUEe4OZ%j$R63_V2D%%w+X-GL!X;;wC>xGsYewbnJz|*by?C zH_HO}s)rVcKQOAt*t8~B$6VRFqL9U9eu9AMF5Zv+o;K7kSLd|YuvNj;@zd{b5b=^7 zZ|vk?&Tsd?#?`4FqrQS4k32tpW#26Mih}#gWjpsH_riZ>zo#@&9ilIsH6=wbYN*#F z$5mga8i*B@B6%R{Pg(Y*+gO;&sm}2oiuV|aAfp7lGd3!aEmQnZfbu^KhG)=Z=R4+n zPm=42@fL|OdDdNu%B>AJIvRENNBTk%27_YbBwLZGb`Gx0z2bhi#&K~aUUTusz?&f5 zH8Fvaz+i7Kgq{ZUWAj4@d}m|nX(+?aGvYk_8k3|=Qu@F99&$` z9>70W}zmiXW$NjuUu{ z3os$T*3!0?ldXG&R_*D4L1R5FF>ZoZ4lbgr!cMC)C$Ax{xyA}S%rDpLrmR`=D=GLwlHxD!x#09>$L0g09O2?H|l(@Bx#dx4FAAz+y^pM8ln9uoK`u4S_kYI9z58Ce707i20a&`v8nYd>c`6$hFO?nAEUpjJZ_?^Qiw1tHQkrh6YXze)GFJW*? zpX>`$_W(hJf7|~<0E~x|h_!_RVUv9TvzQ#tIfK9yv(W@rz#)USJ7!tiqph8=UKlKR z1e-q~ZCyPumY&GphcAs3BOMuIs-G9i4Zo9rdOB=k=6;9-5JMcWX=W1Q-x+78!8Tl_ zH?m7Kcy_yLp1w#3j#wJHEH{Lp-;3}LHI_U&jw0cMO_P9o5ayP)Pr^j8*jpL~YG1EY zHH48d!4$w`YQg6B9b}V>e(@mM=7g^gQB&!day=aXy`}+eIuqT2)>zhth!~O6(r(Oob5w=+JbwnZ+k_o;|SVF+tiFBea6KX9Mti3BP|%Vm6ds*S&B{JUXfxbH9T zlS4B2uCFOP^J}_jm*vK~;3Qvp&${$W={&ReA+dl^)8-`K^5$_JT)lBH(N^Q_Z9X;Os>k60NVvJe z$&n2nfD-iIJOC4o!+>MI8L&<*5VB*hIW;IjAhmYMfDI0B;u0+3&jUy0+UNMlM#6r=EIsk$mU;?{Da`wWwtkF}e5VU2U?36MFNc%3A_XA|fI<@2lzg*n^^VtDfjKgsIMdxW@sb_2P@)5%Dahh zyz6UC>g8%l=VY~oWxRAIC3uSyatH|7(n@5%!DyDhgtSKC(pX}Pkbz4t#!?rs0F(pm zZ%f})tpAcP+GirQ^ooOiKBC_7gXdmogvLH0hd%8S!=drwepEw>oyX#VG$=9E`{sr3L%8 z9~q4%vuJm4C?Qp_SqK_8@qiQg1$@17lwTHAth>76Vf-)_5KU#+FGBc9G5bVKLQZ1z zpc7-H_G17+L>WfM0{lfVExT(i6XD@9n-Yn-746Q0eNL--3k9bvHx|#F{UMk;*+l*{ z6|O}MU8`V*OEqLqAb}i!#thh>$N`)baJij&#Wis+or!UAdgA8d#QfI~O2CB(FblXa z90kX@NI@2SXT0-Zg9Tub^CxU5@0;3VAAs1RY2eG58!)?16M!)U46FkF1ZVBK8>u+8 zE)}6$r6Q!rmW%OP3Mdx{(shwgct(}F@k)U@(X*}M1xtyIG6|=y{0uyH4{g>0ZX7f! 
z+{eHwPY~MtZ1NkR8e0A3I01$<(QhrF%_gN^0qh1~_ zo<%rSE{aQR{@?RB(wi>j7fqF2r|ovB*2QF6F(4hMzR%`+$f z-PN_KvW*?1wFR45FQ%9Vw`8f`kJS4}cx9&PrUk(v-SAp%)2qVq8f3ykIrEzJSU$x3 zHyR3^_R1ih$FyF0ft3Z-vH$oOTQ?vK&%3z!3Qe|YdzDIP1Wm0>HL+@w_+~%%6E@-8 z<5$R#HXd=zoZ7-Ymblz?WH^qm}krO#3QeWQiR?igORykFzmqHH-Vl&{M z&$ihPMaihoFvRhj|8vP%hG$vvRc{}|?R($-6P(k`pCOi214D9=;;^CiDW1;w+I|~G z)!jA$#2T@K)lz)?MjX3u(3 zZr?+_Bn2doD){Luv(C=a4->bYHL&qh#ax6mAUDdPjz~#lWL{$F>9pG&pgvtl4h!M*P~|lT8Ql(oM(Y5n-zd0 z6O%`!2LO`11W2;)UnBvlo9)y%iJ&BjkbP=maI&je6OkmVbAZ0$Vr89#&UpJ*dd6an z692g-|G~pwn-?*DX<7UwK}+!kw%imxs-u&(X;DNm!#dCHF85^ks}AURe13ecyywZy z_Zj&;Rr2yKM#a8zC_Sg?s4Sv5bRHM zGvXnA1+n~qK?s-Am> zJ?INcFiW~Ji@D)c9#M-#+u_-_$%_PM&GZ$$#5xkRA}l?=u(Nz?2+O%_Tio{IgM;EQ zJXSLGa*TrTqL2sTljJ2+VbM{R3VW*8pXGJ6;CQt7e;zag#8~^J2ZLXJkDWK1z$x_44A`K;XX~mUJlyVKW37K0J3-i?yTab=Y{7zX9sJJ| zCJ20CSKu4P4DP&XdyMS^Cn$y%P>TL{a{fAg!VR|5*a-?&u&ZFdM>(gH2| z&*KmX_v5Z{*9LqL_&Voz8jC>S;(!wEV$mBz65MR;G$nz6gLVnL*$8bwgR-|>u)ro_ z{R1lOexPH6*1SIUfW&U6!3X54^eEr**b4_nqxqa=49K`1a0;ARAYU|>pa2Z@75tK^k1-`^g*x2dj585*&)oQf5F6Q;5S$U@^+`jdmRwut^8h`Rv0%=J2m3a zI&5*>HrjIBs5`aXP}Gv`?wh5-pIUGz^yt5Q*BkPU=pe#Q-4}!)NR6}Swen_5K>!xq pxL`X~=@6JcFd*2teudV+e%I`}T7 Date: Thu, 4 Apr 2024 19:16:38 +0200 Subject: [PATCH 259/470] empty commit From 1fe2f35d8a9bc781ebdfad2501a59ca9b43d69bc Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 4 Apr 2024 19:16:53 +0200 Subject: [PATCH 260/470] Minor changes --- .../functions/array-functions.md | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 7a8be0b259aa..f45b3e3386ab 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -952,7 +952,7 @@ arrayEnumerateUniqRanked(clear_depth, arr, max_array_depth) **Parameters** - `clear_depth`: Enumerate elements at the specified level separately. Positive [Integer](../data-types/int-uint.md) less than or equal to `max_arr_depth`. -- `arr`: N-dimensional array to enumerate. [Array](../data-types/array.md) +- `arr`: N-dimensional array to enumerate. [Array](../data-types/array.md). - `max_array_depth`: The maximum effective depth. Positive [Integer](../data-types/int-uint.md) less than or equal to the depth of `arr`. **Example** @@ -971,7 +971,7 @@ Result: [1,1,2] ``` -In this example, `arrayEnumerateUniqRanked` is used to obtain an array indicating, for each element of the multidimensional array, what its position is among elements of the same value. For the first row of the passed array,`[1,2,3]`, the corresponding result is `[1,1,1]`, indicating that this is the first time `1`,`2` and `3` are encountered. For the second row of the provided array,`[2,2,1]`, the corresponding result is `[2,3,3]`, indicating that the number `2` is encountered for a second and third time, and `1` is encountered for the second time. Likewise, for the third row of the provided array `[3]` the corresponding result is `[2]` indicating that the number `3` is encountered for the second time. +In this example, `arrayEnumerateUniqRanked` is used to obtain an array indicating, for each element of the multidimensional array, what its position is among elements of the same value. For the first row of the passed array,`[1,2,3]`, the corresponding result is `[1,1,1]`, indicating that this is the first time `1`,`2` and `3` are encountered. 
For the second row of the provided array,`[2,2,1]`, the corresponding result is `[2,3,3]`, indicating that `2` is encountered for a second and third time, and `1` is encountered for the second time. Likewise, for the third row of the provided array `[3]` the corresponding result is `[2]` indicating that `3` is encountered for the second time. Query: @@ -1417,7 +1417,7 @@ Same as `arrayReverseSort` with additional `limit` argument allowing partial sor ## arrayShuffle Returns an array of the same size as the original array containing the elements in shuffled order. -Elements are being reordered in such a way that each possible permutation of those elements has equal probability of appearance. +Elements are reordered in such a way that each possible permutation of those elements has equal probability of appearance. **Syntax** @@ -1427,8 +1427,10 @@ arrayShuffle(arr, seed) **Parameters** -- `arr`: The array to partially shuffle. [Array](../data-types/array.md) -- `seed` (optional): seed to be used with random number generation. If not provided a random one is used. [UInt or Int](../data-types/int-uint.md) +- `arr`: The array to partially shuffle. [Array](../data-types/array.md). +- `seed` (optional): seed to be used with random number generation. If not provided a random one is used. [UInt or Int](../data-types/int-uint.md). + +**Returned value** - Array with elements shuffled. @@ -1440,7 +1442,7 @@ This function will not materialize constants. **Examples** -In this example, `arrayShuffle` is used with providing a `seed` and will use its own randomly generated one. +In this example, `arrayShuffle` is used without providing a `seed` and will therefore generate one randomly itself. Query: @@ -1472,7 +1474,7 @@ Result: ## arrayPartialShuffle -Returns an array of the same size as the original array where elements in range [1..limit] are a random subset of the original array. Remaining (limit..n] shall contain the elements not in [1..limit] range in undefined order. Value of limit shall be in range [1..n]. Values outside of that range are equivalent to performing full [arrayShuffle](#arrayShuffle). +Returns an array of the same size as the original array where elements in range `[1..limit]` are a random subset of the original array. Remaining `(limit..N]` shall contain the elements not in `[1..limit]` range in an undefined order. **Syntax** @@ -1482,8 +1484,8 @@ arrayPartialShuffle(arr, limit, seed) **Parameters** -- `arr`: The array to partially shuffle. [Array](../data-types/array.md) -- `limit` (optional): Specifies how many times to limit element swaps to. [UInt or Int](../data-types/int-uint.md) +- `arr`: The array size `N` to partially shuffle. [Array](../data-types/array.md) +- `limit` (optional): Number to limit element swaps to, in the range `[1..N]`. [UInt or Int](../data-types/int-uint.md). - `seed` (optional): seed to be used with random number generation. If not provided a random one is used. [UInt or Int](../data-types/int-uint.md) **Returned value** @@ -1494,6 +1496,8 @@ arrayPartialShuffle(arr, limit, seed) :::note This function will not materialize constants. + +The value of `limit` shouuld be in the range `[1..N]`. Values outside of that range are equivalent to performing full [arrayShuffle](#arrayShuffle). ::: **Examples** @@ -1663,12 +1667,12 @@ arrayEnumerateDenseRanked(clear_depth, arr, max_array_depth) **Parameters** - `clear_depth`: Enumerate elements at the specified level separately. Positive [Integer](../data-types/int-uint.md) less than or equal to `max_arr_depth`. 
-- `arr`: N-dimensional array to enumerate. [Array](../data-types/array.md) +- `arr`: N-dimensional array to enumerate. [Array](../data-types/array.md). - `max_array_depth`: The maximum effective depth. Positive [Integer](../data-types/int-uint.md) less than or equal to the depth of `arr`. **Example** -With `clear_depth`=1 and `max_array_depth`=1, the result is identical to what [arrayEnumerateDense](#arrayenumeratedense) would give. +With `clear_depth=1` and `max_array_depth=1`, the result is identical to what [arrayEnumerateDense](#arrayenumeratedense) would give. Query: @@ -1682,7 +1686,7 @@ Result: [1,2,1,3] ``` -In this example, `arrayEnumerateDenseRanked` is used to obtain an array indicating, for each element of the multidimensional array, what its position is among elements of the same value. For the first row of the passed array,`[10,10,30,20]`, the corresponding first row of the result is `[1,1,2,3]`, indicating that `10` is the first element encountered in position 1 and 2, `30` the second element encountered in position 3 and `20` is the third element encountered in position 4. For the second row, `[40, 50, 10, 30]`, the corresponding second row of the result is `[4,5,1,2]`, indicating that `40` and `50` are the fourth and fifth numbers encountered in position 1 and 2 of that row, that another `10` (the first encountered number) is in position 3 and `30` (the second number encountered) is in the last position. +In this example, `arrayEnumerateDenseRanked` is used to obtain an array indicating, for each element of the multidimensional array, what its position is among elements of the same value. For the first row of the passed array,`[10,10,30,20]`, the corresponding first row of the result is `[1,1,2,3]`, indicating that `10` is the first number encountered in position 1 and 2, `30` the second number encountered in position 3 and `20` is the third number encountered in position 4. For the second row, `[40, 50, 10, 30]`, the corresponding second row of the result is `[4,5,1,2]`, indicating that `40` and `50` are the fourth and fifth numbers encountered in position 1 and 2 of that row, that another `10` (the first encountered number) is in position 3 and `30` (the second number encountered) is in the last position. Query: @@ -2236,7 +2240,7 @@ arrayFirstOrNull(func, arr1, …) **Parameters** - `func`: lambda function. -- `arr1`: array to operate on. [Array](../) +- `arr1`: array to operate on. [Array](../data-types/array.md). **Returned value** @@ -2292,7 +2296,7 @@ arrayLastOrNull(func, arr1, …) **Parameters** - `func`: lambda function. -- `arr1`: array to operate on. [Array](../) +- `arr1`: array to operate on. [Array](../data-types/array.md). **Returned value** From 95b5616d1a857f6c7fc4f5ea8f24edbed99b85b5 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 4 Apr 2024 19:51:04 +0200 Subject: [PATCH 261/470] A few more small fixes --- docs/en/sql-reference/functions/array-functions.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index f45b3e3386ab..f91a96fec21b 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1484,9 +1484,9 @@ arrayPartialShuffle(arr, limit, seed) **Parameters** -- `arr`: The array size `N` to partially shuffle. [Array](../data-types/array.md) -- `limit` (optional): Number to limit element swaps to, in the range `[1..N]`. 
[UInt or Int](../data-types/int-uint.md). -- `seed` (optional): seed to be used with random number generation. If not provided a random one is used. [UInt or Int](../data-types/int-uint.md) +- `arr`: The array size `N` to partially shuffle. [Array](../data-types/array.md). +- `limit` (optional): The number to limit element swaps to, in the range `[1..N]`. [UInt or Int](../data-types/int-uint.md). +- `seed` (optional): The seed value to be used with random number generation. If not provided a random one is used. [UInt or Int](../data-types/int-uint.md) **Returned value** @@ -1497,7 +1497,7 @@ arrayPartialShuffle(arr, limit, seed) :::note This function will not materialize constants. -The value of `limit` shouuld be in the range `[1..N]`. Values outside of that range are equivalent to performing full [arrayShuffle](#arrayShuffle). +The value of `limit` should be in the range `[1..N]`. Values outside of that range are equivalent to performing full [arrayShuffle](#arrayshuffle). ::: **Examples** From d5493bccf59724c411cdd31341cc23ff5acc9a32 Mon Sep 17 00:00:00 2001 From: Blargian Date: Thu, 4 Apr 2024 20:00:28 +0200 Subject: [PATCH 262/470] Add a link to lambda functions for arrayFirstOrNull and arrayLastOrNull --- docs/en/sql-reference/functions/array-functions.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index f91a96fec21b..d481996e854b 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -2239,8 +2239,8 @@ arrayFirstOrNull(func, arr1, …) **Parameters** -- `func`: lambda function. -- `arr1`: array to operate on. [Array](../data-types/array.md). +- `func`: Lambda function. [Lambda function](../functions/#higher-order-functions---operator-and-lambdaparams-expr-function). +- `arr1`: Array to operate on. [Array](../data-types/array.md). **Returned value** @@ -2295,8 +2295,8 @@ arrayLastOrNull(func, arr1, …) **Parameters** -- `func`: lambda function. -- `arr1`: array to operate on. [Array](../data-types/array.md). +- `func`: Lambda function. [Lambda function](../functions/#higher-order-functions---operator-and-lambdaparams-expr-function). +- `arr1`: Array to operate on. [Array](../data-types/array.md). 
**Returned value** From 0fccaafda45ccf85eeeacc6c8cd6f417fed91e73 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 4 Apr 2024 20:06:40 +0200 Subject: [PATCH 263/470] Fix tests --- docker/test/stateless/run.sh | 2 ++ tests/ci/integration_tests_runner.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index b9ed0561a48e..d7088aa05fbd 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -16,6 +16,8 @@ ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone dpkg -i package_folder/clickhouse-common-static_*.deb dpkg -i package_folder/clickhouse-common-static-dbg_*.deb +dpkg -i package_folder/clickhouse-odbc-bridge_*.deb +dpkg -i package_folder/clickhouse-library-bridge_*.deb dpkg -i package_folder/clickhouse-server_*.deb dpkg -i package_folder/clickhouse-client_*.deb diff --git a/tests/ci/integration_tests_runner.py b/tests/ci/integration_tests_runner.py index f50124500cc5..90e2b08386fc 100755 --- a/tests/ci/integration_tests_runner.py +++ b/tests/ci/integration_tests_runner.py @@ -342,6 +342,8 @@ def _install_clickhouse(self, debs_path): "clickhouse-common-static_", "clickhouse-server_", "clickhouse-client", + "clickhouse-odbc-bridge_", + "clickhouse-library-bridge_", "clickhouse-common-static-dbg_", ): # order matters logging.info("Installing package %s", package) From 3e1e4348333fb026c17420be34871f1638bcf2e0 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 4 Apr 2024 19:52:40 +0000 Subject: [PATCH 264/470] Fix data race on scalars in Context --- src/Interpreters/Context.cpp | 11 +- .../03033_scalars_context_data_race.reference | 1 + .../03033_scalars_context_data_race.sql | 104 ++++++++++++++++++ 3 files changed, 114 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03033_scalars_context_data_race.reference create mode 100644 tests/queries/0_stateless/03033_scalars_context_data_race.sql diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 65fcd51529bd..0f28e5d87718 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1552,12 +1552,15 @@ ClassifierPtr Context::getWorkloadClassifier() const const Scalars & Context::getScalars() const { + std::lock_guard lock(mutex); return scalars; } const Block & Context::getScalar(const String & name) const { + std::lock_guard lock(mutex); + auto it = scalars.find(name); if (scalars.end() == it) { @@ -1570,6 +1573,7 @@ const Block & Context::getScalar(const String & name) const const Block * Context::tryGetSpecialScalar(const String & name) const { + std::lock_guard lock(mutex); auto it = special_scalars.find(name); if (special_scalars.end() == it) return nullptr; @@ -1653,7 +1657,8 @@ void Context::addScalar(const String & name, const Block & block) if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have scalars"); - scalars[name] = block; + std::lock_guard lock(mutex); + scalars.emplace(name, block); } @@ -1662,7 +1667,8 @@ void Context::addSpecialScalar(const String & name, const Block & block) if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have local scalars"); - special_scalars[name] = block; + std::lock_guard lock(mutex); + special_scalars.emplace(name, block); } @@ -1671,6 +1677,7 @@ bool Context::hasScalar(const String & name) const if (isGlobalContext()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have scalars"); + std::lock_guard lock(mutex); return 
scalars.contains(name); } diff --git a/tests/queries/0_stateless/03033_scalars_context_data_race.reference b/tests/queries/0_stateless/03033_scalars_context_data_race.reference new file mode 100644 index 000000000000..f96ac0672183 --- /dev/null +++ b/tests/queries/0_stateless/03033_scalars_context_data_race.reference @@ -0,0 +1 @@ +105 diff --git a/tests/queries/0_stateless/03033_scalars_context_data_race.sql b/tests/queries/0_stateless/03033_scalars_context_data_race.sql new file mode 100644 index 000000000000..8c72bb53c725 --- /dev/null +++ b/tests/queries/0_stateless/03033_scalars_context_data_race.sql @@ -0,0 +1,104 @@ +DROP TABLE IF EXISTS test; +DROP TABLE IF EXISTS test_tmp; +DROP TABLE IF EXISTS dst; +DROP TABLE IF EXISTS view; + +CREATE TABLE test +( + `address` FixedString(20), + `deployer` FixedString(20), + `block_number` UInt256, + `block_hash` FixedString(32), + `block_timestamp` DateTime('UTC'), + `insertion_time` DateTime('UTC') +) +ENGINE = MergeTree +ORDER BY address +SETTINGS index_granularity = 8192; + +CREATE TABLE test_tmp as test; + +CREATE TABLE dst +( + `block_timestamp` AggregateFunction(max, Nullable(DateTime('UTC'))), + `block_hash` AggregateFunction(argMax, Nullable(FixedString(32)), DateTime('UTC')), + `block_number` AggregateFunction(argMax, Nullable(UInt256), DateTime('UTC')), + `deployer` AggregateFunction(argMax, Nullable(FixedString(20)), DateTime('UTC')), + `address` FixedString(20), + `name` AggregateFunction(argMax, Nullable(String), DateTime('UTC')), + `symbol` AggregateFunction(argMax, Nullable(String), DateTime('UTC')), + `decimals` AggregateFunction(argMax, Nullable(UInt8), DateTime('UTC')), + `is_proxy` AggregateFunction(argMax, Nullable(Bool), DateTime('UTC')), + `blacklist_flags` AggregateFunction(argMax, Array(Nullable(String)), DateTime('UTC')), + `whitelist_flags` AggregateFunction(argMax, Array(Nullable(String)), DateTime('UTC')), + `detected_standards` AggregateFunction(argMax, Array(Nullable(String)), DateTime('UTC')), + `amended_type` AggregateFunction(argMax, Nullable(String), DateTime('UTC')), + `comment` AggregateFunction(argMax, Nullable(String), DateTime('UTC')), + `_sources` AggregateFunction(groupUniqArray, String), + `_updated_at` AggregateFunction(max, DateTime('UTC')), + `_active` AggregateFunction(argMax, Bool, DateTime('UTC')) +) +ENGINE = MergeTree +ORDER BY address +SETTINGS index_granularity = 8192; + +CREATE MATERIALIZED VIEW view TO dst +( + `block_timestamp` AggregateFunction(max, Nullable(DateTime('UTC'))), + `block_hash` AggregateFunction(argMax, Nullable(FixedString(32)), DateTime('UTC')), + `block_number` AggregateFunction(argMax, Nullable(UInt256), DateTime('UTC')), + `deployer` AggregateFunction(argMax, Nullable(FixedString(20)), DateTime('UTC')), + `address` FixedString(20), + `name` AggregateFunction(argMax, Nullable(String), DateTime('UTC')), + `symbol` AggregateFunction(argMax, Nullable(String), DateTime('UTC')), + `decimals` AggregateFunction(argMax, Nullable(UInt8), DateTime('UTC')), + `is_proxy` AggregateFunction(argMax, Nullable(Bool), DateTime('UTC')), + `blacklist_flags` AggregateFunction(argMax, Array(Nullable(String)), DateTime('UTC')), + `whitelist_flags` AggregateFunction(argMax, Array(Nullable(String)), DateTime('UTC')), + `detected_standards` AggregateFunction(argMax, Array(Nullable(String)), DateTime('UTC')), + `amended_type` AggregateFunction(argMax, Nullable(String), DateTime('UTC')), + `comment` AggregateFunction(argMax, Nullable(String), DateTime('UTC')), + `_sources` 
AggregateFunction(groupUniqArray, String), + `_updated_at` AggregateFunction(max, DateTime('UTC')), + `_active` AggregateFunction(argMax, Bool, DateTime('UTC')) +) AS +(WITH ( + SELECT toDateTime('1970-01-01 00:00:00') + ) AS default_timestamp +SELECT + maxState(CAST(block_timestamp, 'Nullable(DateTime(\'UTC\'))')) AS block_timestamp, + argMaxState(CAST(block_hash, 'Nullable(FixedString(32))'), insertion_time) AS block_hash, + argMaxState(CAST(block_number, 'Nullable(UInt256)'), insertion_time) AS block_number, + argMaxState(CAST(deployer, 'Nullable(FixedString(20))'), insertion_time) AS deployer, + address, + argMaxState(CAST(NULL, 'Nullable(String)'), CAST(default_timestamp, 'DateTime(\'UTC\')')) AS name, + argMaxState(CAST(NULL, 'Nullable(String)'), CAST(default_timestamp, 'DateTime(\'UTC\')')) AS symbol, + argMaxState(CAST(NULL, 'Nullable(UInt8)'), CAST(default_timestamp, 'DateTime(\'UTC\')')) AS decimals, + argMaxState(CAST(true, 'Nullable(Boolean)'), insertion_time) AS is_proxy, + argMaxState(CAST('[]', 'Array(Nullable(String))'), CAST(default_timestamp, 'DateTime(\'UTC\')')) AS blacklist_flags, + argMaxState(CAST('[]', 'Array(Nullable(String))'), CAST(default_timestamp, 'DateTime(\'UTC\')')) AS whitelist_flags, + argMaxState(CAST('[]', 'Array(Nullable(String))'), CAST(default_timestamp, 'DateTime(\'UTC\')')) AS detected_standards, + argMaxState(CAST(NULL, 'Nullable(String)'), CAST(default_timestamp, 'DateTime(\'UTC\')')) AS amended_type, + argMaxState(CAST(NULL, 'Nullable(String)'), CAST(default_timestamp, 'DateTime(\'UTC\')')) AS comment, + groupUniqArrayState('tokens_proxy_deployments') AS _sources, + maxState(insertion_time) AS _updated_at, + argMaxState(true, CAST(default_timestamp, 'DateTime(\'UTC\')')) AS _active +FROM test +WHERE insertion_time > toDateTime('2024-03-14 11:38:09') +GROUP BY address); + +set max_insert_threads=4; +insert into test_tmp select * from generateRandom() limit 24; +insert into test_tmp select * from generateRandom() limit 25; +insert into test_tmp select * from generateRandom() limit 26; +insert into test_tmp select * from generateRandom() limit 30; + +INSERT INTO test(address, deployer, block_number, block_hash, block_timestamp, insertion_time) SELECT * FROM test_tmp; + +select count() from test; + +DROP TABLE test; +DROP TABLE test_tmp; +DROP TABLE dst; +DROP TABLE view; + From d7fb851d172e2955ab81ea107ed58c0867a1929f Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 4 Apr 2024 19:53:58 +0000 Subject: [PATCH 265/470] better handling of errors from azure storage --- .../IO/ReadBufferFromAzureBlobStorage.cpp | 41 ++++++++----------- .../IO/WriteBufferFromAzureBlobStorage.cpp | 20 ++++----- .../isRetryableAzureException.cpp | 19 +++++++++ .../isRetryableAzureException.h | 14 +++++++ src/Storages/MergeTree/MergeTreeData.cpp | 27 +++--------- src/Storages/MergeTree/checkDataPart.cpp | 38 ++++++++--------- src/Storages/StorageAzureBlob.cpp | 3 +- 7 files changed, 81 insertions(+), 81 deletions(-) create mode 100644 src/Disks/ObjectStorages/AzureBlobStorage/isRetryableAzureException.cpp create mode 100644 src/Disks/ObjectStorages/AzureBlobStorage/isRetryableAzureException.h diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp index 5947b742339e..68425c5ca18e 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp @@ -3,6 +3,7 @@ #if USE_AZURE_BLOB_STORAGE #include +#include #include #include #include @@ -101,18 +102,6 @@ bool 
ReadBufferFromAzureBlobStorage::nextImpl() size_t sleep_time_with_backoff_milliseconds = 100; - auto handle_exception = [&, this](const auto & e, size_t i) - { - LOG_DEBUG(log, "Exception caught during Azure Read for file {} at attempt {}/{}: {}", path, i + 1, max_single_read_retries, e.Message); - if (i + 1 == max_single_read_retries) - throw; - - sleepForMilliseconds(sleep_time_with_backoff_milliseconds); - sleep_time_with_backoff_milliseconds *= 2; - initialized = false; - initialize(); - }; - for (size_t i = 0; i < max_single_read_retries; ++i) { try @@ -124,7 +113,14 @@ bool ReadBufferFromAzureBlobStorage::nextImpl() } catch (const Azure::Core::RequestFailedException & e) { - handle_exception(e, i); + LOG_DEBUG(log, "Exception caught during Azure Read for file {} at attempt {}/{}: {}", path, i + 1, max_single_read_retries, e.Message); + if (i + 1 == max_single_read_retries || !isRetryableAzureException(e)) + throw; + + sleepForMilliseconds(sleep_time_with_backoff_milliseconds); + sleep_time_with_backoff_milliseconds *= 2; + initialized = false; + initialize(); } } @@ -213,16 +209,6 @@ void ReadBufferFromAzureBlobStorage::initialize() size_t sleep_time_with_backoff_milliseconds = 100; - auto handle_exception = [&, this](const auto & e, size_t i) - { - LOG_DEBUG(log, "Exception caught during Azure Download for file {} at offset {} at attempt {}/{}: {}", path, offset, i + 1, max_single_download_retries, e.Message); - if (i + 1 == max_single_download_retries) - throw; - - sleepForMilliseconds(sleep_time_with_backoff_milliseconds); - sleep_time_with_backoff_milliseconds *= 2; - }; - for (size_t i = 0; i < max_single_download_retries; ++i) { try @@ -233,7 +219,12 @@ void ReadBufferFromAzureBlobStorage::initialize() } catch (const Azure::Core::RequestFailedException & e) { - handle_exception(e,i); + LOG_DEBUG(log, "Exception caught during Azure Download for file {} at offset {} at attempt {}/{}: {}", path, offset, i + 1, max_single_download_retries, e.Message); + if (i + 1 == max_single_download_retries || !isRetryableAzureException(e)) + throw; + + sleepForMilliseconds(sleep_time_with_backoff_milliseconds); + sleep_time_with_backoff_milliseconds *= 2; } } @@ -283,7 +274,7 @@ size_t ReadBufferFromAzureBlobStorage::readBigAt(char * to, size_t n, size_t ran catch (const Azure::Core::RequestFailedException & e) { LOG_DEBUG(log, "Exception caught during Azure Download for file {} at offset {} at attempt {}/{}: {}", path, offset, i + 1, max_single_download_retries, e.Message); - if (i + 1 == max_single_download_retries) + if (i + 1 == max_single_download_retries || !isRetryableAzureException(e)) throw; sleepForMilliseconds(sleep_time_with_backoff_milliseconds); diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index fe64415191c5..921f99ffef33 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -3,6 +3,7 @@ #if USE_AZURE_BLOB_STORAGE #include +#include #include #include #include @@ -83,17 +84,6 @@ WriteBufferFromAzureBlobStorage::~WriteBufferFromAzureBlobStorage() void WriteBufferFromAzureBlobStorage::execWithRetry(std::function func, size_t num_tries, size_t cost) { - auto handle_exception = [&, this](const auto & e, size_t i) - { - if (cost) - write_settings.resource_link.accumulate(cost); // Accumulate resource for later use, because we have failed to consume it - - if (i == num_tries - 1) - throw; - - LOG_DEBUG(log, "Write at attempt {} for blob `{}` failed: {} 
{}", i + 1, blob_path, e.what(), e.Message); - }; - for (size_t i = 0; i < num_tries; ++i) { try @@ -104,7 +94,13 @@ void WriteBufferFromAzureBlobStorage::execWithRetry(std::function func, } catch (const Azure::Core::RequestFailedException & e) { - handle_exception(e, i); + if (cost) + write_settings.resource_link.accumulate(cost); // Accumulate resource for later use, because we have failed to consume it + + if (i == num_tries - 1 || !isRetryableAzureException(e)) + throw; + + LOG_DEBUG(log, "Write at attempt {} for blob `{}` failed: {} {}", i + 1, blob_path, e.what(), e.Message); } catch (...) { diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/isRetryableAzureException.cpp b/src/Disks/ObjectStorages/AzureBlobStorage/isRetryableAzureException.cpp new file mode 100644 index 000000000000..e32815e96132 --- /dev/null +++ b/src/Disks/ObjectStorages/AzureBlobStorage/isRetryableAzureException.cpp @@ -0,0 +1,19 @@ +#include +#if USE_AZURE_BLOB_STORAGE + +namespace DB +{ + +bool isRetryableAzureRequestException(const Azure::Core::RequestFailedException & e) +{ + /// Always retry transport errors. + if (dynamic_cast(&e)) + return true; + + /// Retry other 5xx errors just in case. + return e.StatusCode >= Azure::Core::Http::HttpStatusCode::InternalServerError; +} + +#endif + +} diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/isRetryableAzureException.h b/src/Disks/ObjectStorages/AzureBlobStorage/isRetryableAzureException.h new file mode 100644 index 000000000000..dfd13e4c98a0 --- /dev/null +++ b/src/Disks/ObjectStorages/AzureBlobStorage/isRetryableAzureException.h @@ -0,0 +1,14 @@ +#pragma once +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE +#include + +namespace DB +{ + +bool isRetryableAzureException(const Azure::Core::RequestFailedException & e); + +} + +#endif diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index e5ace0e5969d..1f2ed96f11b2 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -8,7 +8,6 @@ #include #include #include -#include "Common/logger_useful.h" #include #include #include @@ -1312,7 +1311,8 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( /// during loading, such as "not enough memory" or network error. if (isRetryableException(std::current_exception())) throw; - LOG_DEBUG(log, "Failed to load data part {}, unknown exception", part_name); + + LOG_DEBUG(log, "Failed to load data part {} with exception: {}", part_name, getExceptionMessage(std::current_exception(), false)); mark_broken(); return res; } @@ -1343,6 +1343,7 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( /// during loading, such as "not enough memory" or network error. if (isRetryableException(std::current_exception())) throw; + mark_broken(); return res; } @@ -1461,25 +1462,9 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPartWithRetries( if (try_no + 1 == max_tries) throw; - String exception_message; - try - { - rethrow_exception(exception_ptr); - } - catch (const Exception & e) - { - exception_message = e.message(); - } - #if USE_AZURE_BLOB_STORAGE - catch (const Azure::Core::RequestFailedException & e) - { - exception_message = e.Message; - } - #endif - - - LOG_DEBUG(log, "Failed to load data part {} at try {} with retryable error: {}. Will retry in {} ms", - part_name, try_no, exception_message, initial_backoff_ms); + LOG_DEBUG(log, + "Failed to load data part {} at try {} with retryable error: {}. 
Will retry in {} ms", + part_name, try_no, getExceptionMessage(exception_ptr, false), initial_backoff_ms); std::this_thread::sleep_for(std::chrono::milliseconds(initial_backoff_ms)); initial_backoff_ms = std::min(initial_backoff_ms * 2, max_backoff_ms); diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index d64568e0c3e0..208da561118e 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -1,5 +1,4 @@ #include -#include #include #include @@ -16,11 +15,9 @@ #include #include #include +#include #include -#if USE_AZURE_BLOB_STORAGE -#include -#endif namespace CurrentMetrics { @@ -66,33 +63,28 @@ bool isRetryableException(std::exception_ptr exception_ptr) #if USE_AWS_S3 catch (const S3Exception & s3_exception) { - if (s3_exception.isRetryableError()) - return true; + return s3_exception.isRetryableError(); } #endif #if USE_AZURE_BLOB_STORAGE - catch (const Azure::Core::RequestFailedException &) + catch (const Azure::Core::RequestFailedException & e) { - return true; + return isRetryableAzureException(e); } #endif catch (const ErrnoException & e) { - if (e.getErrno() == EMFILE) - return true; + return e.getErrno() == EMFILE; } - catch (const Coordination::Exception & e) + catch (const Coordination::Exception & e) { - if (Coordination::isHardwareError(e.code)) - return true; + return Coordination::isHardwareError(e.code); } catch (const Exception & e) { - if (isNotEnoughMemoryErrorCode(e.code())) - return true; - - if (e.code() == ErrorCodes::NETWORK_ERROR || e.code() == ErrorCodes::SOCKET_TIMEOUT) - return true; + return isNotEnoughMemoryErrorCode(e.code()) + || e.code() == ErrorCodes::NETWORK_ERROR + || e.code() == ErrorCodes::SOCKET_TIMEOUT; } catch (const Poco::Net::NetException &) { @@ -102,10 +94,12 @@ bool isRetryableException(std::exception_ptr exception_ptr) { return true; } - - /// In fact, there can be other similar situations. - /// But it is OK, because there is a safety guard against deleting too many parts. - return false; + catch (...) + { + /// In fact, there can be other similar situations. + /// But it is OK, because there is a safety guard against deleting too many parts. 
+ return false; + } } diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 306a5eac8e59..86e96f295806 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -432,7 +432,8 @@ AzureClientPtr StorageAzureBlob::createClient(StorageAzureBlob::Configuration co try { result = std::make_unique(blob_service_client->CreateBlobContainer(configuration.container).Value); - } catch (const Azure::Storage::StorageException & e) + } + catch (const Azure::Storage::StorageException & e) { if (e.StatusCode == Azure::Core::Http::HttpStatusCode::Conflict && e.ReasonPhrase == "The specified container already exists.") From 0f5a3eae7e67c4b82d593aaf1d5f67d70a64d01d Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 4 Apr 2024 20:00:22 +0000 Subject: [PATCH 266/470] Better --- src/Functions/getScalar.cpp | 2 +- src/Interpreters/Context.cpp | 12 ++++++------ src/Interpreters/Context.h | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/Functions/getScalar.cpp b/src/Functions/getScalar.cpp index d72c84b8528e..7196cbc0a361 100644 --- a/src/Functions/getScalar.cpp +++ b/src/Functions/getScalar.cpp @@ -83,7 +83,7 @@ class FunctionGetSpecialScalar : public IFunction static ColumnWithTypeAndName createScalar(ContextPtr context_) { - if (const auto * block = context_->tryGetSpecialScalar(Scalar::scalar_name)) + if (auto block = context_->tryGetSpecialScalar(Scalar::scalar_name)) return block->getByPosition(0); else if (context_->hasQueryContext()) { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 0f28e5d87718..9c14a0485a33 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -48,7 +48,7 @@ #include #include #include -#include +//#include #include #include #include @@ -1550,14 +1550,14 @@ ClassifierPtr Context::getWorkloadClassifier() const } -const Scalars & Context::getScalars() const +Scalars Context::getScalars() const { std::lock_guard lock(mutex); return scalars; } -const Block & Context::getScalar(const String & name) const +Block Context::getScalar(const String & name) const { std::lock_guard lock(mutex); @@ -1571,13 +1571,13 @@ const Block & Context::getScalar(const String & name) const return it->second; } -const Block * Context::tryGetSpecialScalar(const String & name) const +std::optional Context::tryGetSpecialScalar(const String & name) const { std::lock_guard lock(mutex); auto it = special_scalars.find(name); if (special_scalars.end() == it) - return nullptr; - return &it->second; + return std::nullopt; + return it->second; } Tables Context::getExternalTables() const diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 8601d09621f4..a0225cb2f9a5 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -680,12 +680,12 @@ class Context: public ContextData, public std::enable_shared_from_this std::shared_ptr findExternalTable(const String & table_name) const; std::shared_ptr removeExternalTable(const String & table_name); - const Scalars & getScalars() const; - const Block & getScalar(const String & name) const; + Scalars getScalars() const; + Block getScalar(const String & name) const; void addScalar(const String & name, const Block & block); bool hasScalar(const String & name) const; - const Block * tryGetSpecialScalar(const String & name) const; + std::optional tryGetSpecialScalar(const String & name) const; void addSpecialScalar(const String & name, const Block & block); const QueryAccessInfo & getQueryAccessInfo() 
const { return *getQueryAccessInfoPtr(); } From 7329b9509a89ad027dd038677780ec44660a9e76 Mon Sep 17 00:00:00 2001 From: avogar Date: Thu, 4 Apr 2024 20:01:05 +0000 Subject: [PATCH 267/470] Fix header --- src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 9c14a0485a33..faee0602f563 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -48,7 +48,7 @@ #include #include #include -//#include +#include #include #include #include From d7b7e7e79031fd69db8f47d6cd51663293d12cc8 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 4 Apr 2024 22:09:21 +0200 Subject: [PATCH 268/470] Improve comment. --- src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index fd5f3853a6b6..36f927310f3c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -799,8 +799,8 @@ std::pair, bool> ReplicatedMergeTreeSinkImpl:: throw Exception( ErrorCodes::TABLE_IS_READ_ONLY, "Table is in readonly mode due to shutdown: replica_path={}", storage.replica_path); - /// When we attach existing parts it's okay to be in read-only mode - /// For example during RESTORE REPLICA. + /// Usually parts should not be attached in read-only mode. So we retry until the table is not read-only. + /// However there is one case when it's necessary to attach in read-only mode - during execution of the RESTORE REPLICA command. if (!allow_attach_while_readonly) { retries_ctl.setUserError( From 43d898874453f9a3a8cac4e264605dfed38c788f Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 4 Apr 2024 22:10:06 +0200 Subject: [PATCH 269/470] Update tests/integration/test_backup_restore_on_cluster/test_slow_rmt.py Co-authored-by: pufit --- .../integration/test_backup_restore_on_cluster/test_slow_rmt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_backup_restore_on_cluster/test_slow_rmt.py b/tests/integration/test_backup_restore_on_cluster/test_slow_rmt.py index 987f86694885..211b92f5ea33 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_slow_rmt.py +++ b/tests/integration/test_backup_restore_on_cluster/test_slow_rmt.py @@ -82,7 +82,7 @@ def test_replicated_database_async(): node1.query("SYSTEM SYNC REPLICA ON CLUSTER 'cluster' mydb.tbl") backup_name = new_backup_name() - [id, status] = node1.query( + id, status = node1.query( f"BACKUP DATABASE mydb ON CLUSTER 'cluster' TO {backup_name} ASYNC" ).split("\t") From 43687e57329e155a603fc9c427f55e42215ad19f Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 4 Apr 2024 22:10:16 +0200 Subject: [PATCH 270/470] Update tests/integration/test_backup_restore_on_cluster/test_slow_rmt.py Co-authored-by: pufit --- .../integration/test_backup_restore_on_cluster/test_slow_rmt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_backup_restore_on_cluster/test_slow_rmt.py b/tests/integration/test_backup_restore_on_cluster/test_slow_rmt.py index 211b92f5ea33..15c344eadf85 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_slow_rmt.py +++ b/tests/integration/test_backup_restore_on_cluster/test_slow_rmt.py @@ -96,7 +96,7 @@ def test_replicated_database_async(): node1.query("DROP DATABASE mydb ON CLUSTER 'cluster' SYNC") - [id, 
status] = node1.query( + id, status = node1.query( f"RESTORE DATABASE mydb ON CLUSTER 'cluster' FROM {backup_name} ASYNC" ).split("\t") From 1551ab7bd6b1562483157bd2496b535b612326f7 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 4 Apr 2024 23:21:00 +0200 Subject: [PATCH 271/470] Fix error --- packages/clickhouse-library-bridge.yaml | 8 ++++---- packages/clickhouse-odbc-bridge.yaml | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/packages/clickhouse-library-bridge.yaml b/packages/clickhouse-library-bridge.yaml index 95e7d4aaad00..d041e7a26dbc 100644 --- a/packages/clickhouse-library-bridge.yaml +++ b/packages/clickhouse-library-bridge.yaml @@ -26,10 +26,10 @@ contents: dst: /usr/bin/clickhouse-library-bridge # docs - src: ../AUTHORS - dst: /usr/share/doc/clickhouse-common-static/AUTHORS + dst: /usr/share/doc/clickhouse-library-bridge/AUTHORS - src: ../CHANGELOG.md - dst: /usr/share/doc/clickhouse-common-static/CHANGELOG.md + dst: /usr/share/doc/clickhouse-library-bridge/CHANGELOG.md - src: ../LICENSE - dst: /usr/share/doc/clickhouse-common-static/LICENSE + dst: /usr/share/doc/clickhouse-library-bridge/LICENSE - src: ../README.md - dst: /usr/share/doc/clickhouse-common-static/README.md + dst: /usr/share/doc/clickhouse-library-bridge/README.md diff --git a/packages/clickhouse-odbc-bridge.yaml b/packages/clickhouse-odbc-bridge.yaml index 2a7edf415499..98c459c8c26b 100644 --- a/packages/clickhouse-odbc-bridge.yaml +++ b/packages/clickhouse-odbc-bridge.yaml @@ -26,10 +26,10 @@ contents: dst: /usr/bin/clickhouse-odbc-bridge # docs - src: ../AUTHORS - dst: /usr/share/doc/clickhouse-common-static/AUTHORS + dst: /usr/share/doc/clickhouse-odbc-bridge/AUTHORS - src: ../CHANGELOG.md - dst: /usr/share/doc/clickhouse-common-static/CHANGELOG.md + dst: /usr/share/doc/clickhouse-odbc-bridge/CHANGELOG.md - src: ../LICENSE - dst: /usr/share/doc/clickhouse-common-static/LICENSE + dst: /usr/share/doc/clickhouse-odbc-bridge/LICENSE - src: ../README.md - dst: /usr/share/doc/clickhouse-common-static/README.md + dst: /usr/share/doc/clickhouse-odbc-bridge/README.md From b1bd34f66e82173bfc48c7e1a612a967562fcbc6 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 4 Apr 2024 20:25:49 +0000 Subject: [PATCH 272/470] fix --- src/Processors/QueryPlan/PartsSplitter.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index 2af1bcb02605..ec51875587e6 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -125,14 +126,18 @@ int compareValues(const Values & lhs, const Values & rhs) class IndexAccess { public: - explicit IndexAccess(const RangesInDataParts & parts_) : parts(parts_) { } + explicit IndexAccess(const RangesInDataParts & parts_) : parts(parts_) + { + for (const auto & part : parts) + loaded_columns = std::min(loaded_columns, part.data_part->getIndex().size()); + } Values getValue(size_t part_idx, size_t mark) const { const auto & index = parts[part_idx].data_part->getIndex(); - size_t size = index.size(); - Values values(size); - for (size_t i = 0; i < size; ++i) + chassert(index.size() >= loaded_columns); + Values values(loaded_columns); + for (size_t i = 0; i < loaded_columns; ++i) { index[i]->get(mark, values[i]); if (values[i].isNull()) @@ -199,6 +204,7 @@ class IndexAccess } private: const RangesInDataParts & parts; + size_t 
loaded_columns = std::numeric_limits::max(); }; class RangesInDataPartsBuilder From 6be747bf32a7f1fcd9fee8f86c72dd2b03e48c02 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Thu, 4 Apr 2024 22:28:29 +0000 Subject: [PATCH 273/470] add test --- .../__init__.py | 0 .../test.py | 47 +++++++++++++++++++ 2 files changed, 47 insertions(+) create mode 100644 tests/integration/test_final_bug_with_pk_columns_loading/__init__.py create mode 100644 tests/integration/test_final_bug_with_pk_columns_loading/test.py diff --git a/tests/integration/test_final_bug_with_pk_columns_loading/__init__.py b/tests/integration/test_final_bug_with_pk_columns_loading/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integration/test_final_bug_with_pk_columns_loading/test.py b/tests/integration/test_final_bug_with_pk_columns_loading/test.py new file mode 100644 index 000000000000..e710b9942dc5 --- /dev/null +++ b/tests/integration/test_final_bug_with_pk_columns_loading/test.py @@ -0,0 +1,47 @@ +import pytest +import logging + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node", stay_alive=True) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + yield cluster + finally: + cluster.shutdown() + + +def test_simple_query_after_restart(start_cluster): + node.query( + """ + create table t(a UInt32, b UInt32) engine=MergeTree order by (a, b) settings index_granularity=1; + + insert into t select 42, number from numbers_mt(100); + insert into t select number, number from numbers_mt(100); + """ + ) + + node.restart_clickhouse() + + assert ( + int( + node.query( + "select count() from t where not ignore(*)", + settings={ + "max_threads": 4, + "merge_tree_min_bytes_for_concurrent_read": 1, + "merge_tree_min_rows_for_concurrent_read": 1, + "merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability": 1, + }, + ) + ) + == 200 + ) From dd852da33925af8ba52a89034da774d512add241 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 4 Apr 2024 15:57:42 +0200 Subject: [PATCH 274/470] add more debug logs --- .../Net/include/Poco/Net/HTTPClientSession.h | 5 -- base/poco/Net/src/HTTPClientSession.cpp | 26 +++++-- base/poco/Net/src/HTTPMessage.cpp | 2 +- src/Common/HTTPConnectionPool.cpp | 68 ++++++++++++++++--- src/Disks/ObjectStorages/S3/diskSettings.cpp | 2 + src/IO/S3/Client.h | 8 +-- src/IO/S3/PocoHTTPClient.cpp | 3 +- src/IO/S3/PocoHTTPClient.h | 1 + src/IO/S3/tests/gtest_aws_s3_client.cpp | 2 +- 9 files changed, 91 insertions(+), 26 deletions(-) diff --git a/base/poco/Net/include/Poco/Net/HTTPClientSession.h b/base/poco/Net/include/Poco/Net/HTTPClientSession.h index b418937c4d58..cbf4619834b5 100644 --- a/base/poco/Net/include/Poco/Net/HTTPClientSession.h +++ b/base/poco/Net/include/Poco/Net/HTTPClientSession.h @@ -458,11 +458,6 @@ namespace Net return _lastRequest; } - inline void HTTPClientSession::setLastRequest(Poco::Timestamp time) - { - _lastRequest = time; - } - inline double HTTPClientSession::getKeepAliveReliability() const { return _defaultKeepAliveReliabilityLevel; diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp index 59800232ba95..afa1eff68a2b 100644 --- a/base/poco/Net/src/HTTPClientSession.cpp +++ b/base/poco/Net/src/HTTPClientSession.cpp @@ -223,12 +223,24 @@ void HTTPClientSession::setKeepAliveTimeout(const Poco::Timespan& 
timeout) { if (connected()) { - throw Poco::IllegalStateException("cannot change keep alive timeout on initiated connection"); + throw Poco::IllegalStateException("cannot change keep alive timeout on initiated connection, " + "That value is managed privately after connection is established."); } _keepAliveTimeout = timeout; } +void HTTPClientSession::setLastRequest(Poco::Timestamp time) +{ + if (connected()) + { + throw Poco::IllegalStateException("cannot change last request on initiated connection, " + "That value is managed privately after connection is established."); + } + _lastRequest = time; +} + + std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request) { _pRequestStream = 0; @@ -246,8 +258,8 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request) { if (!connected()) reconnect(); - if (!keepAlive) - request.setKeepAlive(false); + if (!request.has(HTTPMessage::CONNECTION)) + request.setKeepAlive(keepAlive); if (keepAlive && !request.has(HTTPMessage::CONNECTION_KEEP_ALIVE) && _keepAliveTimeout.totalSeconds() > 0) request.setKeepAliveTimeout(_keepAliveTimeout.totalSeconds()); if (!request.has(HTTPRequest::HOST) && !_host.empty()) @@ -528,14 +540,16 @@ void HTTPClientSession::assign(Poco::Net::HTTPClientSession & session) if (buffered()) throw Poco::LogicException("assign to a session with not empty buffered data"); - setLastRequest(session.getLastRequest()); + poco_assert(!connected()); + setResolvedHost(session.getResolvedHost()); setProxyConfig(session.getProxyConfig()); setTimeout(session.getConnectionTimeout(), session.getSendTimeout(), session.getReceiveTimeout()); setKeepAlive(session.getKeepAlive()); - if (!connected()) - setKeepAliveTimeout(session.getKeepAliveTimeout()); + + setLastRequest(session.getLastRequest()); + setKeepAliveTimeout(session.getKeepAliveTimeout()); attachSocket(session.detachSocket()); diff --git a/base/poco/Net/src/HTTPMessage.cpp b/base/poco/Net/src/HTTPMessage.cpp index 2f974b8bf0b8..af743dfa2eb8 100644 --- a/base/poco/Net/src/HTTPMessage.cpp +++ b/base/poco/Net/src/HTTPMessage.cpp @@ -182,7 +182,7 @@ bool HTTPMessage::getKeepAlive() const void HTTPMessage::setKeepAliveTimeout(int timeout) { - add(HTTPMessage::CONNECTION_KEEP_ALIVE, std::format("timeout={}", timeout)); + add(HTTPMessage::CONNECTION_KEEP_ALIVE, std::format("timeout={}, max=1000", timeout)); } diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index 2d3a87dda6b4..f64d6658a55f 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -193,6 +193,18 @@ class ConnectionGroup return total_connections_in_group >= limits.store_limit; } + size_t getStored() const + { + std::lock_guard lock(mutex); + return total_connections_in_group; + } + + size_t getStoreLimit() const + { + std::lock_guard lock(mutex); + return limits.store_limit; + } + void atConnectionCreate() { std::lock_guard lock(mutex); @@ -221,12 +233,6 @@ class ConnectionGroup } } - void atPoolDestroy(size_t connections) - { - std::lock_guard lock(mutex); - total_connections_in_group -= connections; - } - HTTPConnectionGroupType getType() const { return type; } const IHTTPConnectionPoolForEndpoint::Metrics & getMetrics() const { return metrics; } @@ -345,11 +351,29 @@ class EndpointConnectionPool : public std::enable_shared_from_this ", k, v)); + } + return out; + } + std::ostream & sendRequest(Poco::Net::HTTPRequest & request) override { std::ostream & result = Session::sendRequest(request); result.exceptions(std::ios::badbit); + // that line is 
for temporary debug, will be removed + LOG_INFO(log, "Send request to {} with: usage count {}, keep-alive timeout={}, headers: {}", + getTarget(), + usage_cnt, + Session::getKeepAliveTimeout().totalSeconds(), + printAllHeaders(request)); + request_stream = &result; request_stream_completed = false; @@ -368,9 +392,12 @@ class EndpointConnectionPool : public std::enable_shared_from_thisatConnectionDestroy(); if (!isExpired) + { if (auto lock = pool.lock()) lock->atConnectionDestroy(*this); + } + else + { + Poco::Timestamp now; + LOG_INFO(log, "Expired connection to {} with: usage count {}, keep alive timeout: {}, last usage ago: {}", + getTarget(), + usage_cnt, + Session::getKeepAliveTimeout().totalSeconds(), + Poco::Timespan(now - Session::getLastRequest()).totalSeconds()); + } CurrentMetrics::sub(metrics.active_count); } @@ -459,6 +497,7 @@ class EndpointConnectionPool : public std::enable_shared_from_thisusage_cnt += 1; + ProfileEvents::increment(getMetrics().reused, 1); CurrentMetrics::sub(getMetrics().stored_count, 1); @@ -647,12 +688,23 @@ class EndpointConnectionPool : public std::enable_shared_from_thisisStoreLimitReached()) { + Poco::Timestamp now; + LOG_INFO(getLogger("PooledConnection"), + "Reset connection to {} with: usage count {}, keep alive timeout: {}, last usage ago: {}, is completed {}, store limit reached {} as {}/{}", + getTarget(), + connection.usage_cnt, + connection.getKeepAliveTimeout().totalSeconds(), + Poco::Timespan(now - connection.getLastRequest()).totalSeconds(), + connection.isCompleted(), + group->isStoreLimitReached(), group->getStored(), group->getStoreLimit()); + ProfileEvents::increment(getMetrics().reset, 1); return; } auto connection_to_store = allocateNewConnection(); connection_to_store->assign(connection); + connection_to_store->usage_cnt = connection.usage_cnt; { MemoryTrackerSwitcher switcher{&total_memory_tracker}; diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index df1ccbb32d92..7ce94699053f 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -76,6 +76,8 @@ std::unique_ptr getClient( client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", S3::DEFAULT_CONNECT_TIMEOUT_MS); client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", S3::DEFAULT_REQUEST_TIMEOUT_MS); client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", S3::DEFAULT_MAX_CONNECTIONS); + client_configuration.http_keep_alive_timeout = config.getUInt(config_prefix + ".http_keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); + client_configuration.endpointOverride = uri.endpoint; client_configuration.s3_use_adaptive_timeouts = config.getBool( config_prefix + ".use_adaptive_timeouts", client_configuration.s3_use_adaptive_timeouts); diff --git a/src/IO/S3/Client.h b/src/IO/S3/Client.h index c7bc727bf32e..c79ec05c8c62 100644 --- a/src/IO/S3/Client.h +++ b/src/IO/S3/Client.h @@ -96,9 +96,9 @@ bool isS3ExpressEndpoint(const std::string & endpoint); struct ClientSettings { - bool use_virtual_addressing; + bool use_virtual_addressing = false; /// Disable checksum to avoid extra read of the input stream - bool disable_checksum; + bool disable_checksum = false; /// Should client send ComposeObject request after upload to GCS. 
/// /// Previously ComposeObject request was required to make Copy possible, @@ -108,8 +108,8 @@ struct ClientSettings /// /// Ability to enable it preserved since likely it is required for old /// files. - bool gcs_issue_compose_request; - bool is_s3express_bucket; + bool gcs_issue_compose_request = false; + bool is_s3express_bucket = false; }; /// Client that improves the client from the AWS SDK diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index a29a4b0b8ee5..150b8146147e 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -146,7 +146,8 @@ ConnectionTimeouts getTimeoutsFromConfiguration(const PocoHTTPClientConfiguratio .withSendTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) .withReceiveTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) .withTCPKeepAliveTimeout(Poco::Timespan( - client_configuration.enableTcpKeepAlive ? client_configuration.tcpKeepAliveIntervalMs * 1000 : 0)); + client_configuration.enableTcpKeepAlive ? client_configuration.tcpKeepAliveIntervalMs * 1000 : 0)) + .withHTTPKeepAliveTimeout(Poco::Timespan(client_configuration.http_keep_alive_timeout, 0)); } PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_configuration) diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index ebbddbb2c7ee..f568eb5ddb88 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -51,6 +51,7 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration /// See PoolBase::BehaviourOnLimit bool s3_use_adaptive_timeouts = true; + size_t http_keep_alive_timeout = DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT; std::function error_report; diff --git a/src/IO/S3/tests/gtest_aws_s3_client.cpp b/src/IO/S3/tests/gtest_aws_s3_client.cpp index 25786619241f..0a28c578f69d 100644 --- a/src/IO/S3/tests/gtest_aws_s3_client.cpp +++ b/src/IO/S3/tests/gtest_aws_s3_client.cpp @@ -159,7 +159,7 @@ void testServerSideEncryption( DB::S3::CredentialsConfiguration { .use_environment_credentials = use_environment_credentials, - .use_insecure_imds_request = use_insecure_imds_request + .use_insecure_imds_request = use_insecure_imds_request, } ); From 5cab8d185fb5ad1f8607a4ad7140a15469754e99 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 4 Apr 2024 19:29:42 +0200 Subject: [PATCH 275/470] more details --- base/poco/Net/src/HTTPClientSession.cpp | 2 +- src/Common/HTTPConnectionPool.cpp | 42 ++++++++++++++++--------- 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp index afa1eff68a2b..bc70559c5ebe 100644 --- a/base/poco/Net/src/HTTPClientSession.cpp +++ b/base/poco/Net/src/HTTPClientSession.cpp @@ -348,7 +348,7 @@ std::istream& HTTPClientSession::receiveResponse(HTTPResponse& response) /// when server sends its keep alive timeout, client has to follow that value auto timeout = response.getKeepAliveTimeout(); if (timeout > 0) - _keepAliveTimeout = Poco::Timespan(timeout, 0); + _keepAliveTimeout = std::min(_keepAliveTimeout, Poco::Timespan(timeout, 0)); } if (!_expectResponseBody || response.getStatus() < 200 || response.getStatus() == HTTPResponse::HTTP_NO_CONTENT || response.getStatus() == HTTPResponse::HTTP_NOT_MODIFIED) diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index f64d6658a55f..eb6ce00e611f 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -322,6 +322,11 @@ class 
EndpointConnectionPool : public std::enable_shared_from_thisusage_cnt += 1; - ProfileEvents::increment(getMetrics().reused, 1); CurrentMetrics::sub(getMetrics().stored_count, 1); @@ -690,13 +701,16 @@ class EndpointConnectionPool : public std::enable_shared_from_thisisStoreLimitReached(), group->getStored(), group->getStoreLimit()); + group->isStoreLimitReached(), group->getStored(), group->getStoreLimit(), + connection.exception_level - std::uncaught_exceptions()); ProfileEvents::increment(getMetrics().reset, 1); return; From ae3a1999398b4f16880e2d892cb11bb414944b81 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Thu, 4 Apr 2024 22:49:52 +0200 Subject: [PATCH 276/470] support max requests for keep alive --- .../Net/include/Poco/Net/HTTPClientSession.h | 20 ++++ base/poco/Net/include/Poco/Net/HTTPMessage.h | 3 +- base/poco/Net/src/HTTPClientSession.cpp | 33 +++++- base/poco/Net/src/HTTPMessage.cpp | 37 ++++--- src/Common/HTTPConnectionPool.cpp | 69 +++++++----- src/Common/tests/gtest_connection_pool.cpp | 103 ++++++++++++++++-- src/Core/Defines.h | 1 + src/Disks/ObjectStorages/S3/diskSettings.cpp | 3 +- src/IO/ConnectionTimeouts.cpp | 1 + src/IO/ConnectionTimeouts.h | 2 + src/IO/S3/Credentials.h | 2 + src/IO/S3/PocoHTTPClient.h | 1 + 12 files changed, 219 insertions(+), 56 deletions(-) diff --git a/base/poco/Net/include/Poco/Net/HTTPClientSession.h b/base/poco/Net/include/Poco/Net/HTTPClientSession.h index cbf4619834b5..edbb135d8c67 100644 --- a/base/poco/Net/include/Poco/Net/HTTPClientSession.h +++ b/base/poco/Net/include/Poco/Net/HTTPClientSession.h @@ -213,6 +213,12 @@ namespace Net Poco::Timespan getKeepAliveTimeout() const; /// Returns the connection timeout for HTTP connections. + void setKeepAliveMaxRequests(int max_requests); + + int getKeepAliveMaxRequests() const; + + int getKeepAliveRequest() const; + bool isKeepAliveExpired(double reliability = 1.0) const; /// Returns if the connection is expired with some margin as fraction of timeout as reliability @@ -352,6 +358,8 @@ namespace Net void assign(HTTPClientSession & session); + void setKeepAliveRequest(int request); + HTTPSessionFactory _proxySessionFactory; /// Factory to create HTTPClientSession to proxy. private: @@ -360,6 +368,8 @@ namespace Net Poco::UInt16 _port; ProxyConfig _proxyConfig; Poco::Timespan _keepAliveTimeout; + int _keepAliveCurrentRequest = 0; + int _keepAliveMaxRequests = 1000; Poco::Timestamp _lastRequest; bool _reconnect; bool _mustReconnect; @@ -463,6 +473,16 @@ namespace Net return _defaultKeepAliveReliabilityLevel; } + inline int HTTPClientSession::getKeepAliveMaxRequests() const + { + return _keepAliveMaxRequests; + } + + inline int HTTPClientSession::getKeepAliveRequest() const + { + return _keepAliveCurrentRequest; + } + } } // namespace Poco::Net diff --git a/base/poco/Net/include/Poco/Net/HTTPMessage.h b/base/poco/Net/include/Poco/Net/HTTPMessage.h index 994807ffbff5..8bc95ccc1af5 100644 --- a/base/poco/Net/include/Poco/Net/HTTPMessage.h +++ b/base/poco/Net/include/Poco/Net/HTTPMessage.h @@ -120,8 +120,9 @@ namespace Net /// The value is set to "Keep-Alive" if keepAlive is /// true, or to "Close" otherwise. 
- void setKeepAliveTimeout(int timeout); + void setKeepAliveTimeout(int timeout, int max_requests); int getKeepAliveTimeout() const; + int getKeepAliveMaxRequests() const; bool getKeepAlive() const; /// Returns true if diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp index bc70559c5ebe..e489ab56b987 100644 --- a/base/poco/Net/src/HTTPClientSession.cpp +++ b/base/poco/Net/src/HTTPClientSession.cpp @@ -230,7 +230,25 @@ void HTTPClientSession::setKeepAliveTimeout(const Poco::Timespan& timeout) } -void HTTPClientSession::setLastRequest(Poco::Timestamp time) +void HTTPClientSession::setKeepAliveMaxRequests(int max_requests) +{ + if (connected()) + { + throw Poco::IllegalStateException("cannot change keep alive max requests on initiated connection, " + "That value is managed privately after connection is established."); + } + _keepAliveMaxRequests = max_requests; +} + + +void HTTPClientSession::setKeepAliveRequest(int request) +{ + _keepAliveCurrentRequest = request; +} + + + + void HTTPClientSession::setLastRequest(Poco::Timestamp time) { if (connected()) { @@ -248,6 +266,8 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request) clearException(); _responseReceived = false; + _keepAliveCurrentRequest += 1; + bool keepAlive = getKeepAlive(); if (((connected() && !keepAlive) || mustReconnect()) && !_host.empty()) { @@ -261,7 +281,7 @@ std::ostream& HTTPClientSession::sendRequest(HTTPRequest& request) if (!request.has(HTTPMessage::CONNECTION)) request.setKeepAlive(keepAlive); if (keepAlive && !request.has(HTTPMessage::CONNECTION_KEEP_ALIVE) && _keepAliveTimeout.totalSeconds() > 0) - request.setKeepAliveTimeout(_keepAliveTimeout.totalSeconds()); + request.setKeepAliveTimeout(_keepAliveTimeout.totalSeconds(), _keepAliveMaxRequests); if (!request.has(HTTPRequest::HOST) && !_host.empty()) request.setHost(_host, _port); if (!_proxyConfig.host.empty() && !bypassProxy()) @@ -349,6 +369,9 @@ std::istream& HTTPClientSession::receiveResponse(HTTPResponse& response) auto timeout = response.getKeepAliveTimeout(); if (timeout > 0) _keepAliveTimeout = std::min(_keepAliveTimeout, Poco::Timespan(timeout, 0)); + auto max_requests = response.getKeepAliveMaxRequests(); + if (max_requests > 0) + _keepAliveMaxRequests = std::min(_keepAliveMaxRequests, max_requests); } if (!_expectResponseBody || response.getStatus() < 200 || response.getStatus() == HTTPResponse::HTTP_NO_CONTENT || response.getStatus() == HTTPResponse::HTTP_NOT_MODIFIED) @@ -460,7 +483,8 @@ std::string HTTPClientSession::proxyRequestPrefix() const bool HTTPClientSession::isKeepAliveExpired(double reliability) const { Poco::Timestamp now; - return Timespan(Timestamp::TimeDiff(reliability *_keepAliveTimeout.totalMicroseconds())) <= now - _lastRequest; + return Timespan(Timestamp::TimeDiff(reliability *_keepAliveTimeout.totalMicroseconds())) <= now - _lastRequest + || _keepAliveCurrentRequest > _keepAliveMaxRequests; } bool HTTPClientSession::mustReconnect() const @@ -551,6 +575,9 @@ void HTTPClientSession::assign(Poco::Net::HTTPClientSession & session) setLastRequest(session.getLastRequest()); setKeepAliveTimeout(session.getKeepAliveTimeout()); + _keepAliveMaxRequests = session._keepAliveMaxRequests; + _keepAliveCurrentRequest = session._keepAliveCurrentRequest; + attachSocket(session.detachSocket()); session.reset(); diff --git a/base/poco/Net/src/HTTPMessage.cpp b/base/poco/Net/src/HTTPMessage.cpp index af743dfa2eb8..c0083ec410c1 100644 --- a/base/poco/Net/src/HTTPMessage.cpp +++ 
b/base/poco/Net/src/HTTPMessage.cpp @@ -180,27 +180,25 @@ bool HTTPMessage::getKeepAlive() const } -void HTTPMessage::setKeepAliveTimeout(int timeout) +void HTTPMessage::setKeepAliveTimeout(int timeout, int max_requests) { - add(HTTPMessage::CONNECTION_KEEP_ALIVE, std::format("timeout={}, max=1000", timeout)); + add(HTTPMessage::CONNECTION_KEEP_ALIVE, std::format("timeout={}, max={}", timeout, max_requests)); } -int parseTimeoutFromHeaderValue(const std::string_view header_value) +int parseFromHeaderValues(const std::string_view header_value, const std::string_view param_name) { - static const std::string_view timeout_param = "timeout="; - - auto timeout_pos = header_value.find(timeout_param); - if (timeout_pos == std::string::npos) - timeout_pos = header_value.size(); - if (timeout_pos != header_value.size()) - timeout_pos += timeout_param.size(); + auto param_value_pos = header_value.find(param_name); + if (param_value_pos == std::string::npos) + param_value_pos = header_value.size(); + if (param_value_pos != header_value.size()) + param_value_pos += param_name.size(); - auto timeout_end = header_value.find(',', timeout_pos); - if (timeout_end == std::string::npos) - timeout_end = header_value.size(); + auto param_value_end = header_value.find(',', param_value_pos); + if (param_value_end == std::string::npos) + param_value_end = header_value.size(); - auto timeout_value_substr = header_value.substr(timeout_pos, timeout_end - timeout_pos); + auto timeout_value_substr = header_value.substr(param_value_pos, param_value_end - param_value_pos); if (timeout_value_substr.empty()) return -1; @@ -217,7 +215,16 @@ int parseTimeoutFromHeaderValue(const std::string_view header_value) int HTTPMessage::getKeepAliveTimeout() const { const std::string& ka_header = get(HTTPMessage::CONNECTION_KEEP_ALIVE, HTTPMessage::EMPTY); - return parseTimeoutFromHeaderValue(ka_header); + static const std::string_view timeout_param = "timeout="; + return parseFromHeaderValues(ka_header, timeout_param); +} + + +int HTTPMessage::getKeepAliveMaxRequests() const +{ + const std::string& ka_header = get(HTTPMessage::CONNECTION_KEEP_ALIVE, HTTPMessage::EMPTY); + static const std::string_view timeout_param = "max="; + return parseFromHeaderValues(ka_header, timeout_param); } } } // namespace Poco::Net diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index eb6ce00e611f..926222934e48 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -301,6 +301,8 @@ class EndpointConnectionPool : public std::enable_shared_from_thisgetConnection(timeouts); Session::assign(*new_connection); + if (Session::getKeepAliveRequest() == 0) + Session::setKeepAliveRequest(1); } else { @@ -322,7 +324,8 @@ class EndpointConnectionPool : public std::enable_shared_from_this - explicit PooledConnection(EndpointConnectionPool::WeakPtr pool_, ConnectionGroup::Ptr group_, IHTTPConnectionPoolForEndpoint::Metrics metrics_, Args &&... args) - : Session(args...), pool(std::move(pool_)), group(group_), metrics(std::move(metrics_)) + explicit PooledConnection( + EndpointConnectionPool::WeakPtr pool_, + ConnectionGroup::Ptr group_, + IHTTPConnectionPoolForEndpoint::Metrics metrics_, + Args &&... args) + : Session(args...) 
+ , pool(std::move(pool_)) + , group(group_) + , metrics(std::move(metrics_)) { CurrentMetrics::add(metrics.active_count); group->atConnectionCreate(); @@ -508,7 +518,7 @@ class EndpointConnectionPool : public std::enable_shared_from_thisusage_cnt += 1; ProfileEvents::increment(getMetrics().reused, 1); CurrentMetrics::sub(getMetrics().stored_count, 1); @@ -655,47 +664,50 @@ class EndpointConnectionPool : public std::enable_shared_from_thisisKeepAliveExpired(0.8); } - ConnectionPtr allocateNewConnection() + + ConnectionPtr prepareNewConnection(const ConnectionTimeouts & timeouts) { - ConnectionPtr connection = PooledConnection::create(this->getWeakFromThis(), group, getMetrics(), host, port); + auto connection = PooledConnection::create(this->getWeakFromThis(), group, getMetrics(), host, port); + connection->setKeepAlive(true); + setTimeouts(*connection, timeouts); if (!proxy_configuration.isEmpty()) { connection->setProxyConfig(proxyConfigurationToPocoProxyConfig(proxy_configuration)); } - return connection; - } - - ConnectionPtr prepareNewConnection(const ConnectionTimeouts & timeouts) - { auto address = HostResolversPool::instance().getResolver(host)->resolve(); - - auto session = allocateNewConnection(); - - setTimeouts(*session, timeouts); - session->setResolvedHost(*address); + connection->setResolvedHost(*address); try { auto timer = CurrentThread::getProfileEvents().timer(getMetrics().elapsed_microseconds); - session->doConnect(); + connection->doConnect(); } catch (...) { address.setFail(); ProfileEvents::increment(getMetrics().errors); - session->reset(); + connection->reset(); throw; } ProfileEvents::increment(getMetrics().created); - return session; + return connection; } void atConnectionDestroy(PooledConnection & connection) { + if (connection.getKeepAliveRequest() >= connection.getKeepAliveMaxRequests()) + { + LOG_INFO(getLogger("PooledConnection"), "Expired by connection number {}", + connection.getKeepAliveRequest()); + + ProfileEvents::increment(getMetrics().expired, 1); + return; + } + if (!connection.connected() || connection.mustReconnect() || !connection.isCompleted() || connection.buffered() || group->isStoreLimitReached()) { @@ -703,7 +715,7 @@ class EndpointConnectionPool : public std::enable_shared_from_thisgetWeakFromThis(), group, getMetrics(), host, port); connection_to_store->assign(connection); - connection_to_store->usage_cnt = connection.usage_cnt; { MemoryTrackerSwitcher switcher{&total_memory_tracker}; diff --git a/src/Common/tests/gtest_connection_pool.cpp b/src/Common/tests/gtest_connection_pool.cpp index 36bf8bc7dae2..cc091d12bb0d 100644 --- a/src/Common/tests/gtest_connection_pool.cpp +++ b/src/Common/tests/gtest_connection_pool.cpp @@ -47,6 +47,7 @@ struct RequestOptions { size_t slowdown_receive = 0; int overwrite_keep_alive_timeout = 0; + int overwrite_keep_alive_max_requests = 10; }; size_t stream_copy_n(std::istream & in, std::ostream & out, std::size_t count = std::numeric_limits::max()) @@ -89,8 +90,10 @@ class MockRequestHandler : public Poco::Net::HTTPRequestHandler int value = request.getKeepAliveTimeout(); ASSERT_GT(value, 0); - if (options->get().overwrite_keep_alive_timeout > 0) - response.setKeepAliveTimeout(options->get().overwrite_keep_alive_timeout); + auto params = options->get(); + + if (params.overwrite_keep_alive_timeout > 0) + response.setKeepAliveTimeout(params.overwrite_keep_alive_timeout, params.overwrite_keep_alive_max_requests); response.setStatus(Poco::Net::HTTPResponse::HTTP_OK); auto size = request.getContentLength(); @@ 
-99,8 +102,8 @@ class MockRequestHandler : public Poco::Net::HTTPRequestHandler else response.setChunkedTransferEncoding(true); // or chunk encoding - if (options->get().slowdown_receive > 0) - sleepForSeconds(options->get().slowdown_receive); + if (params.slowdown_receive > 0) + sleepForSeconds(params.slowdown_receive); stream_copy_n(request.stream(), response.send(), size); } @@ -189,10 +192,11 @@ class ConnectionPoolTest : public testing::Test { options->set(std::move(opt)); } - void setOverWriteTimeout(size_t seconds) + void setOverWriteKeepAlive(size_t seconds, int max_requests) { auto opt = options->get(); opt.overwrite_keep_alive_timeout = int(seconds); + opt.overwrite_keep_alive_max_requests= max_requests; options->set(std::move(opt)); } @@ -794,7 +798,7 @@ TEST_F(ConnectionPoolTest, ServerOverwriteKeepAlive) } { - setOverWriteTimeout(1); + setOverWriteKeepAlive(1, 10); auto connection = pool->getConnection(timeouts); echoRequest("Hello", *connection); ASSERT_EQ(30, timeouts.http_keep_alive_timeout.totalSeconds()); @@ -803,7 +807,7 @@ TEST_F(ConnectionPoolTest, ServerOverwriteKeepAlive) { // server do not overwrite it in the following requests but client has to remember last agreed value - setOverWriteTimeout(0); + setOverWriteKeepAlive(0, 0); auto connection = pool->getConnection(timeouts); echoRequest("Hello", *connection); ASSERT_EQ(30, timeouts.http_keep_alive_timeout.totalSeconds()); @@ -819,3 +823,88 @@ TEST_F(ConnectionPoolTest, ServerOverwriteKeepAlive) ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); } + +TEST_F(ConnectionPoolTest, MaxRequests) +{ + auto ka = Poco::Timespan(30, 0); // 30 seconds + timeouts.withHTTPKeepAliveTimeout(ka); + auto max_requests = 5; + timeouts.http_keep_alive_max_requests = max_requests; + + auto pool = getPool(); + auto metrics = pool->getMetrics(); + + for (int i = 1; i <= max_requests - 1; ++i) + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(30, connection->getKeepAliveTimeout().totalSeconds()); + ASSERT_EQ(max_requests, connection->getKeepAliveMaxRequests()); + ASSERT_EQ(i, connection->getKeepAliveRequest()); + } + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(max_requests-1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(max_requests-2, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(1, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(1, CurrentMetrics::get(metrics.stored_count)); + + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(30, connection->getKeepAliveTimeout().totalSeconds()); + ASSERT_EQ(max_requests, connection->getKeepAliveMaxRequests()); + ASSERT_EQ(max_requests, connection->getKeepAliveRequest()); + } + + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(max_requests-1, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(max_requests-1, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(1, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); +} + + 
+TEST_F(ConnectionPoolTest, ServerOverwriteMaxRequests) +{ + auto ka = Poco::Timespan(30, 0); // 30 seconds + timeouts.withHTTPKeepAliveTimeout(ka); + + auto pool = getPool(); + auto metrics = pool->getMetrics(); + + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(30, connection->getKeepAliveTimeout().totalSeconds()); + ASSERT_EQ(1000, connection->getKeepAliveMaxRequests()); + ASSERT_EQ(1, connection->getKeepAliveRequest()); + } + + auto max_requests = 3; + setOverWriteKeepAlive(5, max_requests); + + for (int i = 2; i <= 10*max_requests; ++i) + { + auto connection = pool->getConnection(timeouts); + echoRequest("Hello", *connection); + ASSERT_EQ(5, connection->getKeepAliveTimeout().totalSeconds()); + ASSERT_EQ(max_requests, connection->getKeepAliveMaxRequests()); + ASSERT_EQ(((i-1) % max_requests) + 1, connection->getKeepAliveRequest()); + } + + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[metrics.created]); + ASSERT_EQ(10*max_requests-10, DB::CurrentThread::getProfileEvents()[metrics.preserved]); + ASSERT_EQ(10*max_requests-10, DB::CurrentThread::getProfileEvents()[metrics.reused]); + ASSERT_EQ(0, DB::CurrentThread::getProfileEvents()[metrics.reset]); + ASSERT_EQ(10, DB::CurrentThread::getProfileEvents()[metrics.expired]); + + ASSERT_EQ(0, CurrentMetrics::get(metrics.active_count)); + ASSERT_EQ(0, CurrentMetrics::get(metrics.stored_count)); +} diff --git a/src/Core/Defines.h b/src/Core/Defines.h index a8dd26519c2f..f2142bc764d4 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -54,6 +54,7 @@ static constexpr auto DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT = 15; static constexpr auto DEFAULT_TCP_KEEP_ALIVE_TIMEOUT = 290; static constexpr auto DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT = 30; +static constexpr auto DEFAULT_HTTP_KEEP_ALIVE_MAX_REQUEST = 1000; static constexpr auto DBMS_DEFAULT_PATH = "/var/lib/clickhouse/"; diff --git a/src/Disks/ObjectStorages/S3/diskSettings.cpp b/src/Disks/ObjectStorages/S3/diskSettings.cpp index 7ce94699053f..c3114eb0b6ff 100644 --- a/src/Disks/ObjectStorages/S3/diskSettings.cpp +++ b/src/Disks/ObjectStorages/S3/diskSettings.cpp @@ -76,7 +76,8 @@ std::unique_ptr getClient( client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", S3::DEFAULT_CONNECT_TIMEOUT_MS); client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", S3::DEFAULT_REQUEST_TIMEOUT_MS); client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", S3::DEFAULT_MAX_CONNECTIONS); - client_configuration.http_keep_alive_timeout = config.getUInt(config_prefix + ".http_keep_alive_timeout", DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT); + client_configuration.http_keep_alive_timeout = config.getUInt(config_prefix + ".http_keep_alive_timeout", S3::DEFAULT_KEEP_ALIVE_TIMEOUT); + client_configuration.http_keep_alive_max_requests = config.getUInt(config_prefix + ".http_keep_alive_max_requests", S3::DEFAULT_KEEP_ALIVE_MAX_REQUESTS); client_configuration.endpointOverride = uri.endpoint; client_configuration.s3_use_adaptive_timeouts = config.getBool( diff --git a/src/IO/ConnectionTimeouts.cpp b/src/IO/ConnectionTimeouts.cpp index 8813c9581852..da6214ae4771 100644 --- a/src/IO/ConnectionTimeouts.cpp +++ b/src/IO/ConnectionTimeouts.cpp @@ -148,6 +148,7 @@ void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeout if (!session.connected()) { session.setKeepAliveTimeout(timeouts.http_keep_alive_timeout); + 
session.setKeepAliveMaxRequests(int(timeouts.http_keep_alive_max_requests)); } } diff --git a/src/IO/ConnectionTimeouts.h b/src/IO/ConnectionTimeouts.h index 49305f42d85b..f497285bd0c2 100644 --- a/src/IO/ConnectionTimeouts.h +++ b/src/IO/ConnectionTimeouts.h @@ -35,6 +35,8 @@ struct ConnectionTimeouts Poco::Timespan tcp_keep_alive_timeout = Poco::Timespan(DEFAULT_TCP_KEEP_ALIVE_TIMEOUT, 0); Poco::Timespan http_keep_alive_timeout = Poco::Timespan(DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT, 0); + size_t http_keep_alive_max_requests = DEFAULT_HTTP_KEEP_ALIVE_MAX_REQUEST; + /// Timeouts for HedgedConnections Poco::Timespan hedged_connection_timeout = Poco::Timespan(DBMS_DEFAULT_RECEIVE_TIMEOUT_SEC, 0); diff --git a/src/IO/S3/Credentials.h b/src/IO/S3/Credentials.h index 34dc0c1d2bd0..8d5862230350 100644 --- a/src/IO/S3/Credentials.h +++ b/src/IO/S3/Credentials.h @@ -22,6 +22,8 @@ inline static constexpr uint64_t DEFAULT_EXPIRATION_WINDOW_SECONDS = 120; inline static constexpr uint64_t DEFAULT_CONNECT_TIMEOUT_MS = 1000; inline static constexpr uint64_t DEFAULT_REQUEST_TIMEOUT_MS = 30000; inline static constexpr uint64_t DEFAULT_MAX_CONNECTIONS = 100; +inline static constexpr uint64_t DEFAULT_KEEP_ALIVE_TIMEOUT = 5; +inline static constexpr uint64_t DEFAULT_KEEP_ALIVE_MAX_REQUESTS = 100; /// In GCP metadata service can be accessed via DNS regardless of IPv4 or IPv6. static inline constexpr char GCP_METADATA_SERVICE_ENDPOINT[] = "http://metadata.google.internal"; diff --git a/src/IO/S3/PocoHTTPClient.h b/src/IO/S3/PocoHTTPClient.h index f568eb5ddb88..a0b35e9b4a9a 100644 --- a/src/IO/S3/PocoHTTPClient.h +++ b/src/IO/S3/PocoHTTPClient.h @@ -52,6 +52,7 @@ struct PocoHTTPClientConfiguration : public Aws::Client::ClientConfiguration /// See PoolBase::BehaviourOnLimit bool s3_use_adaptive_timeouts = true; size_t http_keep_alive_timeout = DEFAULT_HTTP_KEEP_ALIVE_TIMEOUT; + size_t http_keep_alive_max_requests = DEFAULT_HTTP_KEEP_ALIVE_MAX_REQUEST; std::function error_report; From dddb0d9f4a83569e9a64952b20acfc95da2cdf24 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 5 Apr 2024 03:02:45 +0200 Subject: [PATCH 277/470] fix http_keep_alive_max_requests set up --- src/IO/ConnectionTimeouts.h | 7 +++++++ src/IO/S3/PocoHTTPClient.cpp | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/IO/ConnectionTimeouts.h b/src/IO/ConnectionTimeouts.h index f497285bd0c2..b86ec44d21c0 100644 --- a/src/IO/ConnectionTimeouts.h +++ b/src/IO/ConnectionTimeouts.h @@ -71,6 +71,7 @@ APPLY_FOR_ALL_CONNECTION_TIMEOUT_MEMBERS(DECLARE_BUILDER_FOR_MEMBER) ConnectionTimeouts & withConnectionTimeout(size_t seconds); ConnectionTimeouts & withConnectionTimeout(Poco::Timespan span); + ConnectionTimeouts & withHTTPKeepAliveMaxRequests(size_t requests); }; /// NOLINTBEGIN(bugprone-macro-parentheses) @@ -116,6 +117,12 @@ inline ConnectionTimeouts & ConnectionTimeouts::withConnectionTimeout(Poco::Time return *this; } +inline ConnectionTimeouts & ConnectionTimeouts::withHTTPKeepAliveMaxRequests(size_t requests) +{ + http_keep_alive_max_requests = requests; + return *this; +} + void setTimeouts(Poco::Net::HTTPClientSession & session, const ConnectionTimeouts & timeouts); ConnectionTimeouts getTimeouts(const Poco::Net::HTTPClientSession & session); diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 150b8146147e..de20a712d4c2 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -147,7 +147,8 @@ ConnectionTimeouts getTimeoutsFromConfiguration(const PocoHTTPClientConfiguratio 
.withReceiveTimeout(Poco::Timespan(client_configuration.requestTimeoutMs * 1000)) .withTCPKeepAliveTimeout(Poco::Timespan( client_configuration.enableTcpKeepAlive ? client_configuration.tcpKeepAliveIntervalMs * 1000 : 0)) - .withHTTPKeepAliveTimeout(Poco::Timespan(client_configuration.http_keep_alive_timeout, 0)); + .withHTTPKeepAliveTimeout(Poco::Timespan(client_configuration.http_keep_alive_timeout, 0)) + .withHTTPKeepAliveMaxRequests(client_configuration.http_keep_alive_max_requests); } PocoHTTPClient::PocoHTTPClient(const PocoHTTPClientConfiguration & client_configuration) From cf982cc114ef5b226815360590e2c207516de658 Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 5 Apr 2024 05:00:01 +0200 Subject: [PATCH 278/470] remove debug logging --- src/Common/HTTPConnectionPool.cpp | 76 ------------------------------- 1 file changed, 76 deletions(-) diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index 81c36dcd9708..7f99d6a647f0 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -193,18 +193,6 @@ class ConnectionGroup return total_connections_in_group >= limits.store_limit; } - size_t getStored() const - { - std::lock_guard lock(mutex); - return total_connections_in_group; - } - - size_t getStoreLimit() const - { - std::lock_guard lock(mutex); - return limits.store_limit; - } - void atConnectionCreate() { std::lock_guard lock(mutex); @@ -359,33 +347,12 @@ class EndpointConnectionPool : public std::enable_shared_from_this ", k, v)); - } - return out; - } - std::ostream & sendRequest(Poco::Net::HTTPRequest & request) override { auto idle = idleTime(); std::ostream & result = Session::sendRequest(request); result.exceptions(std::ios::badbit); - // that line is for temporary debug, will be removed - LOG_INFO(log, "Send request to {} with: version {}, method {}, request no {}, keep-alive timeout={}, last usage ago: {}ms, headers: {}", - request.getVersion(), - request.getMethod(), - getTarget(), - Session::getKeepAliveRequest(), - Session::getKeepAliveTimeout().totalSeconds(), - idle.totalMilliseconds(), - printAllHeaders(request)); - request_stream = &result; request_stream_completed = false; @@ -397,22 +364,9 @@ class EndpointConnectionPool : public std::enable_shared_from_thisatConnectionDestroy(); if (!isExpired) - { if (auto lock = pool.lock()) lock->atConnectionDestroy(*this); - } - else - { - Poco::Timestamp now; - LOG_INFO(log, "Expired connection to {} with: request no {}, keep alive timeout: {}, last usage ago: {}s", - getTarget(), - Session::getKeepAliveRequest(), - Session::getKeepAliveTimeout().totalSeconds(), - idleTime().totalSeconds()); - } CurrentMetrics::sub(metrics.active_count); } @@ -519,8 +462,6 @@ class EndpointConnectionPool : public std::enable_shared_from_this= connection.getKeepAliveMaxRequests()) { - LOG_INFO(getLogger("PooledConnection"), "Expired by connection number {}", - connection.getKeepAliveRequest()); - ProfileEvents::increment(getMetrics().expired, 1); return; } @@ -711,19 +649,6 @@ class EndpointConnectionPool : public std::enable_shared_from_thisisStoreLimitReached()) { - Poco::Timestamp now; - LOG_INFO(getLogger("PooledConnection"), - "Reset connection to {} with: usage count {}, keep alive timeout: {}, connected {}, must recon {}, last usage ago: {}, is completed {}, store limit reached {} as {}/{}, there is exception {}", - getTarget(), - connection.getKeepAliveRequest(), - connection.getKeepAliveTimeout().totalSeconds(), - connection.connected(), - 
connection.mustReconnect(), - connection.idleTime().totalSeconds(), - connection.isCompleted(), - group->isStoreLimitReached(), group->getStored(), group->getStoreLimit(), - connection.exception_level - std::uncaught_exceptions()); - ProfileEvents::increment(getMetrics().reset, 1); return; } @@ -833,7 +758,6 @@ class HTTPConnectionPools::Impl ConnectionGroup::Ptr storage_group = std::make_shared(HTTPConnectionGroupType::STORAGE); ConnectionGroup::Ptr http_group = std::make_shared(HTTPConnectionGroupType::HTTP); - /// If multiple mutexes are held simultaneously, /// they should be locked in this order: /// HTTPConnectionPools::mutex, then EndpointConnectionPool::mutex, then ConnectionGroup::mutex. From f47f96a84eb4c45d2c62eb3a6672ed556c5189bc Mon Sep 17 00:00:00 2001 From: Anita Hammer <166057949+anitahammer@users.noreply.github.com> Date: Fri, 5 Apr 2024 09:32:23 +0300 Subject: [PATCH 279/470] Update settings.md --- .../server-configuration-parameters/settings.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 436321c8fe8f..f87b6144deb8 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -436,7 +436,7 @@ Default: 0 Restriction on dropping partitions. If the size of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `max_partition_size_to_drop` (in bytes), you can’t drop a partition using a [DROP PARTITION](../../sql-reference/statements/alter/partition.md#drop-partitionpart) query. -This setting does not require a restart of the Clickhouse server to apply. Another way to disable the restriction is to create the `/flags/force_drop_table` file. +This setting does not require a restart of the ClickHouse server to apply. Another way to disable the restriction is to create the `/flags/force_drop_table` file. Default value: 50 GB. The value 0 means that you can drop partitions without any restrictions. @@ -518,7 +518,7 @@ Restriction on deleting tables. If the size of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `max_table_size_to_drop` (in bytes), you can’t delete it using a [DROP](../../sql-reference/statements/drop.md) query or [TRUNCATE](../../sql-reference/statements/truncate.md) query. -This setting does not require a restart of the Clickhouse server to apply. Another way to disable the restriction is to create the `/flags/force_drop_table` file. +This setting does not require a restart of the ClickHouse server to apply. Another way to disable the restriction is to create the `/flags/force_drop_table` file. Default value: 50 GB. The value 0 means that you can delete all tables without any restrictions. @@ -1570,7 +1570,7 @@ Restriction on deleting tables. If the size of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `max_table_size_to_drop` (in bytes), you can’t delete it using a [DROP](../../sql-reference/statements/drop.md) query or [TRUNCATE](../../sql-reference/statements/truncate.md) query. -This setting does not require a restart of the Clickhouse server to apply. Another way to disable the restriction is to create the `/flags/force_drop_table` file. +This setting does not require a restart of the ClickHouse server to apply. Another way to disable the restriction is to create the `/flags/force_drop_table` file. 
Default value: 50 GB. @@ -1588,7 +1588,7 @@ Restriction on dropping partitions. If the size of a [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table exceeds `max_partition_size_to_drop` (in bytes), you can’t drop a partition using a [DROP PARTITION](../../sql-reference/statements/alter/partition.md#drop-partitionpart) query. -This setting does not require a restart of the Clickhouse server to apply. Another way to disable the restriction is to create the `/flags/force_drop_table` file. +This setting does not require a restart of the ClickHouse server to apply. Another way to disable the restriction is to create the `/flags/force_drop_table` file. Default value: 50 GB. From a0bb341e82fd91e67cf1645b3c1dfecc224e7462 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 19 Mar 2024 13:40:15 +0100 Subject: [PATCH 280/470] Fix 02943_rmt_alter_metadata_merge_checksum_mismatch flakiness CI: https://s3.amazonaws.com/clickhouse-test-reports/60225/082a686cd1e450e18876d7a67d679c2905ec8589/fast_test.html Signed-off-by: Azat Khuzhin --- .../02943_rmt_alter_metadata_merge_checksum_mismatch.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh index 9c0c872eb069..27950866e816 100755 --- a/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh +++ b/tests/queries/0_stateless/02943_rmt_alter_metadata_merge_checksum_mismatch.sh @@ -82,6 +82,8 @@ $CLICKHOUSE_CLIENT -q "optimize table $success_replica final settings optimize_t $CLICKHOUSE_CLIENT -nm --insert_keeper_fault_injection_probability=0 -q " insert into $success_replica (key) values (2); -- part all_2_2_0 + -- Avoid 'Cannot select parts for optimization: Entry for part all_2_2_0 hasn't been read from the replication log yet' + system sync replica $success_replica pull; optimize table $success_replica final settings optimize_throw_if_noop=1, alter_sync=1; -- part all_0_2_2_1 system sync replica $failed_replica pull; " From 81eda37f7f0a62cd1a4499c56a66daa7ef981827 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 5 Apr 2024 10:27:13 +0200 Subject: [PATCH 281/470] Print correct count --- utils/postprocess-traces/postprocess-traces.pl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/postprocess-traces/postprocess-traces.pl b/utils/postprocess-traces/postprocess-traces.pl index 3e50f64d864e..1c198908580c 100755 --- a/utils/postprocess-traces/postprocess-traces.pl +++ b/utils/postprocess-traces/postprocess-traces.pl @@ -13,9 +13,9 @@ sub process_stacktrace my $group = \$grouped_stacks; for my $frame (reverse @current_stack) { + $group = \$$group->{children}{$frame}; $$group->{count} ||= 0; ++$$group->{count}; - $group = \$$group->{children}{$frame}; } @current_stack = (); @@ -47,7 +47,7 @@ sub print_group for my $key (sort { $group->{children}{$b}{count} <=> $group->{children}{$a}{count} } keys %{$group->{children}}) { - my $count = $group->{count}; + my $count = $group->{children}{$key}{count}; print(('| ' x $level) . $count . (' ' x (5 - (length $count))) . $key . 
"\n"); print_group($group->{children}{$key}, $level + 1); } From 9d8f643f5b306ff02ed8e55dd776afb04e67de49 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 4 Apr 2024 20:58:35 +0000 Subject: [PATCH 282/470] Cleanup SSH-based authentication code --- contrib/libssh-cmake/CMakeLists.txt | 55 ++++++-------- programs/client/Client.cpp | 4 +- src/Access/Authentication.cpp | 37 ++++++--- src/Access/AuthenticationData.cpp | 13 ++-- src/Access/AuthenticationData.h | 14 +++- src/Access/Common/AuthenticationType.h | 4 +- src/Access/Credentials.h | 5 +- src/Access/User.cpp | 2 +- src/Access/UsersConfigAccessStorage.cpp | 8 +- src/CMakeLists.txt | 5 +- src/Client/Connection.cpp | 36 ++++----- src/Client/Connection.h | 12 +-- src/Client/ConnectionParameters.cpp | 11 ++- src/Client/ConnectionParameters.h | 7 +- src/Client/ConnectionPool.h | 2 +- .../{SSH/Wrappers.cpp => SSHWrapper.cpp} | 75 +++++++++---------- src/Common/{SSH/Wrappers.h => SSHWrapper.h} | 32 +++----- src/Core/Protocol.h | 9 ++- src/Parsers/Access/ParserPublicSSHKey.cpp | 2 +- src/Server/TCPHandler.cpp | 50 ++++++------- src/Server/TCPHandler.h | 3 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- .../0_stateless/02867_create_user_ssh.sql | 12 ++- 23 files changed, 203 insertions(+), 197 deletions(-) rename src/Common/{SSH/Wrappers.cpp => SSHWrapper.cpp} (66%) rename src/Common/{SSH/Wrappers.h => SSHWrapper.h} (73%) diff --git a/contrib/libssh-cmake/CMakeLists.txt b/contrib/libssh-cmake/CMakeLists.txt index 7b589718140c..ecd1fccb800e 100644 --- a/contrib/libssh-cmake/CMakeLists.txt +++ b/contrib/libssh-cmake/CMakeLists.txt @@ -1,26 +1,18 @@ -option (ENABLE_SSH "Enable support for SSH keys and protocol" ${ENABLE_LIBRARIES}) +option (ENABLE_SSH "Enable support for libssh" ${ENABLE_LIBRARIES}) if (NOT ENABLE_SSH) - message(STATUS "Not using SSH") + message(STATUS "Not using libssh") return() endif() +# CMake variables needed by libssh_version.h.cmake, update them when you update libssh +set(libssh_VERSION_MAJOR 0) +set(libssh_VERSION_MINOR 9) +set(libssh_VERSION_PATCH 8) + set(LIB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libssh") set(LIB_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/libssh") -# Set CMake variables which are used in libssh_version.h.cmake -project(libssh VERSION 0.9.8 LANGUAGES C) - -set(LIBRARY_VERSION "4.8.8") -set(LIBRARY_SOVERSION "4") - -set(CMAKE_THREAD_PREFER_PTHREADS ON) -set(THREADS_PREFER_PTHREAD_FLAG ON) - -set(WITH_ZLIB OFF) -set(WITH_SYMBOL_VERSIONING OFF) -set(WITH_SERVER ON) - set(libssh_SRCS ${LIB_SOURCE_DIR}/src/agent.c ${LIB_SOURCE_DIR}/src/auth.c @@ -28,15 +20,21 @@ set(libssh_SRCS ${LIB_SOURCE_DIR}/src/bignum.c ${LIB_SOURCE_DIR}/src/buffer.c ${LIB_SOURCE_DIR}/src/callbacks.c + ${LIB_SOURCE_DIR}/src/chachapoly.c ${LIB_SOURCE_DIR}/src/channels.c ${LIB_SOURCE_DIR}/src/client.c ${LIB_SOURCE_DIR}/src/config.c + ${LIB_SOURCE_DIR}/src/config_parser.c ${LIB_SOURCE_DIR}/src/connect.c ${LIB_SOURCE_DIR}/src/connector.c ${LIB_SOURCE_DIR}/src/curve25519.c ${LIB_SOURCE_DIR}/src/dh.c ${LIB_SOURCE_DIR}/src/ecdh.c ${LIB_SOURCE_DIR}/src/error.c + ${LIB_SOURCE_DIR}/src/external/bcrypt_pbkdf.c + ${LIB_SOURCE_DIR}/src/external/blowfish.c + ${LIB_SOURCE_DIR}/src/external/chacha.c + ${LIB_SOURCE_DIR}/src/external/poly1305.c ${LIB_SOURCE_DIR}/src/getpass.c ${LIB_SOURCE_DIR}/src/init.c ${LIB_SOURCE_DIR}/src/kdf.c @@ -55,37 +53,32 @@ set(libssh_SRCS ${LIB_SOURCE_DIR}/src/pcap.c ${LIB_SOURCE_DIR}/src/pki.c ${LIB_SOURCE_DIR}/src/pki_container_openssh.c + ${LIB_SOURCE_DIR}/src/pki_ed25519_common.c 
${LIB_SOURCE_DIR}/src/poll.c - ${LIB_SOURCE_DIR}/src/session.c ${LIB_SOURCE_DIR}/src/scp.c + ${LIB_SOURCE_DIR}/src/session.c ${LIB_SOURCE_DIR}/src/socket.c ${LIB_SOURCE_DIR}/src/string.c ${LIB_SOURCE_DIR}/src/threads.c - ${LIB_SOURCE_DIR}/src/wrapper.c - ${LIB_SOURCE_DIR}/src/external/bcrypt_pbkdf.c - ${LIB_SOURCE_DIR}/src/external/blowfish.c - ${LIB_SOURCE_DIR}/src/external/chacha.c - ${LIB_SOURCE_DIR}/src/external/poly1305.c - ${LIB_SOURCE_DIR}/src/chachapoly.c - ${LIB_SOURCE_DIR}/src/config_parser.c ${LIB_SOURCE_DIR}/src/token.c - ${LIB_SOURCE_DIR}/src/pki_ed25519_common.c + ${LIB_SOURCE_DIR}/src/wrapper.c + # some files of libssh/src/ are missing - why? ${LIB_SOURCE_DIR}/src/threads/noop.c ${LIB_SOURCE_DIR}/src/threads/pthread.c + # files missing - why? # LIBCRYPT specific - ${libssh_SRCS} - ${LIB_SOURCE_DIR}/src/threads/libcrypto.c - ${LIB_SOURCE_DIR}/src/pki_crypto.c + ${LIB_SOURCE_DIR}/src/dh_crypto.c ${LIB_SOURCE_DIR}/src/ecdh_crypto.c ${LIB_SOURCE_DIR}/src/libcrypto.c - ${LIB_SOURCE_DIR}/src/dh_crypto.c + ${LIB_SOURCE_DIR}/src/pki_crypto.c + ${LIB_SOURCE_DIR}/src/threads/libcrypto.c - ${LIB_SOURCE_DIR}/src/options.c - ${LIB_SOURCE_DIR}/src/server.c ${LIB_SOURCE_DIR}/src/bind.c ${LIB_SOURCE_DIR}/src/bind_config.c + ${LIB_SOURCE_DIR}/src/options.c + ${LIB_SOURCE_DIR}/src/server.c ) if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC)) @@ -94,7 +87,7 @@ endif() configure_file(${LIB_SOURCE_DIR}/include/libssh/libssh_version.h.cmake ${LIB_BINARY_DIR}/include/libssh/libssh_version.h @ONLY) -add_library(_ssh STATIC ${libssh_SRCS}) +add_library(_ssh ${libssh_SRCS}) add_library(ch_contrib::ssh ALIAS _ssh) target_link_libraries(_ssh PRIVATE OpenSSL::Crypto) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 192f9e61891d..72cad1dac076 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -934,8 +934,8 @@ void Client::addOptions(OptionsDescription & options_description) ("user,u", po::value()->default_value("default"), "user") ("password", po::value(), "password") ("ask-password", "ask-password") - ("ssh-key-file", po::value(), "File containing ssh private key needed for authentication. 
If not set does password authentication.") - ("ssh-key-passphrase", po::value(), "Passphrase for imported ssh key.") + ("ssh-key-file", po::value(), "File containing the SSH private key for authenticate with the server.") + ("ssh-key-passphrase", po::value(), "Passphrase for the SSH private key specified by --ssh-key-file.") ("quota_key", po::value(), "A string to differentiate quotas when the user have keyed quotas configured on server") ("max_client_network_bandwidth", po::value(), "the maximum speed of data exchange over the network for the client in bytes per second.") diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index 47187d831548..bf1fe3feec3e 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -4,11 +4,12 @@ #include #include #include -#include #include +#include +#include #include -#include +#include "config.h" namespace DB { @@ -74,7 +75,7 @@ namespace } #if USE_SSH - bool checkSshSignature(const std::vector & keys, std::string_view signature, std::string_view original) + bool checkSshSignature(const std::vector & keys, std::string_view signature, std::string_view original) { for (const auto & key: keys) if (key.isPublic() && key.verifySignature(signature, original)) @@ -114,7 +115,11 @@ bool Authentication::areCredentialsValid( throw Authentication::Require("ClickHouse X.509 Authentication"); case AuthenticationType::SSH_KEY: - throw Authentication::Require("Ssh Keys Authentication"); +#if USE_SSH + throw Authentication::Require("SSH Keys Authentication"); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); +#endif case AuthenticationType::MAX: break; @@ -145,7 +150,11 @@ bool Authentication::areCredentialsValid( throw Authentication::Require("ClickHouse X.509 Authentication"); case AuthenticationType::SSH_KEY: - throw Authentication::Require("Ssh Keys Authentication"); +#if USE_SSH + throw Authentication::Require("SSH Keys Authentication"); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); +#endif case AuthenticationType::MAX: break; @@ -178,7 +187,11 @@ bool Authentication::areCredentialsValid( throw Authentication::Require("ClickHouse X.509 Authentication"); case AuthenticationType::SSH_KEY: - throw Authentication::Require("Ssh Keys Authentication"); +#if USE_SSH + throw Authentication::Require("SSH Keys Authentication"); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); +#endif case AuthenticationType::BCRYPT_PASSWORD: return checkPasswordBcrypt(basic_credentials->getPassword(), auth_data.getPasswordHashBinary()); @@ -216,13 +229,18 @@ bool Authentication::areCredentialsValid( return auth_data.getSSLCertificateCommonNames().contains(ssl_certificate_credentials->getCommonName()); case AuthenticationType::SSH_KEY: - throw Authentication::Require("Ssh Keys Authentication"); +#if USE_SSH + throw Authentication::Require("SSH Keys Authentication"); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); +#endif case AuthenticationType::MAX: break; } } +#if USE_SSH if (const auto * ssh_credentials = typeid_cast(&credentials)) { switch (auth_data.getType()) @@ -243,15 +261,12 @@ bool Authentication::areCredentialsValid( throw Authentication::Require("ClickHouse X.509 Authentication"); case AuthenticationType::SSH_KEY: -#if USE_SSH return 
checkSshSignature(auth_data.getSSHKeys(), ssh_credentials->getSignature(), ssh_credentials->getOriginal()); -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL"); -#endif case AuthenticationType::MAX: break; } } +#endif if ([[maybe_unused]] const auto * always_allow_credentials = typeid_cast(&credentials)) return true; diff --git a/src/Access/AuthenticationData.cpp b/src/Access/AuthenticationData.cpp index da90a0f5842c..a4c25b438e88 100644 --- a/src/Access/AuthenticationData.cpp +++ b/src/Access/AuthenticationData.cpp @@ -105,7 +105,10 @@ bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs) return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash) && (lhs.ldap_server_name == rhs.ldap_server_name) && (lhs.kerberos_realm == rhs.kerberos_realm) && (lhs.ssl_certificate_common_names == rhs.ssl_certificate_common_names) - && (lhs.ssh_keys == rhs.ssh_keys) && (lhs.http_auth_scheme == rhs.http_auth_scheme) +#if USE_SSH + && (lhs.ssh_keys == rhs.ssh_keys) +#endif + && (lhs.http_auth_scheme == rhs.http_auth_scheme) && (lhs.http_auth_server_name == rhs.http_auth_server_name); } @@ -326,7 +329,7 @@ std::shared_ptr AuthenticationData::toAST() const break; #else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL"); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); #endif } case AuthenticationType::HTTP: @@ -355,7 +358,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que { #if USE_SSH AuthenticationData auth_data(*query.type); - std::vector keys; + std::vector keys; size_t args_size = query.children.size(); for (size_t i = 0; i < args_size; ++i) @@ -366,7 +369,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que try { - keys.emplace_back(ssh::SSHKeyFactory::makePublicFromBase64(key_base64, type)); + keys.emplace_back(SSHKeyFactory::makePublicKeyFromBase64(key_base64, type)); } catch (const std::invalid_argument &) { @@ -377,7 +380,7 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que auth_data.setSSHKeys(std::move(keys)); return auth_data; #else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL"); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); #endif } diff --git a/src/Access/AuthenticationData.h b/src/Access/AuthenticationData.h index feef4d71d668..c97e0327b569 100644 --- a/src/Access/AuthenticationData.h +++ b/src/Access/AuthenticationData.h @@ -2,14 +2,16 @@ #include #include +#include #include #include -#include #include #include #include +#include "config.h" + namespace DB { @@ -59,8 +61,10 @@ class AuthenticationData const boost::container::flat_set & getSSLCertificateCommonNames() const { return ssl_certificate_common_names; } void setSSLCertificateCommonNames(boost::container::flat_set common_names_); - const std::vector & getSSHKeys() const { return ssh_keys; } - void setSSHKeys(std::vector && ssh_keys_) { ssh_keys = std::forward>(ssh_keys_); } +#if USE_SSH + const std::vector & getSSHKeys() const { return ssh_keys; } + void setSSHKeys(std::vector && ssh_keys_) { ssh_keys = std::forward>(ssh_keys_); } +#endif HTTPAuthenticationScheme getHTTPAuthenticationScheme() const { return http_auth_scheme; } void 
setHTTPAuthenticationScheme(HTTPAuthenticationScheme scheme) { http_auth_scheme = scheme; } @@ -94,7 +98,9 @@ class AuthenticationData String kerberos_realm; boost::container::flat_set ssl_certificate_common_names; String salt; - std::vector ssh_keys; +#if USE_SSH + std::vector ssh_keys; +#endif /// HTTP authentication properties String http_auth_server_name; HTTPAuthenticationScheme http_auth_scheme = HTTPAuthenticationScheme::BASIC; diff --git a/src/Access/Common/AuthenticationType.h b/src/Access/Common/AuthenticationType.h index 48ace3ca00a9..506c8abd3b11 100644 --- a/src/Access/Common/AuthenticationType.h +++ b/src/Access/Common/AuthenticationType.h @@ -34,8 +34,8 @@ enum class AuthenticationType /// Password is encrypted in bcrypt hash. BCRYPT_PASSWORD, - /// Server sends a random string named `challenge` which client needs to encrypt with private key. - /// The check is performed on server side by decrypting the data and comparing with the original string. + /// Server sends a random string named `challenge` to the client. The client encrypts it with its SSH private key. + /// The server decrypts the result using the SSH public key registered for the user and compares with the original string. SSH_KEY, /// Authentication through HTTP protocol diff --git a/src/Access/Credentials.h b/src/Access/Credentials.h index 77b90eaaebce..d04f8a66541d 100644 --- a/src/Access/Credentials.h +++ b/src/Access/Credentials.h @@ -3,6 +3,7 @@ #include #include +#include "config.h" namespace DB { @@ -86,10 +87,11 @@ class MySQLNative41Credentials : public CredentialsWithScramble using CredentialsWithScramble::CredentialsWithScramble; }; +#if USE_SSH class SshCredentials : public Credentials { public: - explicit SshCredentials(const String& user_name_, const String& signature_, const String& original_) + SshCredentials(const String & user_name_, const String & signature_, const String & original_) : Credentials(user_name_), signature(signature_), original(original_) { is_ready = true; @@ -117,5 +119,6 @@ class SshCredentials : public Credentials String signature; String original; }; +#endif } diff --git a/src/Access/User.cpp b/src/Access/User.cpp index 39930c9cf76b..ef5cf7221130 100644 --- a/src/Access/User.cpp +++ b/src/Access/User.cpp @@ -31,7 +31,7 @@ void User::setName(const String & name_) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name is empty"); if (name_ == EncodedUserInfo::USER_INTERSERVER_MARKER) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_); - if (startsWith(name_, EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER)) + if (name_.starts_with(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER)) throw Exception(ErrorCodes::BAD_ARGUMENTS, "User name '{}' is reserved", name_); name = name_; } diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index b4b843fc77ea..e3c45eb45aeb 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include @@ -10,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -214,7 +214,7 @@ namespace Poco::Util::AbstractConfiguration::Keys entries; config.keys(ssh_keys_config, entries); - std::vector keys; + std::vector keys; for (const String& entry : entries) { const auto conf_pref = ssh_keys_config + "." 
+ entry + "."; @@ -237,7 +237,7 @@ namespace try { - keys.emplace_back(ssh::SSHKeyFactory::makePublicFromBase64(base64_key, type)); + keys.emplace_back(SSHKeyFactory::makePublicKeyFromBase64(base64_key, type)); } catch (const std::invalid_argument &) { @@ -249,7 +249,7 @@ namespace } user->auth_data.setSSHKeys(std::move(keys)); #else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL"); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); #endif } else if (has_http_auth) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 73aa409e9958..da17bc1f41f3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -85,7 +85,6 @@ add_headers_and_sources(clickhouse_common_io Common) add_headers_and_sources(clickhouse_common_io Common/HashTable) add_headers_and_sources(clickhouse_common_io Common/Scheduler) add_headers_and_sources(clickhouse_common_io Common/Scheduler/Nodes) -add_headers_and_sources(clickhouse_common_io Common/SSH) add_headers_and_sources(clickhouse_common_io IO) add_headers_and_sources(clickhouse_common_io IO/Archives) add_headers_and_sources(clickhouse_common_io IO/S3) @@ -99,7 +98,6 @@ add_headers_and_sources(clickhouse_compression Core) #Included these specific files to avoid linking grpc add_glob(clickhouse_compression_headers Server/ServerType.h) add_glob(clickhouse_compression_sources Server/ServerType.cpp) -add_headers_and_sources(clickhouse_compression Common/SSH) add_library(clickhouse_compression ${clickhouse_compression_headers} ${clickhouse_compression_sources}) @@ -370,8 +368,7 @@ if (TARGET ch_contrib::crc32-vpmsum) endif() if (TARGET ch_contrib::ssh) - target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::ssh) - target_link_libraries(clickhouse_compression PUBLIC ch_contrib::ssh) + target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::ssh) endif() dbms_target_link_libraries(PUBLIC ch_contrib::abseil_swiss_tables) diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 180942e6b838..5a1d7a2acc48 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -67,7 +67,7 @@ Connection::~Connection() = default; Connection::Connection(const String & host_, UInt16 port_, const String & default_database_, const String & user_, const String & password_, - const ssh::SSHKey & ssh_private_key_, + [[maybe_unused]] const SSHKey & ssh_private_key_, const String & quota_key_, const String & cluster_, const String & cluster_secret_, @@ -76,7 +76,9 @@ Connection::Connection(const String & host_, UInt16 port_, Protocol::Secure secure_) : host(host_), port(port_), default_database(default_database_) , user(user_), password(password_) +#if USE_SSH , ssh_private_key(ssh_private_key_) +#endif , quota_key(quota_key_) , cluster(cluster_) , cluster_secret(cluster_secret_) @@ -276,17 +278,6 @@ void Connection::disconnect() } -String Connection::packStringForSshSign(String challenge) -{ - String message; - message.append(std::to_string(DBMS_TCP_PROTOCOL_VERSION)); - message.append(default_database); - message.append(user); - message.append(challenge); - return message; -} - - void Connection::sendHello() { /** Disallow control characters in user controlled parameters @@ -334,10 +325,10 @@ void Connection::sendHello() #endif } #if USE_SSH - /// Just inform server that we will authenticate using SSH keys. 
else if (!ssh_private_key.isEmpty()) { - writeStringBinary(fmt::format("{}{}", EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER, user), *out); + /// Inform server that we will authenticate using SSH keys. + writeStringBinary(String(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER) + user, *out); writeStringBinary(password, *out); performHandshakeForSSHAuth(); @@ -361,9 +352,9 @@ void Connection::sendAddendum() } +#if USE_SSH void Connection::performHandshakeForSSHAuth() { -#if USE_SSH String challenge; { writeVarUInt(Protocol::Client::SSHChallengeRequest, *out); @@ -388,12 +379,23 @@ void Connection::performHandshakeForSSHAuth() } writeVarUInt(Protocol::Client::SSHChallengeResponse, *out); - String to_sign = packStringForSshSign(challenge); + + auto pack_string_for_ssh_sign = [&](String challenge_) + { + String message; + message.append(std::to_string(DBMS_TCP_PROTOCOL_VERSION)); + message.append(default_database); + message.append(user); + message.append(challenge_); + return message; + }; + + String to_sign = pack_string_for_ssh_sign(challenge); String signature = ssh_private_key.signString(to_sign); writeStringBinary(signature, *out); out->next(); -#endif } +#endif void Connection::receiveHello(const Poco::Timespan & handshake_timeout) diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 5d0411027a1c..2cd325afed29 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -1,10 +1,9 @@ #pragma once - #include -#include #include +#include #include #include @@ -53,7 +52,7 @@ class Connection : public IServerConnection Connection(const String & host_, UInt16 port_, const String & default_database_, const String & user_, const String & password_, - const ssh::SSHKey & ssh_private_key_, + const SSHKey & ssh_private_key_, const String & quota_key_, const String & cluster_, const String & cluster_secret_, @@ -170,7 +169,9 @@ class Connection : public IServerConnection String default_database; String user; String password; - ssh::SSHKey ssh_private_key; +#if USE_SSH + SSHKey ssh_private_key; +#endif String quota_key; /// For inter-server authorization @@ -265,9 +266,10 @@ class Connection : public IServerConnection void connect(const ConnectionTimeouts & timeouts); void sendHello(); - String packStringForSshSign(String challenge); +#if USE_SSH void performHandshakeForSSHAuth(); +#endif void sendAddendum(); void receiveHello(const Poco::Timespan & handshake_timeout); diff --git a/src/Client/ConnectionParameters.cpp b/src/Client/ConnectionParameters.cpp index 16911f97e84a..774f3375f63f 100644 --- a/src/Client/ConnectionParameters.cpp +++ b/src/Client/ConnectionParameters.cpp @@ -1,11 +1,10 @@ #include "ConnectionParameters.h" -#include + #include #include #include #include #include -#include #include #include #include @@ -88,19 +87,19 @@ ConnectionParameters::ConnectionParameters(const Poco::Util::AbstractConfigurati } else { - std::string prompt{"Enter your private key passphrase (leave empty for no passphrase): "}; + std::string prompt{"Enter your SSH private key passphrase (leave empty for no passphrase): "}; char buf[1000] = {}; if (auto * result = readpassphrase(prompt.c_str(), buf, sizeof(buf), 0)) passphrase = result; } - ssh::SSHKey key = ssh::SSHKeyFactory::makePrivateFromFile(filename, passphrase); + SSHKey key = SSHKeyFactory::makePrivateKeyFromFile(filename, passphrase); if (!key.isPrivate()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Found public key in file: {} but expected private", filename); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "File {} did not 
contain a private key (is it a public key?)", filename); ssh_private_key = std::move(key); #else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without OpenSSL"); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); #endif } diff --git a/src/Client/ConnectionParameters.h b/src/Client/ConnectionParameters.h index 5f375f09c83f..f23522d48b3b 100644 --- a/src/Client/ConnectionParameters.h +++ b/src/Client/ConnectionParameters.h @@ -1,9 +1,10 @@ #pragma once -#include +#include #include #include -#include + +#include namespace Poco::Util { @@ -20,7 +21,7 @@ struct ConnectionParameters std::string user; std::string password; std::string quota_key; - ssh::SSHKey ssh_private_key; + SSHKey ssh_private_key; Protocol::Secure security = Protocol::Secure::Disable; Protocol::Compression compression = Protocol::Compression::Enable; ConnectionTimeouts timeouts; diff --git a/src/Client/ConnectionPool.h b/src/Client/ConnectionPool.h index 574c4992d752..d35c25524616 100644 --- a/src/Client/ConnectionPool.h +++ b/src/Client/ConnectionPool.h @@ -123,7 +123,7 @@ class ConnectionPool : public IConnectionPool, private PoolBase { return std::make_shared( host, port, - default_database, user, password, ssh::SSHKey(), quota_key, + default_database, user, password, SSHKey(), quota_key, cluster, cluster_secret, client_name, compression, secure); } diff --git a/src/Common/SSH/Wrappers.cpp b/src/Common/SSHWrapper.cpp similarity index 66% rename from src/Common/SSH/Wrappers.cpp rename to src/Common/SSHWrapper.cpp index a9b9f758c6e3..0ed266f215cd 100644 --- a/src/Common/SSH/Wrappers.cpp +++ b/src/Common/SSHWrapper.cpp @@ -1,4 +1,5 @@ -#include +#include + # if USE_SSH # include @@ -10,6 +11,14 @@ # pragma clang diagnostic pop +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LIBSSH_ERROR; +} + namespace { @@ -18,17 +27,19 @@ class SSHString public: explicit SSHString(std::string_view input) { - string = ssh_string_new(input.size()); - ssh_string_fill(string, input.data(), input.size()); + if (string = ssh_string_new(input.size()); string == nullptr) + throw Exception(ErrorCodes::LIBSSH_ERROR, "Can't create SSHString"); + if (int rc = ssh_string_fill(string, input.data(), input.size()); rc != SSH_OK) + throw Exception(ErrorCodes::LIBSSH_ERROR, "Can't create SSHString"); } - explicit SSHString(ssh_string c_other) { string = c_other; } + explicit SSHString(ssh_string other) { string = other; } ssh_string get() { return string; } String toString() { - return String(ssh_string_get_char(string), ssh_string_len(string)); + return {ssh_string_get_char(string), ssh_string_len(string)}; } ~SSHString() @@ -42,46 +53,28 @@ class SSHString } -namespace DB -{ - -namespace ErrorCodes -{ - extern const int LIBSSH_ERROR; -} - -namespace ssh -{ - -SSHKey SSHKeyFactory::makePrivateFromFile(String filename, String passphrase) +SSHKey SSHKeyFactory::makePrivateKeyFromFile(String filename, String passphrase) { ssh_key key; - int rc = ssh_pki_import_privkey_file(filename.c_str(), passphrase.c_str(), nullptr, nullptr, &key); - if (rc != SSH_OK) - { + if (int rc = ssh_pki_import_privkey_file(filename.c_str(), passphrase.c_str(), nullptr, nullptr, &key); rc != SSH_OK) throw Exception(ErrorCodes::LIBSSH_ERROR, "Can't import SSH private key from file"); - } return SSHKey(key); } -SSHKey SSHKeyFactory::makePublicFromFile(String filename) +SSHKey SSHKeyFactory::makePublicKeyFromFile(String filename) { ssh_key key; - int rc = 
ssh_pki_import_pubkey_file(filename.c_str(), &key); - if (rc != SSH_OK) + if (int rc = ssh_pki_import_pubkey_file(filename.c_str(), &key); rc != SSH_OK) throw Exception(ErrorCodes::LIBSSH_ERROR, "Can't import SSH public key from file"); - return SSHKey(key); } -SSHKey SSHKeyFactory::makePublicFromBase64(String base64_key, String type_name) +SSHKey SSHKeyFactory::makePublicKeyFromBase64(String base64_key, String type_name) { ssh_key key; auto key_type = ssh_key_type_from_name(type_name.c_str()); - int rc = ssh_pki_import_pubkey_base64(base64_key.c_str(), key_type, &key); - if (rc != SSH_OK) + if (int rc = ssh_pki_import_pubkey_base64(base64_key.c_str(), key_type, &key); rc != SSH_OK) throw Exception(ErrorCodes::LIBSSH_ERROR, "Bad SSH public key provided"); - return SSHKey(key); } @@ -90,6 +83,12 @@ SSHKey::SSHKey(const SSHKey & other) key = ssh_key_dup(other.key); } +SSHKey::SSHKey(SSHKey && other) noexcept +{ + key = other.key; + other.key = nullptr; +} + SSHKey & SSHKey::operator=(const SSHKey & other) { ssh_key_free(key); @@ -119,13 +118,11 @@ bool SSHKey::isEqual(const SSHKey & other) const String SSHKey::signString(std::string_view input) const { SSHString input_str(input); - ssh_string c_output = nullptr; - int rc = pki_sign_string(key, input_str.get(), &c_output); - if (rc != SSH_OK) + ssh_string output = nullptr; + if (int rc = pki_sign_string(key, input_str.get(), &output); rc != SSH_OK) throw Exception(ErrorCodes::LIBSSH_ERROR, "Error singing with ssh key"); - - SSHString output(c_output); - return output.toString(); + SSHString output_str(output); + return output_str.toString(); } bool SSHKey::verifySignature(std::string_view signature, std::string_view original) const @@ -149,18 +146,15 @@ namespace { struct CStringDeleter { - [[maybe_unused]] void operator()(char * ptr) const { std::free(ptr); } + void operator()(char * ptr) const { std::free(ptr); } }; } String SSHKey::getBase64() const { char * buf = nullptr; - int rc = ssh_pki_export_pubkey_base64(key, &buf); - - if (rc != SSH_OK) + if (int rc = ssh_pki_export_pubkey_base64(key, &buf); rc != SSH_OK) throw DB::Exception(DB::ErrorCodes::LIBSSH_ERROR, "Failed to export public key to base64"); - /// Create a String from cstring, which makes a copy of the first one and requires freeing memory after it /// This is to safely manage buf memory std::unique_ptr buf_ptr(buf); @@ -177,7 +171,6 @@ SSHKey::~SSHKey() ssh_key_free(key); // it's safe free from libssh } -} } #endif diff --git a/src/Common/SSH/Wrappers.h b/src/Common/SSHWrapper.h similarity index 73% rename from src/Common/SSH/Wrappers.h rename to src/Common/SSHWrapper.h index 699bba2b0424..b6f0c577edcd 100644 --- a/src/Common/SSH/Wrappers.h +++ b/src/Common/SSHWrapper.h @@ -1,20 +1,18 @@ #pragma once + #include + +#include +#include + #include "config.h" -#if USE_SSH -# include -# include +#if USE_SSH using ssh_key = struct ssh_key_struct *; namespace DB { -namespace ssh -{ - -class SSHKeyFactory; - class SSHKey { public: @@ -22,11 +20,7 @@ class SSHKey ~SSHKey(); SSHKey(const SSHKey & other); - SSHKey(SSHKey && other) noexcept - { - key = other.key; - other.key = nullptr; - } + SSHKey(SSHKey && other) noexcept; SSHKey & operator=(const SSHKey & other); SSHKey & operator=(SSHKey && other) noexcept; @@ -43,7 +37,7 @@ class SSHKey String getBase64() const; String getKeyType() const; - friend SSHKeyFactory; + friend class SSHKeyFactory; private: explicit SSHKey(ssh_key key_) : key(key_) { } ssh_key key = nullptr; @@ -56,17 +50,14 @@ class SSHKeyFactory /// The check whether the 
path is allowed to read for ClickHouse has /// (e.g. a file is inside `user_files` directory) /// to be done outside of this functions. - static SSHKey makePrivateFromFile(String filename, String passphrase); - static SSHKey makePublicFromFile(String filename); - static SSHKey makePublicFromBase64(String base64_key, String type_name); + static SSHKey makePrivateKeyFromFile(String filename, String passphrase); + static SSHKey makePublicKeyFromFile(String filename); + static SSHKey makePublicKeyFromBase64(String base64_key, String type_name); }; -} } #else -namespace ssh -{ class SSHKey { public: @@ -74,5 +65,4 @@ class SSHKey [[ noreturn ]] bool isEmpty() { std::terminate(); } [[ noreturn ]] String signString(std::string_view) const { std::terminate(); } }; -} #endif diff --git a/src/Core/Protocol.h b/src/Core/Protocol.h index 441e22f4a164..481071547534 100644 --- a/src/Core/Protocol.h +++ b/src/Core/Protocol.h @@ -56,10 +56,11 @@ namespace DB namespace EncodedUserInfo { -/// Marker of the inter-server secret (passed in the user name) +/// Marker for the inter-server secret (passed as the user name) /// (anyway user cannot be started with a whitespace) const char USER_INTERSERVER_MARKER[] = " INTERSERVER SECRET "; -/// Marker of the SSH keys based authentication (passed in the user name) + +/// Marker for SSH-keys-based authentication (passed as the user name) const char SSH_KEY_AUTHENTICAION_MARKER[] = " SSH KEY AUTHENTICATION "; }; @@ -160,8 +161,8 @@ namespace Protocol ReadTaskResponse = 9, /// A filename to read from s3 (used in s3Cluster) MergeTreeReadTaskResponse = 10, /// Coordinator's decision with a modified set of mark ranges allowed to read - SSHChallengeRequest = 11, /// Request for SSH signature challenge - SSHChallengeResponse = 12, /// Request for SSH signature challenge + SSHChallengeRequest = 11, /// Request SSH signature challenge + SSHChallengeResponse = 12, /// Reply to SSH signature challenge MAX = SSHChallengeResponse, }; diff --git a/src/Parsers/Access/ParserPublicSSHKey.cpp b/src/Parsers/Access/ParserPublicSSHKey.cpp index bc033e25bbb9..9102044900de 100644 --- a/src/Parsers/Access/ParserPublicSSHKey.cpp +++ b/src/Parsers/Access/ParserPublicSSHKey.cpp @@ -1,6 +1,6 @@ #include -#include +#include #include #include diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 5c08c6974346..4e3d6ab69f65 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1371,17 +1371,6 @@ std::string formatHTTPErrorResponseWhenUserIsConnectedToWrongPort(const Poco::Ut return result; } -[[ maybe_unused ]] String createChallenge() -{ -#if USE_SSL - pcg64_fast rng(randomSeed()); - UInt64 rand = rng(); - return encodeSHA256(&rand, sizeof(rand)); -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Can't generate challenge, because ClickHouse was built without OpenSSL"); -#endif -} - } std::unique_ptr TCPHandler::makeSession() @@ -1399,16 +1388,6 @@ std::unique_ptr TCPHandler::makeSession() return res; } -String TCPHandler::prepareStringForSshValidation(String username, String challenge) -{ - String output; - output.append(std::to_string(client_tcp_protocol_version)); - output.append(default_database); - output.append(username); - output.append(challenge); - return output; -} - void TCPHandler::receiveHello() { /// Receive `hello` packet. 
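
// For orientation, the server-side hunks below complete the SSH-key handshake whose client
// side appears earlier in this patch (Connection::sendHello / performHandshakeForSSHAuth).
// A rough sketch of the exchange (simplified, not literal patch content):
//
//     client:  Hello with user = " SSH KEY AUTHENTICATION " + user, empty password
//     client:  Client::SSHChallengeRequest
//     server:  challenge = encodeSHA256(random UInt64)
//     server:  Server::SSHChallenge { challenge }
//     client:  to_sign = protocol_version + default_database + user + challenge
//     client:  Client::SSHChallengeResponse { ssh_private_key.signString(to_sign) }
//     server:  authenticate(SshCredentials(user, signature,
//                  protocol_version + default_database + user + challenge))
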
@@ -1466,11 +1445,9 @@ void TCPHandler::receiveHello() return; } - is_ssh_based_auth = startsWith(user, EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER) && password.empty(); + is_ssh_based_auth = user.starts_with(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER) && password.empty(); if (is_ssh_based_auth) - { - user.erase(0, String(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER).size()); - } + user.erase(0, std::string_view(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER).size()); session = makeSession(); const auto & client_info = session->getClientInfo(); @@ -1498,7 +1475,9 @@ void TCPHandler::receiveHello() } } } +#endif +#if USE_SSH /// Perform handshake for SSH authentication if (is_ssh_based_auth) { @@ -1512,7 +1491,14 @@ void TCPHandler::receiveHello() if (packet_type != Protocol::Client::SSHChallengeRequest) throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Server expected to receive a packet for requesting a challenge string"); - auto challenge = createChallenge(); + auto create_challenge = []() + { + pcg64_fast rng(randomSeed()); + UInt64 rand = rng(); + return encodeSHA256(&rand, sizeof(rand)); + }; + + String challenge = create_challenge(); writeVarUInt(Protocol::Server::SSHChallenge, *out); writeStringBinary(challenge, *out); out->next(); @@ -1523,7 +1509,17 @@ void TCPHandler::receiveHello() throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Server expected to receive a packet with a response for a challenge"); readStringBinary(signature, *in); - auto cred = SshCredentials(user, signature, prepareStringForSshValidation(user, challenge)); + auto prepare_string_for_ssh_validation = [&](const String & username, const String & challenge_) + { + String output; + output.append(std::to_string(client_tcp_protocol_version)); + output.append(default_database); + output.append(username); + output.append(challenge_); + return output; + }; + + auto cred = SshCredentials(user, signature, prepare_string_for_ssh_validation(user, challenge)); session->authenticate(cred, getClientAddress(client_info)); return; } diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 28259d3a3257..191617f19050 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -216,7 +216,7 @@ class TCPHandler : public Poco::Net::TCPServerConnection String default_database; - bool is_ssh_based_auth = false; + bool is_ssh_based_auth = false; /// authentication is via SSH pub-key challenge /// For inter-server secret (remote_server.*.secret) bool is_interserver_mode = false; bool is_interserver_authenticated = false; @@ -248,7 +248,6 @@ class TCPHandler : public Poco::Net::TCPServerConnection void extractConnectionSettingsFromContext(const ContextPtr & context); std::unique_ptr makeSession(); - String prepareStringForSshValidation(String user, String challenge); bool receiveProxyHeader(); void receiveHello(); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index c4b84a0ae8cc..29ebd114b9c2 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5656,7 +5656,7 @@ std::optional StorageReplicatedMergeTree::distributedWriteFromClu { auto connection = std::make_shared( node.host_name, node.port, query_context->getGlobalContext()->getCurrentDatabase(), - node.user, node.password, ssh::SSHKey(), node.quota_key, node.cluster, node.cluster_secret, + node.user, node.password, SSHKey(), node.quota_key, node.cluster, node.cluster_secret, "ParallelInsertSelectInititiator", node.compression, 
node.secure diff --git a/tests/queries/0_stateless/02867_create_user_ssh.sql b/tests/queries/0_stateless/02867_create_user_ssh.sql index 08236bdbcfe1..3e3cb30a6016 100644 --- a/tests/queries/0_stateless/02867_create_user_ssh.sql +++ b/tests/queries/0_stateless/02867_create_user_ssh.sql @@ -1,10 +1,16 @@ -- Tags: no-fasttest, no-parallel +-- Tests user authentication with SSH public keys + DROP USER IF EXISTS test_user_02867; -CREATE USER test_user_02867 IDENTIFIED WITH ssh_key BY KEY 'clickhouse' TYPE 'ssh-rsa'; -- { serverError LIBSSH_ERROR } -CREATE USER test_user_02867 IDENTIFIED WITH ssh_key BY KEY 'clickhouse' TYPE 'clickhouse'; -- { serverError LIBSSH_ERROR } -CREATE USER test_user_02867 IDENTIFIED WITH ssh_key BY KEY 'key1' TYPE 'ssh-rsa', KEY 'key2' TYPE 'ssh-rsa'; -- { serverError LIBSSH_ERROR } +-- negative tests +CREATE USER test_user_02867 IDENTIFIED WITH ssh_key BY KEY 'invalid_key' TYPE 'ssh-rsa'; -- { serverError LIBSSH_ERROR } +CREATE USER test_user_02867 IDENTIFIED WITH ssh_key BY KEY 'invalid_key' TYPE 'ssh-rsa', KEY 'invalid_key' TYPE 'ssh-rsa'; -- { serverError LIBSSH_ERROR } +CREATE USER test_user_02867 IDENTIFIED WITH ssh_key +BY KEY 'AAAAB3NzaC1yc2EAAAADAQABAAABgQCVTUso7/LQcBljfsHwyuL6fWfIvS3BaVpYB8lwf/ZylSOltBy6YlABtTU3mIb197d2DW99RcLKk174f5Zj5rUukXbV0fnufWvwd37fbb1eKM8zxBYvXs53EI5QBPZgKACIzMpYYZeJnAP0oZhUfWWtKXpy/SQ5CHiEIGD9RNYDL+uXZejMwC5r/+f2AmrATBo+Y+WJFZIvhj4uznFYvyvNTUz/YDvZCk+vwwIgiv4BpFCaZm2TeETTj6SvK567bZznLP5HXrkVbB5lhxjAkahc2w/Yjm//Fwto3xsMoJwROxJEU8L1kZ40QWPqjo7Tmr6C/hL2cKDNgWOEqrjLKQmh576s1+PfxwXpVPjLK4PHVSvuJLV88sn0iPdspLlKlDCdc7T9MqIrjJfxuhqnaoFQ7U+oBte8vkm1wGu76+WEC3iNWVAiIVZxLx9rUEsDqj3OovqfLiRsTmNLeY94p2asZjkx7rU48ZwuYN5XGafYsArPscj9Ve6RoRrof+5Q7cc=' +TYPE 'invalid_algorithm'; -- { serverError LIBSSH_ERROR } + CREATE USER test_user_02867 IDENTIFIED WITH ssh_key BY KEY 'AAAAB3NzaC1yc2EAAAADAQABAAABgQCVTUso7/LQcBljfsHwyuL6fWfIvS3BaVpYB8lwf/ZylSOltBy6YlABtTU3mIb197d2DW99RcLKk174f5Zj5rUukXbV0fnufWvwd37fbb1eKM8zxBYvXs53EI5QBPZgKACIzMpYYZeJnAP0oZhUfWWtKXpy/SQ5CHiEIGD9RNYDL+uXZejMwC5r/+f2AmrATBo+Y+WJFZIvhj4uznFYvyvNTUz/YDvZCk+vwwIgiv4BpFCaZm2TeETTj6SvK567bZznLP5HXrkVbB5lhxjAkahc2w/Yjm//Fwto3xsMoJwROxJEU8L1kZ40QWPqjo7Tmr6C/hL2cKDNgWOEqrjLKQmh576s1+PfxwXpVPjLK4PHVSvuJLV88sn0iPdspLlKlDCdc7T9MqIrjJfxuhqnaoFQ7U+oBte8vkm1wGu76+WEC3iNWVAiIVZxLx9rUEsDqj3OovqfLiRsTmNLeY94p2asZjkx7rU48ZwuYN5XGafYsArPscj9Ve6RoRrof+5Q7cc=' TYPE 'ssh-rsa'; From 0be983ee22130613c01504c328f4ec992fd728b9 Mon Sep 17 00:00:00 2001 From: avogar Date: Fri, 5 Apr 2024 09:16:00 +0000 Subject: [PATCH 283/470] Fix test --- src/Interpreters/Context.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index faee0602f563..e4d5d895ceb6 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -48,7 +48,7 @@ #include #include #include -#include +//#include #include #include #include @@ -1658,7 +1658,7 @@ void Context::addScalar(const String & name, const Block & block) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have scalars"); std::lock_guard lock(mutex); - scalars.emplace(name, block); + scalars[name] = block; } @@ -1668,7 +1668,7 @@ void Context::addSpecialScalar(const String & name, const Block & block) throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context cannot have local scalars"); std::lock_guard lock(mutex); - special_scalars.emplace(name, block); + special_scalars[name] = block; } From c3aa9323677139e49552710265d7ce96f6e023de Mon Sep 17 00:00:00 2001 From: 
Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Fri, 5 Apr 2024 11:39:28 +0200 Subject: [PATCH 284/470] Uncommonly header --- src/Interpreters/Context.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index e4d5d895ceb6..f8a46ec30b49 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -48,7 +48,7 @@ #include #include #include -//#include +#include #include #include #include From ce1f5144177c404c955bd006f0428ee932ad49ac Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 5 Apr 2024 10:39:05 +0000 Subject: [PATCH 285/470] Fix optimize_uniq_to_count when only prefix of key is matched --- src/Analyzer/Passes/UniqToCountPass.cpp | 13 +++++++++++-- .../02990_optimize_uniq_to_count_alias.reference | 1 + .../02990_optimize_uniq_to_count_alias.sql | 15 +++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/UniqToCountPass.cpp b/src/Analyzer/Passes/UniqToCountPass.cpp index d7d11e9a5802..b801865c9a5a 100644 --- a/src/Analyzer/Passes/UniqToCountPass.cpp +++ b/src/Analyzer/Passes/UniqToCountPass.cpp @@ -29,7 +29,8 @@ NamesAndTypes extractProjectionColumnsForGroupBy(const QueryNode * query_node) return {}; NamesAndTypes result; - for (const auto & group_by_ele : query_node->getGroupByNode()->getChildren()) + const auto & group_by_elements = query_node->getGroupByNode()->getChildren(); + for (const auto & group_by_element : group_by_elements) { const auto & projection_columns = query_node->getProjectionColumns(); const auto & projection_nodes = query_node->getProjection().getNodes(); @@ -38,10 +39,18 @@ NamesAndTypes extractProjectionColumnsForGroupBy(const QueryNode * query_node) for (size_t i = 0; i < projection_columns.size(); i++) { - if (projection_nodes[i]->isEqual(*group_by_ele)) + if (projection_nodes[i]->isEqual(*group_by_element)) + { result.push_back(projection_columns[i]); + break; + } } } + /// If some group by keys are not matched, we cannot apply optimization, + /// because prefix of group by keys may not be unique. 
+ if (result.size() != group_by_elements.size()) + return {}; + return result; } diff --git a/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.reference b/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.reference index 6ed281c757a9..e8183f05f5db 100644 --- a/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.reference +++ b/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.reference @@ -1,2 +1,3 @@ 1 1 +1 diff --git a/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.sql b/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.sql index 5ba0be399912..54d19264c452 100644 --- a/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.sql +++ b/tests/queries/0_stateless/02990_optimize_uniq_to_count_alias.sql @@ -34,4 +34,19 @@ FROM ) AS t ) SETTINGS optimize_uniq_to_count=1; +-- https://github.com/ClickHouse/ClickHouse/issues/62298 +DROP TABLE IF EXISTS users; +CREATE TABLE users +( + `id` Int64, + `name` String +) +ENGINE = ReplacingMergeTree +ORDER BY (id, name); + +INSERT INTO users VALUES (1, 'pufit'), (1, 'pufit2'), (1, 'pufit3'); + +SELECT uniqExact(id) FROM ( SELECT id FROM users WHERE id = 1 GROUP BY id, name ); + +DROP TABLE IF EXISTS users; DROP TABLE IF EXISTS tags; From 500c3fe0fcb197f7d8b2f0a6148480727015acf1 Mon Sep 17 00:00:00 2001 From: Sean Haynes Date: Fri, 5 Apr 2024 10:38:28 +0000 Subject: [PATCH 286/470] Fix small typo in Dictionary source loader --- src/Interpreters/ExternalLoader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index 36664cbd06fb..53e91971d920 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -1186,7 +1186,7 @@ class ExternalLoader::LoadingDispatcher : private boost::noncopyable else { auto result = std::chrono::system_clock::now() + std::chrono::seconds(calculateDurationWithBackoff(rnd_engine, error_count)); - LOG_TRACE(log, "Supposed update time for unspecified object is {} (backoff, {} errors.", to_string(result), error_count); + LOG_TRACE(log, "Supposed update time for unspecified object is {} (backoff, {} errors)", to_string(result), error_count); return result; } } From 6428868843eb4666a3ec1defff662f673c8a5e37 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 5 Apr 2024 12:42:03 +0200 Subject: [PATCH 287/470] Fix build --- src/Interpreters/Cache/LRUFileCachePriority.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index 9424156a9fbb..5d75c9cb18c8 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -259,9 +259,9 @@ bool LRUFileCachePriority::canFit( const size_t * max_elements_) const { return (max_size == 0 - || (state->current_size + size - released_size_assumption <= (max_size_ ? *max_size_ : max_size))) + || (state->current_size + size - released_size_assumption <= (max_size_ ? *max_size_ : max_size.load()))) && (max_elements == 0 - || state->current_elements_num + elements - released_elements_assumption <= (max_elements_ ? *max_elements_ : max_elements)); + || state->current_elements_num + elements - released_elements_assumption <= (max_elements_ ? 
*max_elements_ : max_elements.load())); } bool LRUFileCachePriority::collectCandidatesForEviction( From 07893fab631a06524658311411f53e00ef758dd3 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 5 Apr 2024 12:01:39 +0000 Subject: [PATCH 288/470] Revert "Fixing 02535_analyzer_group_by_use_nulls" This reverts commit 3b6ea659dfbfe25983bf1cdbdaac51ce38f6d73b. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 29 ++++++++++------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 837d309d0312..cab6dd268ea5 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6679,48 +6679,45 @@ void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierR if (query_node_typed.isGroupByWithGroupingSets()) { - QueryTreeNodes nullable_group_by_keys; for (auto & grouping_sets_keys_list_node : query_node_typed.getGroupBy().getNodes()) { if (settings.enable_positional_arguments) replaceNodesWithPositionalArguments(grouping_sets_keys_list_node, query_node_typed.getProjection().getNodes(), scope); + resolveExpressionNodeList(grouping_sets_keys_list_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + // Remove redundant calls to `tuple` function. It simplifies checking if expression is an aggregation key. // It's required to support queries like: SELECT number FROM numbers(3) GROUP BY (number, number % 2) auto & group_by_list = grouping_sets_keys_list_node->as().getNodes(); expandTuplesInList(group_by_list); - - if (scope.group_by_use_nulls) - for (const auto & group_by_elem : group_by_list) - nullable_group_by_keys.push_back(group_by_elem->clone()); - - resolveExpressionNodeList(grouping_sets_keys_list_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); } - for (auto & nullable_group_by_key : nullable_group_by_keys) - scope.nullable_group_by_keys.insert(std::move(nullable_group_by_key)); + if (scope.group_by_use_nulls) + { + for (const auto & grouping_set : query_node_typed.getGroupBy().getNodes()) + { + for (const auto & group_by_elem : grouping_set->as()->getNodes()) + scope.nullable_group_by_keys.insert(group_by_elem); + } + } } else { if (settings.enable_positional_arguments) replaceNodesWithPositionalArguments(query_node_typed.getGroupByNode(), query_node_typed.getProjection().getNodes(), scope); + resolveExpressionNodeList(query_node_typed.getGroupByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); + // Remove redundant calls to `tuple` function. It simplifies checking if expression is an aggregation key. 
// It's required to support queries like: SELECT number FROM numbers(3) GROUP BY (number, number % 2) auto & group_by_list = query_node_typed.getGroupBy().getNodes(); expandTuplesInList(group_by_list); - QueryTreeNodes nullable_group_by_keys; if (scope.group_by_use_nulls) { for (const auto & group_by_elem : query_node_typed.getGroupBy().getNodes()) - nullable_group_by_keys.push_back(group_by_elem->clone()); + scope.nullable_group_by_keys.insert(group_by_elem); } - - resolveExpressionNodeList(query_node_typed.getGroupByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); - - for (auto & nullable_group_by_key : nullable_group_by_keys) - scope.nullable_group_by_keys.insert(std::move(nullable_group_by_key)); } } From f7fdb2c4555db5530abfc2a7b10342d1e6e0217d Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Fri, 5 Apr 2024 14:23:04 +0200 Subject: [PATCH 289/470] More complex locking in StackTrace --- src/Common/StackTrace.cpp | 69 ++++++++++++++++++++++++++++++++------- 1 file changed, 58 insertions(+), 11 deletions(-) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 891850ccb79f..78ab43e89919 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -18,13 +18,10 @@ #include #include #include -#include #include #include #include -#include "config.h" - #include #if defined(OS_DARWIN) @@ -481,7 +478,17 @@ void StackTrace::toStringEveryLine(void ** frame_pointers_raw, size_t offset, si toStringEveryLineImpl(true, {frame_pointers, offset, size}, std::move(callback)); } -using StackTraceCache = std::map>; +struct CacheEntry +{ + std::optional stacktrace_string; + bool to_string_in_progress = false; + + std::condition_variable cv; +}; + +using CacheEntryPtr = std::shared_ptr; + +using StackTraceCache = std::map>; static StackTraceCache & cacheInstance() { @@ -493,23 +500,63 @@ static std::mutex stacktrace_cache_mutex; String toStringCached(const StackTrace::FramePointers & pointers, size_t offset, size_t size) { + const StackTraceRefTriple key{pointers, offset, size}; + /// Calculation of stack trace text is extremely slow. /// We use simple cache because otherwise the server could be overloaded by trash queries. /// Note that this cache can grow unconditionally, but practically it should be small. - std::lock_guard lock{stacktrace_cache_mutex}; - + std::unique_lock lock{stacktrace_cache_mutex}; + CacheEntryPtr cache_entry; StackTraceCache & cache = cacheInstance(); - const StackTraceRefTriple key{pointers, offset, size}; - if (auto it = cache.find(key); it != cache.end()) - return it->second; + { + cache_entry = it->second; + } else + { + auto [new_it, inserted] = cache.emplace(StackTraceTriple{pointers, offset, size}, std::make_shared()); + chassert(inserted); + cache_entry = new_it->second; + } + + if (!cache_entry->to_string_in_progress && cache_entry->stacktrace_string.has_value()) + return *cache_entry->stacktrace_string; + + if (cache_entry->to_string_in_progress) + { + cache_entry->cv.wait(lock, [&]{ return !cache_entry->to_string_in_progress; }); + + if (cache_entry->stacktrace_string.has_value()) + return *cache_entry->stacktrace_string; + } + + cache_entry->to_string_in_progress = true; + + lock.unlock(); + + String stacktrace_string; + try { DB::WriteBufferFromOwnString out; toStringEveryLineImpl(false, key, [&](std::string_view str) { out << str << '\n'; }); - - return cache.emplace(StackTraceTriple{pointers, offset, size}, out.str()).first->second; + stacktrace_string = out.str(); } + catch (...) 
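    /// [Illustration note, not part of the patch: the catch block below re-acquires the lock,
    ///  clears to_string_in_progress and wakes one waiter, so another thread can retry the
    ///  symbolization after this one failed, and then rethrows.]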
+ { + lock.lock(); + cache_entry->to_string_in_progress = false; + lock.unlock(); + cache_entry->cv.notify_one(); + throw; + } + + lock.lock(); + cache_entry->to_string_in_progress = false; + cache_entry->stacktrace_string = stacktrace_string; + lock.unlock(); + + cache_entry->cv.notify_all(); + return stacktrace_string; } std::string StackTrace::toString() const From 8b2c719aa665b0af7d2190c29ad842d9b0aa1fdd Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 5 Apr 2024 12:57:02 +0000 Subject: [PATCH 290/470] Do not build multithread insert pipeline for tables without support --- src/Interpreters/InterpreterInsertQuery.cpp | 3 ++- src/QueryPipeline/Pipe.cpp | 2 -- .../03035_max_insert_threads_support.reference | 1 + .../03035_max_insert_threads_support.sh | 14 ++++++++++++++ 4 files changed, 17 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/03035_max_insert_threads_support.reference create mode 100755 tests/queries/0_stateless/03035_max_insert_threads_support.sh diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index fc58f7b50988..d2eda928d8c5 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -522,7 +522,8 @@ BlockIO InterpreterInsertQuery::execute() auto views = DatabaseCatalog::instance().getDependentViews(table_id); /// It breaks some views-related tests and we have dedicated `parallel_view_processing` for views, so let's just skip them. - const bool resize_to_max_insert_threads = !table->isView() && views.empty(); + /// Also it doesn't make sense to reshuffle data if storage doesn't support parallel inserts. + const bool resize_to_max_insert_threads = !table->isView() && views.empty() && table->supportsParallelInsert(); pre_streams_size = resize_to_max_insert_threads ? settings.max_insert_threads : std::min(settings.max_insert_threads, pipeline.getNumStreams()); diff --git a/src/QueryPipeline/Pipe.cpp b/src/QueryPipeline/Pipe.cpp index 8050c7cc6714..34602ecccee2 100644 --- a/src/QueryPipeline/Pipe.cpp +++ b/src/QueryPipeline/Pipe.cpp @@ -13,8 +13,6 @@ #include #include -#include - namespace DB { diff --git a/tests/queries/0_stateless/03035_max_insert_threads_support.reference b/tests/queries/0_stateless/03035_max_insert_threads_support.reference new file mode 100644 index 000000000000..d00491fd7e5b --- /dev/null +++ b/tests/queries/0_stateless/03035_max_insert_threads_support.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/03035_max_insert_threads_support.sh b/tests/queries/0_stateless/03035_max_insert_threads_support.sh new file mode 100755 index 000000000000..1e6bfb414d80 --- /dev/null +++ b/tests/queries/0_stateless/03035_max_insert_threads_support.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +DATA_FILE="data_$CLICKHOUSE_TEST_UNIQUE_NAME.csv" + +$CLICKHOUSE_CLIENT --max_insert_threads=4 --query=" + EXPLAIN PIPELINE INSERT INTO FUNCTION file('$DATA_FILE') SELECT * FROM numbers_mt(1000000) ORDER BY number DESC +" | grep -o MaterializingTransform | wc -l + +DATA_FILE_PATH=$($CLICKHOUSE_CLIENT_BINARY --query "select _path from file('$DATA_FILE', 'One')") +rm $DATA_FILE_PATH From e53ba4fa9db4646ee3a0c193594379b33043bcf2 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 5 Apr 2024 13:32:07 +0000 Subject: [PATCH 291/470] Analyzer: Fix PREWHERE with lambda functions --- src/Planner/CollectTableExpressionData.cpp | 4 +++- .../0_stateless/03036_prewhere_lambda_function.reference | 2 ++ .../0_stateless/03036_prewhere_lambda_function.sql | 8 ++++++++ 3 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03036_prewhere_lambda_function.reference create mode 100644 tests/queries/0_stateless/03036_prewhere_lambda_function.sql diff --git a/src/Planner/CollectTableExpressionData.cpp b/src/Planner/CollectTableExpressionData.cpp index 385381f13552..27b5909c13b0 100644 --- a/src/Planner/CollectTableExpressionData.cpp +++ b/src/Planner/CollectTableExpressionData.cpp @@ -235,7 +235,9 @@ class CollectPrewhereTableExpressionVisitor : public ConstInDepthQueryTreeVisito static bool needChildVisit(const QueryTreeNodePtr &, const QueryTreeNodePtr & child_node) { auto child_node_type = child_node->getNodeType(); - return !(child_node_type == QueryTreeNodeType::QUERY || child_node_type == QueryTreeNodeType::UNION); + return child_node_type != QueryTreeNodeType::QUERY && + child_node_type != QueryTreeNodeType::UNION && + child_node_type != QueryTreeNodeType::LAMBDA; } private: diff --git a/tests/queries/0_stateless/03036_prewhere_lambda_function.reference b/tests/queries/0_stateless/03036_prewhere_lambda_function.reference new file mode 100644 index 000000000000..470e4427d967 --- /dev/null +++ b/tests/queries/0_stateless/03036_prewhere_lambda_function.reference @@ -0,0 +1,2 @@ +[4,5,6] +[4,5,6] diff --git a/tests/queries/0_stateless/03036_prewhere_lambda_function.sql b/tests/queries/0_stateless/03036_prewhere_lambda_function.sql new file mode 100644 index 000000000000..7a5da7ed689b --- /dev/null +++ b/tests/queries/0_stateless/03036_prewhere_lambda_function.sql @@ -0,0 +1,8 @@ +DROP TABLE IF EXISTS t; +CREATE TABLE t (A Array(Int64)) Engine = MergeTree ORDER BY tuple(); +INSERT INTO t VALUES ([1,2,3]), ([4,5,6]), ([7,8,9]); + +SELECT * FROM t PREWHERE arrayExists(x -> x = 5, A); +SELECT * FROM t PREWHERE arrayExists(lamdba(tuple(x), x = 5), A); + +DROP TABLE t; From 54ceb3d32a7bb490ba7f202a511607f0ea21ae5b Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 5 Apr 2024 12:47:00 +0000 Subject: [PATCH 292/470] add some comments --- src/Processors/QueryPlan/PartsSplitter.cpp | 2 ++ .../test_final_bug_with_pk_columns_loading/test.py | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/PartsSplitter.cpp b/src/Processors/QueryPlan/PartsSplitter.cpp index ec51875587e6..64af48dd53c6 100644 --- a/src/Processors/QueryPlan/PartsSplitter.cpp +++ b/src/Processors/QueryPlan/PartsSplitter.cpp @@ -128,6 +128,8 @@ class IndexAccess public: explicit IndexAccess(const RangesInDataParts & parts_) : parts(parts_) { + /// Some suffix of index columns might not be loaded (see `primary_key_ratio_of_unique_prefix_values_to_skip_suffix_columns`) + /// and we need to use the same set of index columns across all parts. 
for (const auto & part : parts) loaded_columns = std::min(loaded_columns, part.data_part->getIndex().size()); } diff --git a/tests/integration/test_final_bug_with_pk_columns_loading/test.py b/tests/integration/test_final_bug_with_pk_columns_loading/test.py index e710b9942dc5..61559913e053 100644 --- a/tests/integration/test_final_bug_with_pk_columns_loading/test.py +++ b/tests/integration/test_final_bug_with_pk_columns_loading/test.py @@ -19,18 +19,24 @@ def start_cluster(): cluster.shutdown() -def test_simple_query_after_restart(start_cluster): +def test_simple_query_after_index_reload(start_cluster): node.query( """ create table t(a UInt32, b UInt32) engine=MergeTree order by (a, b) settings index_granularity=1; + -- for this part the first columns is useless, so we have to use both insert into t select 42, number from numbers_mt(100); + + -- for this part the first columns is enough insert into t select number, number from numbers_mt(100); """ ) + # force reloading index node.restart_clickhouse() + # the bug happened when we used (a, b) index values for one part and only (a) for another in PartsSplitter. even a simple count query is enough, + # because some granules were assinged to wrong layers and hence not returned from the reading step (because they were filtered out by `FilterSortedStreamByRange`) assert ( int( node.query( From bbe4b284c2f19249fe5ca25f84ed639d59779e73 Mon Sep 17 00:00:00 2001 From: Blargian Date: Fri, 5 Apr 2024 16:30:11 +0200 Subject: [PATCH 293/470] Add missing description to arrayDotProduct --- docs/en/sql-reference/functions/array-functions.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index d481996e854b..c8bb3ee7604a 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -776,6 +776,8 @@ Note that the `arrayCount` is a [higher-order function](../../sql-reference/func ## arrayDotProduct +Returns the dot product of two arrays. 
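(Illustrative example, not part of this documentation change: `arrayDotProduct([1, 2, 3], [4, 5, 6])` is `1*4 + 2*5 + 3*6 = 32`.)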
+ **Syntax** ```sql From 0f4efdaa4788dc5fd9e4ee96ca611eb35d63a29a Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 5 Apr 2024 14:48:39 +0000 Subject: [PATCH 294/470] remove case from 03036_prewhere_lambda_function --- .../queries/0_stateless/03036_prewhere_lambda_function.reference | 1 - tests/queries/0_stateless/03036_prewhere_lambda_function.sql | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/queries/0_stateless/03036_prewhere_lambda_function.reference b/tests/queries/0_stateless/03036_prewhere_lambda_function.reference index 470e4427d967..2599763b762f 100644 --- a/tests/queries/0_stateless/03036_prewhere_lambda_function.reference +++ b/tests/queries/0_stateless/03036_prewhere_lambda_function.reference @@ -1,2 +1 @@ [4,5,6] -[4,5,6] diff --git a/tests/queries/0_stateless/03036_prewhere_lambda_function.sql b/tests/queries/0_stateless/03036_prewhere_lambda_function.sql index 7a5da7ed689b..8b9ebb775a37 100644 --- a/tests/queries/0_stateless/03036_prewhere_lambda_function.sql +++ b/tests/queries/0_stateless/03036_prewhere_lambda_function.sql @@ -3,6 +3,5 @@ CREATE TABLE t (A Array(Int64)) Engine = MergeTree ORDER BY tuple(); INSERT INTO t VALUES ([1,2,3]), ([4,5,6]), ([7,8,9]); SELECT * FROM t PREWHERE arrayExists(x -> x = 5, A); -SELECT * FROM t PREWHERE arrayExists(lamdba(tuple(x), x = 5), A); DROP TABLE t; From 39d706ba9f0c8e7f8c8d757e215f639f7d510fe2 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 5 Apr 2024 14:45:51 +0000 Subject: [PATCH 295/470] rework test --- .../__init__.py | 0 .../test.py | 53 ------------------- ...s_splitter_bug_and_index_loading.reference | 1 + ...3_parts_splitter_bug_and_index_loading.sql | 17 ++++++ 4 files changed, 18 insertions(+), 53 deletions(-) delete mode 100644 tests/integration/test_final_bug_with_pk_columns_loading/__init__.py delete mode 100644 tests/integration/test_final_bug_with_pk_columns_loading/test.py create mode 100644 tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.reference create mode 100644 tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql diff --git a/tests/integration/test_final_bug_with_pk_columns_loading/__init__.py b/tests/integration/test_final_bug_with_pk_columns_loading/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/integration/test_final_bug_with_pk_columns_loading/test.py b/tests/integration/test_final_bug_with_pk_columns_loading/test.py deleted file mode 100644 index 61559913e053..000000000000 --- a/tests/integration/test_final_bug_with_pk_columns_loading/test.py +++ /dev/null @@ -1,53 +0,0 @@ -import pytest -import logging - -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node", stay_alive=True) - - -@pytest.fixture(scope="module") -def start_cluster(): - try: - logging.info("Starting cluster...") - cluster.start() - logging.info("Cluster started") - - yield cluster - finally: - cluster.shutdown() - - -def test_simple_query_after_index_reload(start_cluster): - node.query( - """ - create table t(a UInt32, b UInt32) engine=MergeTree order by (a, b) settings index_granularity=1; - - -- for this part the first columns is useless, so we have to use both - insert into t select 42, number from numbers_mt(100); - - -- for this part the first columns is enough - insert into t select number, number from numbers_mt(100); - """ - ) - - # force reloading index - node.restart_clickhouse() - - # the bug happened when we used (a, b) index values for one part and only (a) 
for another in PartsSplitter. even a simple count query is enough, - # because some granules were assinged to wrong layers and hence not returned from the reading step (because they were filtered out by `FilterSortedStreamByRange`) - assert ( - int( - node.query( - "select count() from t where not ignore(*)", - settings={ - "max_threads": 4, - "merge_tree_min_bytes_for_concurrent_read": 1, - "merge_tree_min_rows_for_concurrent_read": 1, - "merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability": 1, - }, - ) - ) - == 200 - ) diff --git a/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.reference b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.reference new file mode 100644 index 000000000000..08839f6bb296 --- /dev/null +++ b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.reference @@ -0,0 +1 @@ +200 diff --git a/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql new file mode 100644 index 000000000000..541ac67fd24e --- /dev/null +++ b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql @@ -0,0 +1,17 @@ +create table t(a UInt32, b UInt32) engine=MergeTree order by (a, b) settings index_granularity=1; + +-- for this part the first columns is useless, so we have to use both +insert into t select 42, number from numbers_mt(100); + +-- for this part the first columns is enough +insert into t select number, number from numbers_mt(100); + +-- force reloading index +detach table t; +attach table t; + +set merge_tree_min_bytes_for_concurrent_read=1, merge_tree_min_rows_for_concurrent_read=1, merge_tree_read_split_ranges_into_intersecting_and_non_intersecting_injection_probability=1.0, max_threads=4; + +-- the bug happened when we used (a, b) index values for one part and only (a) for another in PartsSplitter. even a simple count query is enough, +-- because some granules were assinged to wrong layers and hence not returned from the reading step (because they were filtered out by `FilterSortedStreamByRange`) +select count() from t where not ignore(*); From b2bcfaf344047f629879143d6bb4efa00c22f7cb Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Fri, 5 Apr 2024 17:18:22 +0200 Subject: [PATCH 296/470] Reduce log levels for ReadWriteBufferFromHTTP retries --- src/IO/ReadWriteBufferFromHTTP.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/IO/ReadWriteBufferFromHTTP.cpp b/src/IO/ReadWriteBufferFromHTTP.cpp index c99b08d0c9dd..303ffb744b55 100644 --- a/src/IO/ReadWriteBufferFromHTTP.cpp +++ b/src/IO/ReadWriteBufferFromHTTP.cpp @@ -345,7 +345,7 @@ void ReadWriteBufferFromHTTP::doWithRetries(std::function && callable, if (last_attempt || !is_retriable) { if (!mute_logging) - LOG_ERROR(log, + LOG_DEBUG(log, "Failed to make request to '{}'{}. " "Error: '{}'. " "Failed at try {}/{}.", @@ -361,7 +361,7 @@ void ReadWriteBufferFromHTTP::doWithRetries(std::function && callable, on_retry(); if (!mute_logging) - LOG_INFO(log, + LOG_TRACE(log, "Failed to make request to '{}'{}. " "Error: {}. " "Failed at try {}/{}. 
" From 7d50bb8c4bf2f7d51d5aece0bb42a1ca4e8afac0 Mon Sep 17 00:00:00 2001 From: Arthur Passos Date: Fri, 5 Apr 2024 12:33:43 -0300 Subject: [PATCH 297/470] fix ut once again --- src/IO/tests/gtest_s3_uri.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/IO/tests/gtest_s3_uri.cpp b/src/IO/tests/gtest_s3_uri.cpp index 175550accccd..0ec28f800727 100644 --- a/src/IO/tests/gtest_s3_uri.cpp +++ b/src/IO/tests/gtest_s3_uri.cpp @@ -96,7 +96,7 @@ const TestCase TestCases[] = { false}, // Zonal {S3::URI("https://bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w-us-east-1a.s3.us-east-1.vpce.amazonaws.com/root/nested/file.txt"), - "https://bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w.s3.us-east-1.vpce.amazonaws.com", + "https://bucket.vpce-07a1cd78f1bd55c5f-j3a3vg6w-us-east-1a.s3.us-east-1.vpce.amazonaws.com", "root", "nested/file.txt", "", From 6e413223c2560007bab6422117e4d284c3aefdd4 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 5 Apr 2024 09:47:24 +0200 Subject: [PATCH 298/470] Use DETACHED_DIR_NAME everywhere Signed-off-by: Azat Khuzhin --- .../MergeTree/DataPartStorageOnDiskBase.cpp | 10 ++++--- src/Storages/MergeTree/DataPartsExchange.cpp | 4 +-- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 4 +-- src/Storages/MergeTree/MergeTreeData.cpp | 27 +++++++++---------- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 16 +++++------ 6 files changed, 32 insertions(+), 31 deletions(-) diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 18e4c87b298d..052e3ba4b744 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -13,6 +14,7 @@ #include #include #include +#include #include namespace DB @@ -64,7 +66,7 @@ std::optional DataPartStorageOnDiskBase::getRelativePathForPrefix(Logger auto full_relative_path = fs::path(root_path); if (detached) - full_relative_path /= "detached"; + full_relative_path /= MergeTreeData::DETACHED_DIR_NAME; std::optional original_checksums_content; std::optional original_files_list; @@ -109,7 +111,7 @@ bool DataPartStorageOnDiskBase::looksLikeBrokenDetachedPartHasTheSameContent(con if (!exists("checksums.txt")) return false; - auto storage_from_detached = create(volume, fs::path(root_path) / "detached", detached_part_path, /*initialize=*/ true); + auto storage_from_detached = create(volume, fs::path(root_path) / MergeTreeData::DETACHED_DIR_NAME, detached_part_path, /*initialize=*/ true); if (!storage_from_detached->exists("checksums.txt")) return false; @@ -490,7 +492,7 @@ MutableDataPartStoragePtr DataPartStorageOnDiskBase::freeze( auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); /// Do not initialize storage in case of DETACH because part may be broken. - bool to_detached = dir_path.starts_with("detached/"); + bool to_detached = dir_path.starts_with(std::string_view((fs::path(MergeTreeData::DETACHED_DIR_NAME) / "").string())); return create(single_disk_volume, to, dir_path, /*initialize=*/ !to_detached && !params.external_transaction); } @@ -618,7 +620,7 @@ void DataPartStorageOnDiskBase::remove( if (part_dir_without_slash.has_parent_path()) { auto parent_path = part_dir_without_slash.parent_path(); - if (parent_path == "detached") + if (parent_path == MergeTreeData::DETACHED_DIR_NAME) throw Exception( ErrorCodes::LOGICAL_ERROR, "Trying to remove detached part {} with path {} in remove function. 
It shouldn't happen", diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 91444d76a521..cf7889c0aee5 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -21,7 +22,6 @@ #include #include #include -#include #include @@ -803,7 +803,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( throw Exception(ErrorCodes::LOGICAL_ERROR, "`tmp_prefix` and `part_name` cannot be empty or contain '.' or '/' characters."); auto part_dir = tmp_prefix + part_name; - auto part_relative_path = data.getRelativeDataPath() + String(to_detached ? "detached/" : ""); + auto part_relative_path = data.getRelativeDataPath() + String(to_detached ? MergeTreeData::DETACHED_DIR_NAME : ""); auto volume = std::make_shared("volume_" + part_name, disk); /// Create temporary part storage to write sent files. diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 8da46b39801e..441437855abf 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1844,7 +1844,7 @@ try } catch (...) { - if (startsWith(new_relative_path, "detached/")) + if (startsWith(new_relative_path, fs::path(MergeTreeData::DETACHED_DIR_NAME) / "")) { // Don't throw when the destination is to the detached folder. It might be able to // recover in some cases, such as fetching parts into multi-disks while some of the @@ -1957,7 +1957,7 @@ std::optional IMergeTreeDataPart::getRelativePathForDetachedPart(const S DetachedPartInfo::DETACH_REASONS.end(), prefix) != DetachedPartInfo::DETACH_REASONS.end()); if (auto path = getRelativePathForPrefix(prefix, /* detached */ true, broken)) - return "detached/" + *path; + return fs::path(MergeTreeData::DETACHED_DIR_NAME) / *path; return {}; } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 461d9a31eaa9..dc15b8ab940c 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -262,7 +262,7 @@ void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & re if (need_create_directories) { disk->createDirectories(relative_data_path); - disk->createDirectories(fs::path(relative_data_path) / MergeTreeData::DETACHED_DIR_NAME); + disk->createDirectories(fs::path(relative_data_path) / DETACHED_DIR_NAME); } if (disk->exists(format_version_path)) @@ -1713,7 +1713,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks, std::optionalname(), "tmp") || it->name() == MergeTreeData::FORMAT_VERSION_FILE_NAME - || it->name() == MergeTreeData::DETACHED_DIR_NAME) + || it->name() == DETACHED_DIR_NAME) continue; if (auto part_info = MergeTreePartInfo::tryParsePartName(it->name(), format_version)) @@ -2796,7 +2796,7 @@ void MergeTreeData::dropAllData() && settings_ptr->allow_remote_fs_zero_copy_replication; try { - bool keep_shared = removeDetachedPart(part.disk, fs::path(relative_data_path) / "detached" / part.dir_name / "", part.dir_name); + bool keep_shared = removeDetachedPart(part.disk, fs::path(relative_data_path) / DETACHED_DIR_NAME / part.dir_name / "", part.dir_name); LOG_DEBUG(log, "Dropped detached part {}, keep shared data: {}", part.dir_name, keep_shared); } catch (...) 
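A minimal standalone sketch, not taken from the patch, of the std::filesystem behaviour this refactoring leans on (POSIX separators assumed, and `detached_dir_name` below is a local stand-in for MergeTreeData::DETACHED_DIR_NAME): joining components with operator/ reproduces the old hand-concatenated paths, and joining an empty component appends the trailing separator that the removed "detached/" literals carried.

#include <cassert>
#include <filesystem>
#include <string>

int main()
{
    namespace fs = std::filesystem;
    const std::string detached_dir_name = "detached";

    // Plain joins behave like the old concatenation with '/'.
    assert((fs::path(detached_dir_name) / "all_1_1_0").string() == "detached/all_1_1_0");

    // Joining an empty component appends a separator, which is how
    // `fs::path(DETACHED_DIR_NAME) / ""` rebuilds the old "detached/" prefix for starts_with checks.
    assert((fs::path(detached_dir_name) / "").string() == "detached/");
    assert(std::string("detached/all_1_1_0").starts_with((fs::path(detached_dir_name) / "").string()));

    return 0;
}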
@@ -2879,8 +2879,8 @@ void MergeTreeData::dropIfEmpty() if (disk->isBroken()) continue; /// Non recursive, exception is thrown if there are more files. - disk->removeFileIfExists(fs::path(relative_data_path) / MergeTreeData::FORMAT_VERSION_FILE_NAME); - disk->removeDirectory(fs::path(relative_data_path) / MergeTreeData::DETACHED_DIR_NAME); + disk->removeFileIfExists(fs::path(relative_data_path) / FORMAT_VERSION_FILE_NAME); + disk->removeDirectory(fs::path(relative_data_path) / DETACHED_DIR_NAME); disk->removeDirectory(relative_data_path); } } @@ -3443,7 +3443,7 @@ void MergeTreeData::changeSettings( { auto disk = new_storage_policy->getDiskByName(disk_name); disk->createDirectories(relative_data_path); - disk->createDirectories(fs::path(relative_data_path) / MergeTreeData::DETACHED_DIR_NAME); + disk->createDirectories(fs::path(relative_data_path) / DETACHED_DIR_NAME); } /// FIXME how would that be done while reloading configuration??? @@ -6037,7 +6037,7 @@ DetachedPartsInfo MergeTreeData::getDetachedParts() const for (const auto & disk : getDisks()) { - String detached_path = fs::path(relative_data_path) / MergeTreeData::DETACHED_DIR_NAME; + String detached_path = fs::path(relative_data_path) / DETACHED_DIR_NAME; /// Note: we don't care about TOCTOU issue here. if (disk->exists(detached_path)) @@ -6063,7 +6063,7 @@ void MergeTreeData::validateDetachedPartName(const String & name) void MergeTreeData::dropDetached(const ASTPtr & partition, bool part, ContextPtr local_context) { - PartsTemporaryRename renamed_parts(*this, "detached/"); + PartsTemporaryRename renamed_parts(*this, DETACHED_DIR_NAME); if (part) { @@ -6088,7 +6088,7 @@ void MergeTreeData::dropDetached(const ASTPtr & partition, bool part, ContextPtr for (auto & [old_name, new_name, disk] : renamed_parts.old_and_new_names) { - bool keep_shared = removeDetachedPart(disk, fs::path(relative_data_path) / "detached" / new_name / "", old_name); + bool keep_shared = removeDetachedPart(disk, fs::path(relative_data_path) / DETACHED_DIR_NAME / new_name / "", old_name); LOG_DEBUG(log, "Dropped detached part {}, keep shared data: {}", old_name, keep_shared); old_name.clear(); } @@ -6097,14 +6097,14 @@ void MergeTreeData::dropDetached(const ASTPtr & partition, bool part, ContextPtr MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const ASTPtr & partition, bool attach_part, ContextPtr local_context, PartsTemporaryRename & renamed_parts) { - const String source_dir = "detached/"; + const fs::path source_dir = DETACHED_DIR_NAME; /// Let's compose a list of parts that should be added. 
if (attach_part) { const String part_id = partition->as().value.safeGet(); validateDetachedPartName(part_id); - if (temporary_parts.contains(String(DETACHED_DIR_NAME) + "/" + part_id)) + if (temporary_parts.contains(source_dir / part_id)) { LOG_WARNING(log, "Will not try to attach part {} because its directory is temporary, " "probably it's being detached right now", part_id); @@ -6181,7 +6181,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const LOG_DEBUG(log, "Checking part {}", new_name); auto single_disk_volume = std::make_shared("volume_" + old_name, disk); - auto part = getDataPartBuilder(old_name, single_disk_volume, source_dir + new_name) + auto part = getDataPartBuilder(old_name, single_disk_volume, source_dir / new_name) .withPartFormatFromDisk() .build(); @@ -7212,11 +7212,10 @@ String MergeTreeData::getFullPathOnDisk(const DiskPtr & disk) const DiskPtr MergeTreeData::tryGetDiskForDetachedPart(const String & part_name) const { - String additional_path = "detached/"; const auto disks = getStoragePolicy()->getDisks(); for (const DiskPtr & disk : disks) - if (disk->exists(fs::path(relative_data_path) / additional_path / part_name)) + if (disk->exists(fs::path(relative_data_path) / DETACHED_DIR_NAME / part_name)) return disk; return nullptr; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index c9f451b6bb17..6861b615cd65 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2024,7 +2024,7 @@ PartitionCommandsResultInfo StorageMergeTree::attachPartition( bool attach_part, ContextPtr local_context) { PartitionCommandsResultInfo results; - PartsTemporaryRename renamed_parts(*this, "detached/"); + PartsTemporaryRename renamed_parts(*this, DETACHED_DIR_NAME); MutableDataPartsVector loaded_parts = tryLoadPartsToAttach(partition, attach_part, local_context, renamed_parts); for (size_t i = 0; i < loaded_parts.size(); ++i) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 6ab56ba141c1..73354e71e71c 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1983,7 +1983,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFo for (const DiskPtr & disk : getStoragePolicy()->getDisks()) { - for (const auto it = disk->iterateDirectory(fs::path(relative_data_path) / "detached/"); it->isValid(); it->next()) + for (const auto it = disk->iterateDirectory(fs::path(relative_data_path) / DETACHED_DIR_NAME); it->isValid(); it->next()) { const auto part_info = MergeTreePartInfo::tryParsePartName(it->name(), format_version); @@ -1993,7 +1993,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFo const auto part_old_name = part_info->getPartNameV1(); const auto volume = std::make_shared("volume_" + part_old_name, disk); - auto part = getDataPartBuilder(entry.new_part_name, volume, fs::path("detached") / part_old_name) + auto part = getDataPartBuilder(entry.new_part_name, volume, fs::path(DETACHED_DIR_NAME) / part_old_name) .withPartFormatFromDisk() .build(); @@ -2440,7 +2440,7 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) { String part_dir = part_to_detach->getDataPartStorage().getPartDirectory(); LOG_INFO(log, "Detaching {}", part_dir); - auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); + auto holder = getTemporaryPartDirectoryHolder(fs::path(DETACHED_DIR_NAME) / 
part_dir); part_to_detach->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } } @@ -2967,7 +2967,7 @@ void StorageReplicatedMergeTree::executeClonePartFromShard(const LogEntry & entr part = get_part(); // The fetched part is valuable and should not be cleaned like a temp part. part->is_temp = false; - part->renameTo("detached/" + entry.new_part_name, true); + part->renameTo(fs::path(DETACHED_DIR_NAME) / entry.new_part_name, true); LOG_INFO(log, "Cloned part {} to detached directory", part->name); } @@ -4987,7 +4987,7 @@ bool StorageReplicatedMergeTree::fetchPart( { // The fetched part is valuable and should not be cleaned like a temp part. part->is_temp = false; - part->renameTo(fs::path("detached") / part_name, true); + part->renameTo(fs::path(DETACHED_DIR_NAME) / part_name, true); } } catch (const Exception & e) @@ -6547,7 +6547,7 @@ PartitionCommandsResultInfo StorageReplicatedMergeTree::attachPartition( assertNotReadonly(); PartitionCommandsResultInfo results; - PartsTemporaryRename renamed_parts(*this, "detached/"); + PartsTemporaryRename renamed_parts(*this, DETACHED_DIR_NAME); MutableDataPartsVector loaded_parts = tryLoadPartsToAttach(partition, attach_part, query_context, renamed_parts); /// TODO Allow to use quorum here. @@ -9986,7 +9986,7 @@ bool StorageReplicatedMergeTree::checkIfDetachedPartExists(const String & part_n { fs::directory_iterator dir_end; for (const std::string & path : getDataPaths()) - for (fs::directory_iterator dir_it{fs::path(path) / "detached/"}; dir_it != dir_end; ++dir_it) + for (fs::directory_iterator dir_it{fs::path(path) / DETACHED_DIR_NAME}; dir_it != dir_end; ++dir_it) if (dir_it->path().filename().string() == part_name) return true; return false; @@ -9999,7 +9999,7 @@ bool StorageReplicatedMergeTree::checkIfDetachedPartitionExists(const String & p for (const std::string & path : getDataPaths()) { - for (fs::directory_iterator dir_it{fs::path(path) / "detached/"}; dir_it != dir_end; ++dir_it) + for (fs::directory_iterator dir_it{fs::path(path) / DETACHED_DIR_NAME}; dir_it != dir_end; ++dir_it) { const String file_name = dir_it->path().filename().string(); auto part_info = MergeTreePartInfo::tryParsePartName(file_name, format_version); From b2c9cb0653f6d4857a9ea1eb98904c0b4d1d7526 Mon Sep 17 00:00:00 2001 From: Nikita Fomichev Date: Fri, 5 Apr 2024 18:19:47 +0200 Subject: [PATCH 299/470] Fix flaky tests --- .../03049_unknown_identifier_materialized_column.sql | 4 ---- tests/queries/0_stateless/03068_analyzer_distributed_join.sql | 2 ++ .../queries/0_stateless/03084_analyzer_join_column_alias.sql | 4 ++-- .../03088_analyzer_ambiguous_column_multi_call.sql | 2 ++ 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql b/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql index a1c858a329c4..276e48458317 100644 --- a/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql +++ b/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql @@ -1,8 +1,6 @@ -- https://github.com/ClickHouse/ClickHouse/issues/54317 SET allow_experimental_analyzer=1; DROP DATABASE IF EXISTS 03049_database; -DROP TABLE IF EXISTS 03049_database.l; -DROP TABLE IF EXISTS 03049_database.r; CREATE DATABASE 03049_database; USE 03049_database; @@ -14,5 +12,3 @@ select * from l left join r on l.y = r.y where r.ty >= 2019; select * from 03049_database.l left join 03049_database.r on l.y = r.y where r.ty >= 2019; DROP DATABASE IF 
EXISTS 03049_database; -DROP TABLE IF EXISTS 03049_database.l; -DROP TABLE IF EXISTS 03049_database.r; diff --git a/tests/queries/0_stateless/03068_analyzer_distributed_join.sql b/tests/queries/0_stateless/03068_analyzer_distributed_join.sql index 82f58e9a7500..61b1199dc448 100644 --- a/tests/queries/0_stateless/03068_analyzer_distributed_join.sql +++ b/tests/queries/0_stateless/03068_analyzer_distributed_join.sql @@ -1,4 +1,6 @@ -- https://github.com/ClickHouse/ClickHouse/issues/6571 +-- Tag: no-replicated-database + SET allow_experimental_analyzer=1; CREATE TABLE LINEITEM_shard ON CLUSTER test_shard_localhost ( diff --git a/tests/queries/0_stateless/03084_analyzer_join_column_alias.sql b/tests/queries/0_stateless/03084_analyzer_join_column_alias.sql index 8337c0ce9878..930726898b5e 100644 --- a/tests/queries/0_stateless/03084_analyzer_join_column_alias.sql +++ b/tests/queries/0_stateless/03084_analyzer_join_column_alias.sql @@ -1,13 +1,13 @@ -- https://github.com/ClickHouse/ClickHouse/issues/47432 SET allow_experimental_analyzer=1; -create or replace table t1 +create table t1 engine = MergeTree() order by tuple() as select 1 as user_id, 2 as level; -create or replace table t2 +create table t2 engine = MergeTree() order by tuple() as diff --git a/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql b/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql index 09425d2e5036..4ca5005fa1dc 100644 --- a/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql +++ b/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql @@ -1,5 +1,7 @@ -- https://github.com/ClickHouse/ClickHouse/issues/61014 SET allow_experimental_analyzer=1; + +DROP DATABASE IF EXISTS test_03088; create database test_03088; create table test_03088.a (i int) engine = Log(); From d1c42668bddc9a8c99dfd0bd8f0d340b60da4569 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Fri, 5 Apr 2024 16:22:57 +0000 Subject: [PATCH 300/470] Another attempt. 
--- src/Analyzer/ArrayJoinNode.cpp | 4 ++-- src/Analyzer/ArrayJoinNode.h | 4 ++-- src/Analyzer/ColumnNode.cpp | 18 +++++++++----- src/Analyzer/ColumnNode.h | 4 ++-- src/Analyzer/ColumnTransformers.cpp | 12 +++++----- src/Analyzer/ColumnTransformers.h | 12 +++++----- src/Analyzer/ConstantNode.cpp | 24 ++++++++++++++----- src/Analyzer/ConstantNode.h | 9 +++---- src/Analyzer/ConstantValue.h | 10 -------- src/Analyzer/FunctionNode.cpp | 10 ++++++-- src/Analyzer/FunctionNode.h | 4 ++-- src/Analyzer/HashUtils.h | 24 +++++++++---------- src/Analyzer/IQueryTreeNode.cpp | 4 ++-- src/Analyzer/IQueryTreeNode.h | 9 +++---- src/Analyzer/IdentifierNode.cpp | 4 ++-- src/Analyzer/IdentifierNode.h | 4 ++-- src/Analyzer/InterpolateNode.cpp | 4 ++-- src/Analyzer/InterpolateNode.h | 4 ++-- src/Analyzer/JoinNode.cpp | 4 ++-- src/Analyzer/JoinNode.h | 4 ++-- src/Analyzer/LambdaNode.cpp | 4 ++-- src/Analyzer/LambdaNode.h | 4 ++-- src/Analyzer/ListNode.cpp | 4 ++-- src/Analyzer/ListNode.h | 4 ++-- src/Analyzer/MatcherNode.cpp | 4 ++-- src/Analyzer/MatcherNode.h | 4 ++-- src/Analyzer/Passes/QueryAnalysisPass.cpp | 21 ++++++++-------- src/Analyzer/QueryNode.cpp | 4 ++-- src/Analyzer/QueryNode.h | 4 ++-- src/Analyzer/SortNode.cpp | 4 ++-- src/Analyzer/SortNode.h | 4 ++-- src/Analyzer/TableFunctionNode.cpp | 4 ++-- src/Analyzer/TableFunctionNode.h | 4 ++-- src/Analyzer/TableNode.cpp | 4 ++-- src/Analyzer/TableNode.h | 4 ++-- src/Analyzer/UnionNode.cpp | 4 ++-- src/Analyzer/UnionNode.h | 4 ++-- src/Analyzer/WindowNode.cpp | 4 ++-- src/Analyzer/WindowNode.h | 4 ++-- src/Analyzer/tests/gtest_query_tree_node.cpp | 4 ++-- ...up_by_use_nulls_analyzer_crashes.reference | 2 ++ ...23_group_by_use_nulls_analyzer_crashes.sql | 2 ++ 42 files changed, 145 insertions(+), 128 deletions(-) diff --git a/src/Analyzer/ArrayJoinNode.cpp b/src/Analyzer/ArrayJoinNode.cpp index ee6bd80150d1..e817a893af41 100644 --- a/src/Analyzer/ArrayJoinNode.cpp +++ b/src/Analyzer/ArrayJoinNode.cpp @@ -33,13 +33,13 @@ void ArrayJoinNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_stat getJoinExpressionsNode()->dumpTreeImpl(buffer, format_state, indent + 4); } -bool ArrayJoinNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool ArrayJoinNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); return is_left == rhs_typed.is_left; } -void ArrayJoinNode::updateTreeHashImpl(HashState & state) const +void ArrayJoinNode::updateTreeHashImpl(HashState & state, CompareOptions) const { state.update(is_left); } diff --git a/src/Analyzer/ArrayJoinNode.h b/src/Analyzer/ArrayJoinNode.h index 89cb0b7b8c10..1772e2b3ca07 100644 --- a/src/Analyzer/ArrayJoinNode.h +++ b/src/Analyzer/ArrayJoinNode.h @@ -93,9 +93,9 @@ class ArrayJoinNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & state) const override; + void updateTreeHashImpl(HashState & state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/ColumnNode.cpp b/src/Analyzer/ColumnNode.cpp index 3d9f5d1640ef..b8d2613871dc 100644 --- a/src/Analyzer/ColumnNode.cpp +++ b/src/Analyzer/ColumnNode.cpp @@ -70,20 +70,26 @@ void ColumnNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & state, size_t } } -bool 
ColumnNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool ColumnNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions compare_options) const { const auto & rhs_typed = assert_cast(rhs); - return column == rhs_typed.column; + if (column.name != rhs_typed.column.name) + return false; + + return !compare_options.compare_types || column.type->equals(*rhs_typed.column.type); } -void ColumnNode::updateTreeHashImpl(HashState & hash_state) const +void ColumnNode::updateTreeHashImpl(HashState & hash_state, CompareOptions compare_options) const { hash_state.update(column.name.size()); hash_state.update(column.name); - const auto & column_type_name = column.type->getName(); - hash_state.update(column_type_name.size()); - hash_state.update(column_type_name); + if (compare_options.compare_types) + { + const auto & column_type_name = column.type->getName(); + hash_state.update(column_type_name.size()); + hash_state.update(column_type_name); + } } QueryTreeNodePtr ColumnNode::cloneImpl() const diff --git a/src/Analyzer/ColumnNode.h b/src/Analyzer/ColumnNode.h index 46e7c8eb5007..f6fac5ce7f9a 100644 --- a/src/Analyzer/ColumnNode.h +++ b/src/Analyzer/ColumnNode.h @@ -131,9 +131,9 @@ class ColumnNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & hash_state) const override; + void updateTreeHashImpl(HashState & hash_state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/ColumnTransformers.cpp b/src/Analyzer/ColumnTransformers.cpp index 3a6b9e3b2916..356344c1aec9 100644 --- a/src/Analyzer/ColumnTransformers.cpp +++ b/src/Analyzer/ColumnTransformers.cpp @@ -74,13 +74,13 @@ void ApplyColumnTransformerNode::dumpTreeImpl(WriteBuffer & buffer, FormatState expression_node->dumpTreeImpl(buffer, format_state, indent + 4); } -bool ApplyColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool ApplyColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); return apply_transformer_type == rhs_typed.apply_transformer_type; } -void ApplyColumnTransformerNode::updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const +void ApplyColumnTransformerNode::updateTreeHashImpl(IQueryTreeNode::HashState & hash_state, CompareOptions) const { hash_state.update(static_cast(getTransformerType())); hash_state.update(static_cast(getApplyTransformerType())); @@ -178,7 +178,7 @@ void ExceptColumnTransformerNode::dumpTreeImpl(WriteBuffer & buffer, FormatState } } -bool ExceptColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool ExceptColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); if (except_transformer_type != rhs_typed.except_transformer_type || @@ -198,7 +198,7 @@ bool ExceptColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs) const return column_matcher->pattern() == rhs_column_matcher->pattern(); } -void ExceptColumnTransformerNode::updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const +void ExceptColumnTransformerNode::updateTreeHashImpl(IQueryTreeNode::HashState & hash_state, CompareOptions) const { hash_state.update(static_cast(getTransformerType())); 
hash_state.update(static_cast(getExceptTransformerType())); @@ -302,13 +302,13 @@ void ReplaceColumnTransformerNode::dumpTreeImpl(WriteBuffer & buffer, FormatStat } } -bool ReplaceColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool ReplaceColumnTransformerNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); return is_strict == rhs_typed.is_strict && replacements_names == rhs_typed.replacements_names; } -void ReplaceColumnTransformerNode::updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const +void ReplaceColumnTransformerNode::updateTreeHashImpl(IQueryTreeNode::HashState & hash_state, CompareOptions) const { hash_state.update(static_cast(getTransformerType())); diff --git a/src/Analyzer/ColumnTransformers.h b/src/Analyzer/ColumnTransformers.h index 8fa8e28f1947..9ae1f14575b7 100644 --- a/src/Analyzer/ColumnTransformers.h +++ b/src/Analyzer/ColumnTransformers.h @@ -137,9 +137,9 @@ class ApplyColumnTransformerNode final : public IColumnTransformerNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const override; + void updateTreeHashImpl(IQueryTreeNode::HashState & hash_state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; @@ -214,9 +214,9 @@ class ExceptColumnTransformerNode final : public IColumnTransformerNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const override; + void updateTreeHashImpl(IQueryTreeNode::HashState & hash_state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; @@ -290,9 +290,9 @@ class ReplaceColumnTransformerNode final : public IColumnTransformerNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(IQueryTreeNode::HashState & hash_state) const override; + void updateTreeHashImpl(IQueryTreeNode::HashState & hash_state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/ConstantNode.cpp b/src/Analyzer/ConstantNode.cpp index e26500a9886f..46c1f7fb1edf 100644 --- a/src/Analyzer/ConstantNode.cpp +++ b/src/Analyzer/ConstantNode.cpp @@ -126,17 +126,29 @@ void ConstantNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state } } -bool ConstantNode::isEqualImpl(const IQueryTreeNode & rhs) const +void ConstantNode::convertToNullable() +{ + constant_value = std::make_shared(constant_value->getValue(), makeNullableSafe(constant_value->getType())); +} + +bool ConstantNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions compare_options) const { const auto & rhs_typed = assert_cast(rhs); - return *constant_value == *rhs_typed.constant_value && value_string == rhs_typed.value_string; + + if (value_string != rhs_typed.value_string || constant_value->getValue() != rhs_typed.constant_value->getValue()) + 
return false; + + return !compare_options.compare_types || constant_value->getType()->equals(*rhs_typed.constant_value->getType()); } -void ConstantNode::updateTreeHashImpl(HashState & hash_state) const +void ConstantNode::updateTreeHashImpl(HashState & hash_state, CompareOptions compare_options) const { - auto type_name = constant_value->getType()->getName(); - hash_state.update(type_name.size()); - hash_state.update(type_name); + if (compare_options.compare_types) + { + auto type_name = constant_value->getType()->getName(); + hash_state.update(type_name.size()); + hash_state.update(type_name); + } hash_state.update(value_string.size()); hash_state.update(value_string); diff --git a/src/Analyzer/ConstantNode.h b/src/Analyzer/ConstantNode.h index 98a8eb782776..0c88862b8792 100644 --- a/src/Analyzer/ConstantNode.h +++ b/src/Analyzer/ConstantNode.h @@ -87,17 +87,14 @@ class ConstantNode final : public IQueryTreeNode mask_id = id; } - void convertToNullable() override - { - constant_value = std::make_shared(constant_value->getValue(), makeNullableSafe(constant_value->getType())); - } + void convertToNullable() override; void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions compare_options) const override; - void updateTreeHashImpl(HashState & hash_state) const override; + void updateTreeHashImpl(HashState & hash_state, CompareOptions compare_options) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/ConstantValue.h b/src/Analyzer/ConstantValue.h index a9e2ffd9e658..335072b92867 100644 --- a/src/Analyzer/ConstantValue.h +++ b/src/Analyzer/ConstantValue.h @@ -34,14 +34,4 @@ class ConstantValue DataTypePtr data_type; }; -inline bool operator==(const ConstantValue & lhs, const ConstantValue & rhs) -{ - return lhs.getValue() == rhs.getValue() && lhs.getType()->equals(*rhs.getType()); -} - -inline bool operator!=(const ConstantValue & lhs, const ConstantValue & rhs) -{ - return !(lhs == rhs); -} - } diff --git a/src/Analyzer/FunctionNode.cpp b/src/Analyzer/FunctionNode.cpp index e902ac2274e6..f13842cf67cc 100644 --- a/src/Analyzer/FunctionNode.cpp +++ b/src/Analyzer/FunctionNode.cpp @@ -142,7 +142,7 @@ void FunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state } } -bool FunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool FunctionNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions compare_options) const { const auto & rhs_typed = assert_cast(rhs); if (function_name != rhs_typed.function_name || isAggregateFunction() != rhs_typed.isAggregateFunction() @@ -150,6 +150,9 @@ bool FunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const || nulls_action != rhs_typed.nulls_action) return false; + if (!compare_options.compare_types) + return true; + if (isResolved() != rhs_typed.isResolved()) return false; if (!isResolved()) @@ -168,7 +171,7 @@ bool FunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const return true; } -void FunctionNode::updateTreeHashImpl(HashState & hash_state) const +void FunctionNode::updateTreeHashImpl(HashState & hash_state, CompareOptions compare_options) const { hash_state.update(function_name.size()); hash_state.update(function_name); @@ -177,6 +180,9 @@ void FunctionNode::updateTreeHashImpl(HashState & hash_state) const hash_state.update(isWindowFunction()); hash_state.update(nulls_action); + if 
(!compare_options.compare_types) + return; + if (!isResolved()) return; diff --git a/src/Analyzer/FunctionNode.h b/src/Analyzer/FunctionNode.h index 0ff3e6896327..8d14b7eeb0dd 100644 --- a/src/Analyzer/FunctionNode.h +++ b/src/Analyzer/FunctionNode.h @@ -208,9 +208,9 @@ class FunctionNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions compare_options) const override; - void updateTreeHashImpl(HashState & hash_state) const override; + void updateTreeHashImpl(HashState & hash_state, CompareOptions compare_options) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/HashUtils.h b/src/Analyzer/HashUtils.h index eb6aac88fe96..80f59c1eaaab 100644 --- a/src/Analyzer/HashUtils.h +++ b/src/Analyzer/HashUtils.h @@ -11,37 +11,37 @@ namespace DB * Example of usage: * std::unordered_map map; */ -template +template struct QueryTreeNodeWithHash { QueryTreeNodeWithHash(QueryTreeNodePtrType node_) /// NOLINT : node(std::move(node_)) - , hash(node->getTreeHash({.compare_aliases = compare_aliases})) + , hash(node->getTreeHash({.compare_aliases = compare_aliases, .compare_types = compare_types})) {} QueryTreeNodePtrType node = nullptr; CityHash_v1_0_2::uint128 hash; }; -template -inline bool operator==(const QueryTreeNodeWithHash & lhs, const QueryTreeNodeWithHash & rhs) +template +inline bool operator==(const QueryTreeNodeWithHash & lhs, const QueryTreeNodeWithHash & rhs) { - return lhs.hash == rhs.hash && lhs.node->isEqual(*rhs.node, {.compare_aliases = compare_aliases}); + return lhs.hash == rhs.hash && lhs.node->isEqual(*rhs.node, {.compare_aliases = compare_aliases, .compare_types = compare_types}); } -template -inline bool operator!=(const QueryTreeNodeWithHash & lhs, const QueryTreeNodeWithHash & rhs) +template +inline bool operator!=(const QueryTreeNodeWithHash & lhs, const QueryTreeNodeWithHash & rhs) { return !(lhs == rhs); } using QueryTreeNodePtrWithHash = QueryTreeNodeWithHash; -using QueryTreeNodePtrWithHashWithoutAlias = QueryTreeNodeWithHash; +using QueryTreeNodePtrWithHashIgnoreTypes = QueryTreeNodeWithHash; using QueryTreeNodeRawPtrWithHash = QueryTreeNodeWithHash; using QueryTreeNodeConstRawPtrWithHash = QueryTreeNodeWithHash; using QueryTreeNodePtrWithHashSet = std::unordered_set; -using QueryTreeNodePtrWithHashWithoutAliasSet = std::unordered_set; +using QueryTreeNodePtrWithHashIgnoreTypesSet = std::unordered_set; using QueryTreeNodeConstRawPtrWithHashSet = std::unordered_set; template @@ -52,10 +52,10 @@ using QueryTreeNodeConstRawPtrWithHashMap = std::unordered_map -struct std::hash> +template +struct std::hash> { - size_t operator()(const DB::QueryTreeNodeWithHash & node_with_hash) const + size_t operator()(const DB::QueryTreeNodeWithHash & node_with_hash) const { return node_with_hash.hash.low64; } diff --git a/src/Analyzer/IQueryTreeNode.cpp b/src/Analyzer/IQueryTreeNode.cpp index 7815b93c3aca..cd085babf384 100644 --- a/src/Analyzer/IQueryTreeNode.cpp +++ b/src/Analyzer/IQueryTreeNode.cpp @@ -107,7 +107,7 @@ bool IQueryTreeNode::isEqual(const IQueryTreeNode & rhs, CompareOptions compare_ } if (lhs_node_to_compare->getNodeType() != rhs_node_to_compare->getNodeType() || - !lhs_node_to_compare->isEqualImpl(*rhs_node_to_compare)) + !lhs_node_to_compare->isEqualImpl(*rhs_node_to_compare, compare_options)) return false; if 
(compare_options.compare_aliases && lhs_node_to_compare->alias != rhs_node_to_compare->alias) @@ -207,7 +207,7 @@ IQueryTreeNode::Hash IQueryTreeNode::getTreeHash(CompareOptions compare_options) hash_state.update(node_to_process->alias); } - node_to_process->updateTreeHashImpl(hash_state); + node_to_process->updateTreeHashImpl(hash_state, compare_options); hash_state.update(node_to_process->children.size()); diff --git a/src/Analyzer/IQueryTreeNode.h b/src/Analyzer/IQueryTreeNode.h index 92e34616c4d7..fc2cb2c53f66 100644 --- a/src/Analyzer/IQueryTreeNode.h +++ b/src/Analyzer/IQueryTreeNode.h @@ -97,6 +97,7 @@ class IQueryTreeNode : public TypePromotion struct CompareOptions { bool compare_aliases = true; + bool compare_types = true; }; /** Is tree equal to other tree with node root. @@ -104,7 +105,7 @@ class IQueryTreeNode : public TypePromotion * With default compare options aliases of query tree nodes are compared during isEqual call. * Original ASTs of query tree nodes are not compared during isEqual call. */ - bool isEqual(const IQueryTreeNode & rhs, CompareOptions compare_options = { .compare_aliases = true }) const; + bool isEqual(const IQueryTreeNode & rhs, CompareOptions compare_options = { .compare_aliases = true, .compare_types = true }) const; using Hash = CityHash_v1_0_2::uint128; using HashState = SipHash; @@ -114,7 +115,7 @@ class IQueryTreeNode : public TypePromotion * Alias of query tree node is part of query tree hash. * Original AST is not part of query tree hash. */ - Hash getTreeHash(CompareOptions compare_options = { .compare_aliases = true }) const; + Hash getTreeHash(CompareOptions compare_options = { .compare_aliases = true, .compare_types = true }) const; /// Get a deep copy of the query tree QueryTreeNodePtr clone() const; @@ -264,12 +265,12 @@ class IQueryTreeNode : public TypePromotion /** Subclass must compare its internal state with rhs node internal state and do not compare children or weak pointers to other * query tree nodes. */ - virtual bool isEqualImpl(const IQueryTreeNode & rhs) const = 0; + virtual bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions compare_options) const = 0; /** Subclass must update tree hash with its internal state and do not update tree hash for children or weak pointers to other * query tree nodes. */ - virtual void updateTreeHashImpl(HashState & hash_state) const = 0; + virtual void updateTreeHashImpl(HashState & hash_state, CompareOptions compare_options) const = 0; /** Subclass must clone its internal state and do not clone children or weak pointers to other * query tree nodes. 
diff --git a/src/Analyzer/IdentifierNode.cpp b/src/Analyzer/IdentifierNode.cpp index 88b3daacb125..181e75a57fd8 100644 --- a/src/Analyzer/IdentifierNode.cpp +++ b/src/Analyzer/IdentifierNode.cpp @@ -38,13 +38,13 @@ void IdentifierNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_sta } } -bool IdentifierNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool IdentifierNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); return identifier == rhs_typed.identifier && table_expression_modifiers == rhs_typed.table_expression_modifiers; } -void IdentifierNode::updateTreeHashImpl(HashState & state) const +void IdentifierNode::updateTreeHashImpl(HashState & state, CompareOptions) const { const auto & identifier_name = identifier.getFullName(); state.update(identifier_name.size()); diff --git a/src/Analyzer/IdentifierNode.h b/src/Analyzer/IdentifierNode.h index 872bb14d5128..1b07f0b3765c 100644 --- a/src/Analyzer/IdentifierNode.h +++ b/src/Analyzer/IdentifierNode.h @@ -53,9 +53,9 @@ class IdentifierNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & state) const override; + void updateTreeHashImpl(HashState & state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/InterpolateNode.cpp b/src/Analyzer/InterpolateNode.cpp index d78993c7b855..e4f7e22b8039 100644 --- a/src/Analyzer/InterpolateNode.cpp +++ b/src/Analyzer/InterpolateNode.cpp @@ -28,13 +28,13 @@ void InterpolateNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_st getInterpolateExpression()->dumpTreeImpl(buffer, format_state, indent + 4); } -bool InterpolateNode::isEqualImpl(const IQueryTreeNode &) const +bool InterpolateNode::isEqualImpl(const IQueryTreeNode &, CompareOptions) const { /// No state in interpolate node return true; } -void InterpolateNode::updateTreeHashImpl(HashState &) const +void InterpolateNode::updateTreeHashImpl(HashState &, CompareOptions) const { /// No state in interpolate node } diff --git a/src/Analyzer/InterpolateNode.h b/src/Analyzer/InterpolateNode.h index c45800ebaaff..9269d3924f5b 100644 --- a/src/Analyzer/InterpolateNode.h +++ b/src/Analyzer/InterpolateNode.h @@ -53,9 +53,9 @@ class InterpolateNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & hash_state) const override; + void updateTreeHashImpl(HashState & hash_state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/JoinNode.cpp b/src/Analyzer/JoinNode.cpp index 9b61c8b19d0d..1a440ad8abc6 100644 --- a/src/Analyzer/JoinNode.cpp +++ b/src/Analyzer/JoinNode.cpp @@ -81,13 +81,13 @@ void JoinNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, si } } -bool JoinNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool JoinNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); return locality == rhs_typed.locality && strictness == rhs_typed.strictness && kind == 
rhs_typed.kind; } -void JoinNode::updateTreeHashImpl(HashState & state) const +void JoinNode::updateTreeHashImpl(HashState & state, CompareOptions) const { state.update(locality); state.update(strictness); diff --git a/src/Analyzer/JoinNode.h b/src/Analyzer/JoinNode.h index 4f071e03856f..734162d95469 100644 --- a/src/Analyzer/JoinNode.h +++ b/src/Analyzer/JoinNode.h @@ -142,9 +142,9 @@ class JoinNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & state) const override; + void updateTreeHashImpl(HashState & state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/LambdaNode.cpp b/src/Analyzer/LambdaNode.cpp index 4be4d69c1907..bca2616d85a9 100644 --- a/src/Analyzer/LambdaNode.cpp +++ b/src/Analyzer/LambdaNode.cpp @@ -46,13 +46,13 @@ void LambdaNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, getExpression()->dumpTreeImpl(buffer, format_state, indent + 4); } -bool LambdaNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool LambdaNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); return argument_names == rhs_typed.argument_names; } -void LambdaNode::updateTreeHashImpl(HashState & state) const +void LambdaNode::updateTreeHashImpl(HashState & state, CompareOptions) const { state.update(argument_names.size()); for (const auto & argument_name : argument_names) diff --git a/src/Analyzer/LambdaNode.h b/src/Analyzer/LambdaNode.h index ea44a7e8187e..0b2882125f0e 100644 --- a/src/Analyzer/LambdaNode.h +++ b/src/Analyzer/LambdaNode.h @@ -97,9 +97,9 @@ class LambdaNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & state) const override; + void updateTreeHashImpl(HashState & state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/ListNode.cpp b/src/Analyzer/ListNode.cpp index 799c471d6859..217cd6cefa33 100644 --- a/src/Analyzer/ListNode.cpp +++ b/src/Analyzer/ListNode.cpp @@ -38,13 +38,13 @@ void ListNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, si } } -bool ListNode::isEqualImpl(const IQueryTreeNode &) const +bool ListNode::isEqualImpl(const IQueryTreeNode &, CompareOptions) const { /// No state return true; } -void ListNode::updateTreeHashImpl(HashState &) const +void ListNode::updateTreeHashImpl(HashState &, CompareOptions) const { /// No state } diff --git a/src/Analyzer/ListNode.h b/src/Analyzer/ListNode.h index 5b1abc36ae96..379919f190fb 100644 --- a/src/Analyzer/ListNode.h +++ b/src/Analyzer/ListNode.h @@ -51,9 +51,9 @@ class ListNode final : public IQueryTreeNode const_iterator end() const { return children.end(); } protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState &) const override; + void updateTreeHashImpl(HashState &, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git 
a/src/Analyzer/MatcherNode.cpp b/src/Analyzer/MatcherNode.cpp index f573b83e5383..341c4b8eec75 100644 --- a/src/Analyzer/MatcherNode.cpp +++ b/src/Analyzer/MatcherNode.cpp @@ -160,7 +160,7 @@ void MatcherNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, } } -bool MatcherNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool MatcherNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); if (matcher_type != rhs_typed.matcher_type || @@ -181,7 +181,7 @@ bool MatcherNode::isEqualImpl(const IQueryTreeNode & rhs) const return columns_matcher->pattern() == rhs_columns_matcher->pattern(); } -void MatcherNode::updateTreeHashImpl(HashState & hash_state) const +void MatcherNode::updateTreeHashImpl(HashState & hash_state, CompareOptions) const { hash_state.update(static_cast(matcher_type)); diff --git a/src/Analyzer/MatcherNode.h b/src/Analyzer/MatcherNode.h index d6f077e224be..a7ec7d984c66 100644 --- a/src/Analyzer/MatcherNode.h +++ b/src/Analyzer/MatcherNode.h @@ -135,9 +135,9 @@ class MatcherNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & hash_state) const override; + void updateTreeHashImpl(HashState & hash_state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index cab6dd268ea5..4d862639e15b 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -776,7 +776,7 @@ struct IdentifierResolveScope /// Table expression node to data std::unordered_map table_expression_node_to_data; - QueryTreeNodePtrWithHashWithoutAliasSet nullable_group_by_keys; + QueryTreeNodePtrWithHashIgnoreTypesSet nullable_group_by_keys; /// Here we count the number of nullable GROUP BY keys we met resolving expression. /// E.g. for a query `SELECT tuple(tuple(number)) FROM numbers(10) GROUP BY (number, tuple(number)) with cube` /// both `number` and `tuple(number)` would be in nullable_group_by_keys. @@ -6155,12 +6155,6 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id return resolved_expression_it->second; } - bool is_nullable_group_by_key = scope.nullable_group_by_keys.contains(node); - if (is_nullable_group_by_key) - ++scope.found_nullable_group_by_key_in_scope; - - SCOPE_EXIT(scope.found_nullable_group_by_key_in_scope -= is_nullable_group_by_key); - String node_alias = node->getAlias(); ProjectionNames result_projection_names; @@ -6452,10 +6446,14 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id validateTreeSize(node, scope.context->getSettingsRef().max_expanded_ast_elements, node_to_tree_size); - if (is_nullable_group_by_key && scope.found_nullable_group_by_key_in_scope == 1 && !scope.expressions_in_resolve_process_stack.hasAggregateFunction()) + if (!scope.expressions_in_resolve_process_stack.hasAggregateFunction()) { - node = node->clone(); - node->convertToNullable(); + auto it = scope.nullable_group_by_keys.find(node); + if (it != scope.nullable_group_by_keys.end()) + { + node = it->node->clone(); + node->convertToNullable(); + } } /** Update aliases after expression node was resolved. 
@@ -8028,6 +8026,9 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if (query_node_typed.hasGroupBy()) resolveGroupByNode(query_node_typed, scope); + if (scope.group_by_use_nulls) + resolved_expressions.clear(); + if (query_node_typed.hasHaving()) resolveExpressionNode(query_node_typed.getHaving(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); diff --git a/src/Analyzer/QueryNode.cpp b/src/Analyzer/QueryNode.cpp index bc7a29247e44..f1361c328dbf 100644 --- a/src/Analyzer/QueryNode.cpp +++ b/src/Analyzer/QueryNode.cpp @@ -247,7 +247,7 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s } } -bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); @@ -266,7 +266,7 @@ bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const settings_changes == rhs_typed.settings_changes; } -void QueryNode::updateTreeHashImpl(HashState & state) const +void QueryNode::updateTreeHashImpl(HashState & state, CompareOptions) const { state.update(is_subquery); state.update(is_cte); diff --git a/src/Analyzer/QueryNode.h b/src/Analyzer/QueryNode.h index 6f9067908dde..af187df72a80 100644 --- a/src/Analyzer/QueryNode.h +++ b/src/Analyzer/QueryNode.h @@ -589,9 +589,9 @@ class QueryNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState &) const override; + void updateTreeHashImpl(HashState &, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/SortNode.cpp b/src/Analyzer/SortNode.cpp index 8e9913af442c..795a639deea7 100644 --- a/src/Analyzer/SortNode.cpp +++ b/src/Analyzer/SortNode.cpp @@ -71,7 +71,7 @@ void SortNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, si } } -bool SortNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool SortNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); if (sort_direction != rhs_typed.sort_direction || @@ -89,7 +89,7 @@ bool SortNode::isEqualImpl(const IQueryTreeNode & rhs) const return collator->getLocale() == rhs_typed.collator->getLocale(); } -void SortNode::updateTreeHashImpl(HashState & hash_state) const +void SortNode::updateTreeHashImpl(HashState & hash_state, CompareOptions) const { hash_state.update(sort_direction); /// use some determined value if `nulls_sort_direction` is `nullopt` diff --git a/src/Analyzer/SortNode.h b/src/Analyzer/SortNode.h index b860fd19a900..4d1f6f7c0f0d 100644 --- a/src/Analyzer/SortNode.h +++ b/src/Analyzer/SortNode.h @@ -131,9 +131,9 @@ class SortNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & hash_state) const override; + void updateTreeHashImpl(HashState & hash_state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/TableFunctionNode.cpp b/src/Analyzer/TableFunctionNode.cpp index e5158a06373c..87d2fdcffb52 
100644 --- a/src/Analyzer/TableFunctionNode.cpp +++ b/src/Analyzer/TableFunctionNode.cpp @@ -82,7 +82,7 @@ void TableFunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_ } } -bool TableFunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool TableFunctionNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); if (table_function_name != rhs_typed.table_function_name) @@ -97,7 +97,7 @@ bool TableFunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const return table_expression_modifiers == rhs_typed.table_expression_modifiers; } -void TableFunctionNode::updateTreeHashImpl(HashState & state) const +void TableFunctionNode::updateTreeHashImpl(HashState & state, CompareOptions) const { state.update(table_function_name.size()); state.update(table_function_name); diff --git a/src/Analyzer/TableFunctionNode.h b/src/Analyzer/TableFunctionNode.h index 69237ac84167..98121ef95c59 100644 --- a/src/Analyzer/TableFunctionNode.h +++ b/src/Analyzer/TableFunctionNode.h @@ -155,9 +155,9 @@ class TableFunctionNode : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & state) const override; + void updateTreeHashImpl(HashState & state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/TableNode.cpp b/src/Analyzer/TableNode.cpp index f899c1ae6fe9..daf5db08551d 100644 --- a/src/Analyzer/TableNode.cpp +++ b/src/Analyzer/TableNode.cpp @@ -52,14 +52,14 @@ void TableNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s } } -bool TableNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool TableNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); return storage_id == rhs_typed.storage_id && table_expression_modifiers == rhs_typed.table_expression_modifiers && temporary_table_name == rhs_typed.temporary_table_name; } -void TableNode::updateTreeHashImpl(HashState & state) const +void TableNode::updateTreeHashImpl(HashState & state, CompareOptions) const { if (!temporary_table_name.empty()) { diff --git a/src/Analyzer/TableNode.h b/src/Analyzer/TableNode.h index b0bf91fa01b9..2d66167acd15 100644 --- a/src/Analyzer/TableNode.h +++ b/src/Analyzer/TableNode.h @@ -100,9 +100,9 @@ class TableNode : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & state) const override; + void updateTreeHashImpl(HashState & state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/UnionNode.cpp b/src/Analyzer/UnionNode.cpp index c60031265544..9bc2a197d9a6 100644 --- a/src/Analyzer/UnionNode.cpp +++ b/src/Analyzer/UnionNode.cpp @@ -145,7 +145,7 @@ void UnionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s getQueriesNode()->dumpTreeImpl(buffer, format_state, indent + 4); } -bool UnionNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool UnionNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = 
assert_cast(rhs); @@ -153,7 +153,7 @@ bool UnionNode::isEqualImpl(const IQueryTreeNode & rhs) const union_mode == rhs_typed.union_mode; } -void UnionNode::updateTreeHashImpl(HashState & state) const +void UnionNode::updateTreeHashImpl(HashState & state, CompareOptions) const { state.update(is_subquery); state.update(is_cte); diff --git a/src/Analyzer/UnionNode.h b/src/Analyzer/UnionNode.h index 7686b73f5e06..189951f6375d 100644 --- a/src/Analyzer/UnionNode.h +++ b/src/Analyzer/UnionNode.h @@ -143,9 +143,9 @@ class UnionNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState &) const override; + void updateTreeHashImpl(HashState &, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/WindowNode.cpp b/src/Analyzer/WindowNode.cpp index 0fbe7c51bc7c..39a89ca6828f 100644 --- a/src/Analyzer/WindowNode.cpp +++ b/src/Analyzer/WindowNode.cpp @@ -80,14 +80,14 @@ void WindowNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, } } -bool WindowNode::isEqualImpl(const IQueryTreeNode & rhs) const +bool WindowNode::isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const { const auto & rhs_typed = assert_cast(rhs); return window_frame == rhs_typed.window_frame && parent_window_name == rhs_typed.parent_window_name; } -void WindowNode::updateTreeHashImpl(HashState & hash_state) const +void WindowNode::updateTreeHashImpl(HashState & hash_state, CompareOptions) const { hash_state.update(window_frame.is_default); hash_state.update(window_frame.type); diff --git a/src/Analyzer/WindowNode.h b/src/Analyzer/WindowNode.h index 30e1128b93c7..febbc02bedce 100644 --- a/src/Analyzer/WindowNode.h +++ b/src/Analyzer/WindowNode.h @@ -169,9 +169,9 @@ class WindowNode final : public IQueryTreeNode void dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, size_t indent) const override; protected: - bool isEqualImpl(const IQueryTreeNode & rhs) const override; + bool isEqualImpl(const IQueryTreeNode & rhs, CompareOptions) const override; - void updateTreeHashImpl(HashState & hash_state) const override; + void updateTreeHashImpl(HashState & hash_state, CompareOptions) const override; QueryTreeNodePtr cloneImpl() const override; diff --git a/src/Analyzer/tests/gtest_query_tree_node.cpp b/src/Analyzer/tests/gtest_query_tree_node.cpp index cf1ce78e4232..01556c9f9216 100644 --- a/src/Analyzer/tests/gtest_query_tree_node.cpp +++ b/src/Analyzer/tests/gtest_query_tree_node.cpp @@ -22,12 +22,12 @@ class SourceNode final : public IQueryTreeNode { } - bool isEqualImpl(const IQueryTreeNode &) const override + bool isEqualImpl(const IQueryTreeNode &, CompareOptions) const override { return true; } - void updateTreeHashImpl(HashState &) const override + void updateTreeHashImpl(HashState &, CompareOptions) const override { } diff --git a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference index 4081b82a8f5f..4243abb1a1e9 100644 --- a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference +++ b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference @@ -60,3 +60,5 @@ (7) (8) (9) +a b +a b diff --git 
a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql index 28042fc5b440..20cf47e4feba 100644 --- a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql +++ b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql @@ -7,3 +7,5 @@ select tuple(array(number)) as x FROM numbers(10) GROUP BY number, array(number) SELECT tuple(number) AS x FROM numbers(10) GROUP BY GROUPING SETS (number) order by x; SELECT ignore(toFixedString('Lambda as function parameter', 28), toNullable(28), ignore(8)), sum(marks) FROM system.parts GROUP BY GROUPING SETS ((2)) FORMAT Null settings optimize_injective_functions_in_group_by=1, optimize_group_by_function_keys=1, group_by_use_nulls=1; -- { serverError ILLEGAL_AGGREGATION } + +SELECT toLowCardinality(materialize('a' AS key)), 'b' AS value GROUP BY key WITH CUBE SETTINGS group_by_use_nulls = 1; From eb9690016a06ad544e0d819a44ad772b1d0cd2cc Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 5 Apr 2024 18:37:46 +0200 Subject: [PATCH 301/470] Fix assertion --- src/Interpreters/Cache/EvictionCandidates.h | 6 ---- src/Interpreters/Cache/FileCache.cpp | 21 +++++------- src/Interpreters/Cache/IFileCachePriority.h | 3 +- .../Cache/LRUFileCachePriority.cpp | 6 ++-- src/Interpreters/Cache/LRUFileCachePriority.h | 3 +- .../Cache/SLRUFileCachePriority.cpp | 33 +++++++++++-------- .../Cache/SLRUFileCachePriority.h | 3 +- 7 files changed, 37 insertions(+), 38 deletions(-) diff --git a/src/Interpreters/Cache/EvictionCandidates.h b/src/Interpreters/Cache/EvictionCandidates.h index 140728ae7044..baacbc0cfae5 100644 --- a/src/Interpreters/Cache/EvictionCandidates.h +++ b/src/Interpreters/Cache/EvictionCandidates.h @@ -10,12 +10,6 @@ class EvictionCandidates : private boost::noncopyable using FinalizeEvictionFunc = std::function; EvictionCandidates() = default; - EvictionCandidates(EvictionCandidates && other) noexcept - { - candidates = std::move(other.candidates); - candidates_size = std::move(other.candidates_size); - queue_entries_to_invalidate = std::move(other.queue_entries_to_invalidate); - } ~EvictionCandidates(); void add( diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index bf76a3073647..12ea2c178bc7 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1388,8 +1388,8 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, if (new_settings.max_size != actual_settings.max_size || new_settings.max_elements != actual_settings.max_elements) { - std::optional eviction_candidates; - bool modified_size_limits = false; + EvictionCandidates eviction_candidates; + bool limits_satisfied = false; { cache_is_being_resized.store(true, std::memory_order_relaxed); SCOPE_EXIT({ @@ -1399,15 +1399,12 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, auto cache_lock = lockCache(); FileCacheReserveStat stat; - eviction_candidates.emplace(main_priority->collectCandidatesForEviction( - new_settings.max_size, new_settings.max_elements, 0/* max_candidates_to_evict */, stat, cache_lock)); + limits_satisfied = main_priority->collectCandidatesForEviction( + new_settings.max_size, new_settings.max_elements, 0/* max_candidates_to_evict */, stat, eviction_candidates, cache_lock); - eviction_candidates->removeQueueEntries(cache_lock); + eviction_candidates.removeQueueEntries(cache_lock); - modified_size_limits = 
main_priority->getSize(cache_lock) <= new_settings.max_size - && main_priority->getElementsCount(cache_lock) <= new_settings.max_elements; - - if (modified_size_limits) + if (limits_satisfied) { main_priority->modifySizeLimits( new_settings.max_size, new_settings.max_elements, new_settings.slru_size_ratio, cache_lock); @@ -1423,16 +1420,16 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, try { - eviction_candidates->evict(); + eviction_candidates.evict(); } catch (...) { auto cache_lock = lockCache(); - eviction_candidates->finalize(nullptr, cache_lock); + eviction_candidates.finalize(nullptr, cache_lock); throw; } - if (modified_size_limits) + if (limits_satisfied) { LOG_INFO(log, "Changed max_size from {} to {}, max_elements from {} to {}", actual_settings.max_size, new_settings.max_size, diff --git a/src/Interpreters/Cache/IFileCachePriority.h b/src/Interpreters/Cache/IFileCachePriority.h index 042234b9f6c8..a727aab68eea 100644 --- a/src/Interpreters/Cache/IFileCachePriority.h +++ b/src/Interpreters/Cache/IFileCachePriority.h @@ -147,11 +147,12 @@ class IFileCachePriority : private boost::noncopyable const CachePriorityGuard::Lock &) = 0; /// Collect eviction `candidates_num` candidates for eviction. - virtual EvictionCandidates collectCandidatesForEviction( + virtual bool collectCandidatesForEviction( size_t desired_size, size_t desired_elements_count, size_t max_candidates_to_evict, FileCacheReserveStat & stat, + EvictionCandidates & candidates, const CachePriorityGuard::Lock &) = 0; virtual bool modifySizeLimits( diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index 5d75c9cb18c8..e859529f5e77 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -322,14 +322,14 @@ bool LRUFileCachePriority::collectCandidatesForEviction( } } -EvictionCandidates LRUFileCachePriority::collectCandidatesForEviction( +bool LRUFileCachePriority::collectCandidatesForEviction( size_t desired_size, size_t desired_elements_count, size_t max_candidates_to_evict, FileCacheReserveStat & stat, + EvictionCandidates & res, const CachePriorityGuard::Lock & lock) { - EvictionCandidates res; auto stop_condition = [&, this]() { return canFit(0, 0, stat.total_stat.releasable_size, stat.total_stat.releasable_count, @@ -337,7 +337,7 @@ EvictionCandidates LRUFileCachePriority::collectCandidatesForEviction( || (max_candidates_to_evict && res.size() >= max_candidates_to_evict); }; iterateForEviction(res, stat, stop_condition, lock); - return res; + return stop_condition(); } void LRUFileCachePriority::iterateForEviction( diff --git a/src/Interpreters/Cache/LRUFileCachePriority.h b/src/Interpreters/Cache/LRUFileCachePriority.h index 1c8cfa6795f6..d31a3fb0f109 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.h +++ b/src/Interpreters/Cache/LRUFileCachePriority.h @@ -62,11 +62,12 @@ class LRUFileCachePriority final : public IFileCachePriority const UserID & user_id, const CachePriorityGuard::Lock &) override; - EvictionCandidates collectCandidatesForEviction( + bool collectCandidatesForEviction( size_t desired_size, size_t desired_elements_count, size_t max_candidates_to_evict, FileCacheReserveStat & stat, + EvictionCandidates & res, const CachePriorityGuard::Lock &) override; void shuffle(const CachePriorityGuard::Lock &) override; diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.cpp b/src/Interpreters/Cache/SLRUFileCachePriority.cpp index 
802a88db23f3..68bf182dd2e0 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/SLRUFileCachePriority.cpp @@ -251,42 +251,47 @@ bool SLRUFileCachePriority::collectCandidatesForEvictionInProtected( return true; } -EvictionCandidates SLRUFileCachePriority::collectCandidatesForEviction( +bool SLRUFileCachePriority::collectCandidatesForEviction( size_t desired_size, size_t desired_elements_count, size_t max_candidates_to_evict, FileCacheReserveStat & stat, + EvictionCandidates & res, const CachePriorityGuard::Lock & lock) { const auto desired_probationary_size = getRatio(desired_size, 1 - size_ratio); const auto desired_probationary_elements_num = getRatio(desired_elements_count, 1 - size_ratio); - auto res = probationary_queue.collectCandidatesForEviction( - desired_probationary_size, desired_probationary_elements_num, max_candidates_to_evict, stat, lock); + FileCacheReserveStat probationary_stat; + const bool probationary_limit_satisfied = probationary_queue.collectCandidatesForEviction( + desired_probationary_size, desired_probationary_elements_num, + max_candidates_to_evict, probationary_stat, res, lock); - LOG_TEST(log, "Collected {} eviction candidates from probationary queue (size: {})", - res.size(), stat.total_stat.releasable_size); + stat += probationary_stat; + + LOG_TEST(log, "Collected {} to evict from probationary queue. Total size: {}", + res.size(), probationary_stat.total_stat.releasable_size); chassert(!max_candidates_to_evict || res.size() <= max_candidates_to_evict); chassert(res.size() == stat.total_stat.releasable_count); - if (max_candidates_to_evict && res.size() == max_candidates_to_evict) - return res; + if (max_candidates_to_evict && res.size() >= max_candidates_to_evict) + return probationary_limit_satisfied; const auto desired_protected_size = getRatio(max_size, size_ratio); const auto desired_protected_elements_num = getRatio(max_elements, size_ratio); FileCacheReserveStat protected_stat; - auto res_add = protected_queue.collectCandidatesForEviction( + const bool protected_limit_satisfied = protected_queue.collectCandidatesForEviction( desired_protected_size, desired_protected_elements_num, - max_candidates_to_evict ? max_candidates_to_evict - res.size() : 0, protected_stat, lock); - - LOG_TEST(log, "Collected {} eviction candidates from protected queue (size: {})", - res_add.size(), protected_stat.total_stat.releasable_size); + max_candidates_to_evict - res.size(), protected_stat, res, lock); stat += protected_stat; - res.insert(std::move(res_add), lock); - return res; + + LOG_TEST(log, "Collected {} to evict from protected queue. 
Total size: {}", + res.size(), protected_stat.total_stat.releasable_size); + + return probationary_limit_satisfied && protected_limit_satisfied; } void SLRUFileCachePriority::downgrade(IteratorPtr iterator, const CachePriorityGuard::Lock & lock) diff --git a/src/Interpreters/Cache/SLRUFileCachePriority.h b/src/Interpreters/Cache/SLRUFileCachePriority.h index e837b8541c23..ee3cafe322da 100644 --- a/src/Interpreters/Cache/SLRUFileCachePriority.h +++ b/src/Interpreters/Cache/SLRUFileCachePriority.h @@ -58,11 +58,12 @@ class SLRUFileCachePriority : public IFileCachePriority const UserID & user_id, const CachePriorityGuard::Lock &) override; - EvictionCandidates collectCandidatesForEviction( + bool collectCandidatesForEviction( size_t desired_size, size_t desired_elements_count, size_t max_candidates_to_evict, FileCacheReserveStat & stat, + EvictionCandidates & res, const CachePriorityGuard::Lock &) override; void shuffle(const CachePriorityGuard::Lock &) override; From 378d330d9dfa289c413f80c2addaf6dee5503093 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Fri, 5 Apr 2024 17:07:43 +0000 Subject: [PATCH 302/470] better --- .../0_stateless/03033_parts_splitter_bug_and_index_loading.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql index 541ac67fd24e..25ec1c8fd80e 100644 --- a/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql +++ b/tests/queries/0_stateless/03033_parts_splitter_bug_and_index_loading.sql @@ -1,5 +1,7 @@ create table t(a UInt32, b UInt32) engine=MergeTree order by (a, b) settings index_granularity=1; +system stop merges t; + -- for this part the first columns is useless, so we have to use both insert into t select 42, number from numbers_mt(100); From fa01f26defcaeed6c83a0866a9a786250cfdb332 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Fri, 5 Apr 2024 17:39:12 +0000 Subject: [PATCH 303/470] add test --- .../IO/ReadBufferFromAzureBlobStorage.cpp | 2 +- .../IO/WriteBufferFromAzureBlobStorage.cpp | 2 +- .../isRetryableAzureException.cpp | 10 ++- .../isRetryableAzureException.h | 0 src/Storages/MergeTree/checkDataPart.cpp | 2 +- .../test.py | 4 +- .../test.py | 78 ++++++++++++++++++- .../test_storage_azure_blob_storage/test.py | 8 +- 8 files changed, 90 insertions(+), 16 deletions(-) rename src/{Disks/ObjectStorages => IO}/AzureBlobStorage/isRetryableAzureException.cpp (66%) rename src/{Disks/ObjectStorages => IO}/AzureBlobStorage/isRetryableAzureException.h (100%) diff --git a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp index 68425c5ca18e..e4d74b640506 100644 --- a/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/ReadBufferFromAzureBlobStorage.cpp @@ -3,7 +3,7 @@ #if USE_AZURE_BLOB_STORAGE #include -#include +#include #include #include #include diff --git a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp index 921f99ffef33..37a189e4d19b 100644 --- a/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp +++ b/src/Disks/IO/WriteBufferFromAzureBlobStorage.cpp @@ -3,7 +3,7 @@ #if USE_AZURE_BLOB_STORAGE #include -#include +#include #include #include #include diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/isRetryableAzureException.cpp b/src/IO/AzureBlobStorage/isRetryableAzureException.cpp similarity index 66% rename from 
src/Disks/ObjectStorages/AzureBlobStorage/isRetryableAzureException.cpp rename to src/IO/AzureBlobStorage/isRetryableAzureException.cpp index e32815e96132..785d89857092 100644 --- a/src/Disks/ObjectStorages/AzureBlobStorage/isRetryableAzureException.cpp +++ b/src/IO/AzureBlobStorage/isRetryableAzureException.cpp @@ -1,10 +1,12 @@ -#include +#include "config.h" + #if USE_AZURE_BLOB_STORAGE +#include namespace DB { -bool isRetryableAzureRequestException(const Azure::Core::RequestFailedException & e) +bool isRetryableAzureException(const Azure::Core::RequestFailedException & e) { /// Always retry transport errors. if (dynamic_cast(&e)) @@ -14,6 +16,6 @@ bool isRetryableAzureRequestException(const Azure::Core::RequestFailedException return e.StatusCode >= Azure::Core::Http::HttpStatusCode::InternalServerError; } -#endif - } + +#endif diff --git a/src/Disks/ObjectStorages/AzureBlobStorage/isRetryableAzureException.h b/src/IO/AzureBlobStorage/isRetryableAzureException.h similarity index 100% rename from src/Disks/ObjectStorages/AzureBlobStorage/isRetryableAzureException.h rename to src/IO/AzureBlobStorage/isRetryableAzureException.h diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 208da561118e..24826170e553 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include diff --git a/tests/integration/test_backup_restore_azure_blob_storage/test.py b/tests/integration/test_backup_restore_azure_blob_storage/test.py index a7c7b4395604..55c2969d8d39 100644 --- a/tests/integration/test_backup_restore_azure_blob_storage/test.py +++ b/tests/integration/test_backup_restore_azure_blob_storage/test.py @@ -66,11 +66,11 @@ def cluster(): def azure_query( - node, query, expect_error="false", try_num=10, settings={}, query_on_retry=None + node, query, expect_error=False, try_num=10, settings={}, query_on_retry=None ): for i in range(try_num): try: - if expect_error == "true": + if expect_error: return node.query_and_get_error(query, settings=settings) else: return node.query(query, settings=settings) diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index cffab672bd1e..7f77627e7935 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -714,7 +714,7 @@ def test_endpoint_error_check(cluster): """ expected_err_msg = "Expected container_name in endpoint" - assert expected_err_msg in azure_query(node, query, expect_error="true") + assert expected_err_msg in azure_query(node, query, expect_error=True) query = f""" DROP TABLE IF EXISTS test SYNC; @@ -731,7 +731,7 @@ def test_endpoint_error_check(cluster): """ expected_err_msg = "Expected account_name in endpoint" - assert expected_err_msg in azure_query(node, query, expect_error="true") + assert expected_err_msg in azure_query(node, query, expect_error=True) query = f""" DROP TABLE IF EXISTS test SYNC; @@ -748,4 +748,76 @@ def test_endpoint_error_check(cluster): """ expected_err_msg = "Expected container_name in endpoint" - assert expected_err_msg in azure_query(node, query, expect_error="true") + assert expected_err_msg in azure_query(node, query, expect_error=True) + + +def get_azure_client(container_name, port): + connection_string = ( + f"DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" + 
f"AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" + f"BlobEndpoint=http://127.0.0.1:{port}/devstoreaccount1;" + ) + + blob_service_client = BlobServiceClient.from_connection_string(connection_string) + return blob_service_client.get_container_client(container_name) + + +def test_azure_broken_parts(cluster): + node = cluster.instances[NODE_NAME] + account_name = "devstoreaccount1" + container_name = "cont5" + port = cluster.azurite_port + + query = f""" + DROP TABLE IF EXISTS t_azure_broken_parts SYNC; + + CREATE TABLE t_azure_broken_parts (a Int32) + ENGINE = MergeTree() ORDER BY tuple() + SETTINGS disk = disk( + type = azure_blob_storage, + endpoint = 'http://azurite1:{port}/{account_name}/{container_name}', + endpoint_contains_account_name = 'true', + account_name = 'devstoreaccount1', + account_key = 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', + skip_access_check = 0), min_bytes_for_wide_part = 0, min_bytes_for_full_part_storage = 0; + + INSERT INTO t_azure_broken_parts VALUES (1); + """ + + azure_query(node, query) + + result = azure_query(node, "SELECT count() FROM t_azure_broken_parts").strip() + assert int(result) == 1 + + result = azure_query( + node, + "SELECT count() FROM system.detached_parts WHERE table = 't_azure_broken_parts'", + ).strip() + + assert int(result) == 0 + + data_path = azure_query( + node, + "SELECT data_paths[1] FROM system.tables WHERE name = 't_azure_broken_parts'", + ).strip() + + remote_path = azure_query( + node, + f"SELECT remote_path FROM system.remote_data_paths WHERE path || local_path = '{data_path}' || 'all_1_1_0/columns.txt'", + ).strip() + + client = get_azure_client(container_name, port) + client.delete_blob(remote_path) + + azure_query(node, "DETACH TABLE t_azure_broken_parts") + azure_query(node, "ATTACH TABLE t_azure_broken_parts") + + result = azure_query(node, "SELECT count() FROM t_azure_broken_parts").strip() + assert int(result) == 0 + + result = azure_query( + node, + "SELECT count() FROM system.detached_parts WHERE table = 't_azure_broken_parts'", + ).strip() + + assert int(result) == 1 diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 7d30265e4f86..aabc93406581 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -36,11 +36,11 @@ def cluster(): def azure_query( - node, query, expect_error="false", try_num=10, settings={}, query_on_retry=None + node, query, expect_error=False, try_num=10, settings={}, query_on_retry=None ): for i in range(try_num): try: - if expect_error == "true": + if expect_error: return node.query_and_get_error(query, settings=settings) else: return node.query(query, settings=settings) @@ -793,7 +793,7 @@ def test_read_from_not_existing_container(cluster): f"'devstoreaccount1', 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', 'auto')" ) expected_err_msg = "container does not exist" - assert expected_err_msg in azure_query(node, query, expect_error="true") + assert expected_err_msg in azure_query(node, query, expect_error=True) def test_function_signatures(cluster): @@ -966,7 +966,7 @@ def test_union_schema_inference_mode(cluster): error = azure_query( node, f"desc azureBlobStorage('{storage_account_url}', 'cont', 'test_union_schema_inference*.jsonl', '{account_name}', '{account_key}', 'auto', 
'auto', 'auto') settings schema_inference_mode='union', describe_compact_output=1 format TSV", - expect_error="true", + expect_error=True, ) assert "CANNOT_EXTRACT_TABLE_STRUCTURE" in error From 0bce544779bd881aa3218694545fe5a8017ee9a4 Mon Sep 17 00:00:00 2001 From: Sema Checherinda <104093494+CheSema@users.noreply.github.com> Date: Fri, 5 Apr 2024 23:07:00 +0200 Subject: [PATCH 304/470] Update base/poco/Net/src/HTTPClientSession.cpp Co-authored-by: Nikita Taranov --- base/poco/Net/src/HTTPClientSession.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp index e489ab56b987..c9899266be79 100644 --- a/base/poco/Net/src/HTTPClientSession.cpp +++ b/base/poco/Net/src/HTTPClientSession.cpp @@ -248,7 +248,7 @@ void HTTPClientSession::setKeepAliveRequest(int request) - void HTTPClientSession::setLastRequest(Poco::Timestamp time) +void HTTPClientSession::setLastRequest(Poco::Timestamp time) { if (connected()) { From f766ec678206c0b0e5f0eac0d142583fa47d89cd Mon Sep 17 00:00:00 2001 From: Sema Checherinda Date: Fri, 5 Apr 2024 23:19:30 +0200 Subject: [PATCH 305/470] review remarks --- src/Common/HTTPConnectionPool.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Common/HTTPConnectionPool.cpp b/src/Common/HTTPConnectionPool.cpp index 7f99d6a647f0..167aeee68f33 100644 --- a/src/Common/HTTPConnectionPool.cpp +++ b/src/Common/HTTPConnectionPool.cpp @@ -213,7 +213,8 @@ class ConnectionGroup --total_connections_in_group; - const size_t reduced_warning_limit = limits.warning_limit > 10 ? limits.warning_limit - 20 : 1; + const size_t gap = 20; + const size_t reduced_warning_limit = limits.warning_limit > gap ? limits.warning_limit - gap : 1; if (mute_warning_until > 0 && total_connections_in_group < reduced_warning_limit) { LOG_WARNING(log, "Sessions count is OK in the group {}, count {}", type, total_connections_in_group); @@ -289,8 +290,7 @@ class EndpointConnectionPool : public std::enable_shared_from_thisgetConnection(timeouts); Session::assign(*new_connection); - if (Session::getKeepAliveRequest() == 0) - Session::setKeepAliveRequest(1); + Session::setKeepAliveRequest(Session::getKeepAliveRequest() + 1); } else { @@ -425,7 +425,7 @@ class EndpointConnectionPool : public std::enable_shared_from_this(args)...) 
, pool(std::move(pool_)) , group(group_) , metrics(std::move(metrics_)) From c0ffee763cd90da02310e94c99e04d6cc5a2afa6 Mon Sep 17 00:00:00 2001 From: Anita Hammer <166057949+anitahammer@users.noreply.github.com> Date: Sat, 6 Apr 2024 02:48:50 +0300 Subject: [PATCH 306/470] Fix contributor name vulnerability --- tests/ci/version_helper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/ci/version_helper.py b/tests/ci/version_helper.py index 30b0c2d96be2..f649732171fc 100755 --- a/tests/ci/version_helper.py +++ b/tests/ci/version_helper.py @@ -357,8 +357,9 @@ def update_contributors( # format: " 1016 Alexey Arno" shortlog = git_runner.run("git shortlog HEAD --summary") + escaping = str.maketrans({"\\": "\\\\", '"': '\\"'}) contributors = sorted( - [c.split(maxsplit=1)[-1].replace('"', r"\"") for c in shortlog.split("\n")], + [c.split(maxsplit=1)[-1].translate(escaping) for c in shortlog.split("\n")], ) contributors = [f' "{c}",' for c in contributors] From aae16fb41e960895c384fcacdbbef4940af18e28 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 6 Apr 2024 11:19:48 +0200 Subject: [PATCH 307/470] Remove macro --- src/Common/ThreadStatus.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Common/ThreadStatus.cpp b/src/Common/ThreadStatus.cpp index 3e7f27f4d4ff..ad96018a17e7 100644 --- a/src/Common/ThreadStatus.cpp +++ b/src/Common/ThreadStatus.cpp @@ -141,9 +141,6 @@ void ThreadStatus::initGlobalProfiler([[maybe_unused]] UInt64 global_profiler_re { tryLogCurrentException("ThreadStatus", "Cannot initialize GlobalProfiler"); } -#else - UNUSED(global_profiler_real_time_period); - UNUSED(global_profiler_cpu_time_period); #endif } From 56d6ec5f302f10b02bd7fd1fe47618083626a06c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Sat, 6 Apr 2024 12:13:22 +0200 Subject: [PATCH 308/470] Update 03023_group_by_use_nulls_analyzer_crashes.sql --- .../0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql index 20cf47e4feba..2f4c8b1c75ea 100644 --- a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql +++ b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql @@ -6,6 +6,6 @@ select tuple(array(number)) as x FROM numbers(10) GROUP BY number, array(number) SELECT tuple(number) AS x FROM numbers(10) GROUP BY GROUPING SETS (number) order by x; -SELECT ignore(toFixedString('Lambda as function parameter', 28), toNullable(28), ignore(8)), sum(marks) FROM system.parts GROUP BY GROUPING SETS ((2)) FORMAT Null settings optimize_injective_functions_in_group_by=1, optimize_group_by_function_keys=1, group_by_use_nulls=1; -- { serverError ILLEGAL_AGGREGATION } +SELECT ignore(toFixedString('Lambda as function parameter', 28), toNullable(28), ignore(8)), sum(marks) FROM system.parts WHERE database = currentDatabase() GROUP BY GROUPING SETS ((2)) FORMAT Null settings optimize_injective_functions_in_group_by=1, optimize_group_by_function_keys=1, group_by_use_nulls=1; -- { serverError ILLEGAL_AGGREGATION } SELECT toLowCardinality(materialize('a' AS key)), 'b' AS value GROUP BY key WITH CUBE SETTINGS group_by_use_nulls = 1; From 2573fb62627c76554619c8c046b45eaef02bd13d Mon Sep 17 00:00:00 2001 From: zhongyuankai <872237106@qq.com> Date: Sat, 6 Apr 2024 20:37:37 +0800 Subject: [PATCH 309/470] batter --- 
.../engines/table-engines/special/memory.md | 11 +++++---- src/Storages/MemorySettings.cpp | 15 +++++++++--- .../03032_storage_memory_modify_settings.sql | 23 ++++++++++--------- 3 files changed, 30 insertions(+), 19 deletions(-) diff --git a/docs/en/engines/table-engines/special/memory.md b/docs/en/engines/table-engines/special/memory.md index 4f5d8a1d5e08..f28157ebde2f 100644 --- a/docs/en/engines/table-engines/special/memory.md +++ b/docs/en/engines/table-engines/special/memory.md @@ -37,11 +37,6 @@ Upper and lower bounds can be specified to limit Memory engine table size, effec - `max_rows_to_keep` — Maximum rows to keep within memory table where oldest rows are deleted on each insertion (i.e circular buffer). Max rows can exceed the stated limit if the oldest batch of rows to remove falls under the `min_rows_to_keep` limit when adding a large block. - Default value: `0` -**Modify settings** -```sql -ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000; -``` - ## Usage {#usage} @@ -50,6 +45,11 @@ ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 100 CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 100, max_rows_to_keep = 1000; ``` +**Modify settings** +```sql +ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000; +``` + **Note:** Both `bytes` and `rows` capping parameters can be set at the same time, however, the lower bounds of `max` and `min` will be adhered to. ## Examples {#examples} @@ -102,3 +102,4 @@ SELECT total_bytes, total_rows FROM system.tables WHERE name = 'memory' and data │ 65536 │ 10000 │ └─────────────┴────────────┘ ``` + diff --git a/src/Storages/MemorySettings.cpp b/src/Storages/MemorySettings.cpp index 4968f2b3b16c..7c8c9da209b4 100644 --- a/src/Storages/MemorySettings.cpp +++ b/src/Storages/MemorySettings.cpp @@ -44,9 +44,18 @@ ASTPtr MemorySettings::getSettingsChangesQuery() void MemorySettings::sanityCheck() const { - if (min_bytes_to_keep > max_bytes_to_keep - || min_rows_to_keep > max_rows_to_keep) - throw Exception(ErrorCodes::SETTING_CONSTRAINT_VIOLATION, "Min. bytes / rows must be set with a max."); + if (min_bytes_to_keep > max_bytes_to_keep) + throw Exception(ErrorCodes::SETTING_CONSTRAINT_VIOLATION, + "`min_bytes_to_keep` setting cannot be higher than `max_bytes_to_keep`. `min_bytes_to_keep`: {}, `max_bytes_to_keep`: {}", + min_bytes_to_keep, + max_bytes_to_keep); + + + if (min_rows_to_keep > max_rows_to_keep) + throw Exception(ErrorCodes::SETTING_CONSTRAINT_VIOLATION, + "`min_rows_to_keep` setting cannot be higher than `max_rows_to_keep`. 
`min_rows_to_keep`: {}, `max_rows_to_keep`: {}", + min_rows_to_keep, + max_rows_to_keep); } } diff --git a/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql b/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql index e59a5e4edb6c..1507107c37fe 100644 --- a/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql +++ b/tests/queries/0_stateless/03032_storage_memory_modify_settings.sql @@ -6,10 +6,10 @@ CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_bytes_to_keep = 8192 INSERT INTO memory SELECT * FROM numbers(0, 100); -- 1024 bytes INSERT INTO memory SELECT * FROM numbers(0, 3000); -- 16384 bytes -SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 17408 in total ALTER TABLE memory MODIFY SETTING min_bytes_to_keep = 4096, max_bytes_to_keep = 16384; -SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 16384 in total after deleting INSERT INTO memory SELECT * FROM numbers(3000, 10000); -- 65536 bytes SELECT total_bytes FROM system.tables WHERE name = 'memory' and database = currentDatabase(); @@ -33,19 +33,19 @@ DROP TABLE IF EXISTS memory; CREATE TABLE memory (i UInt32) ENGINE = Memory; INSERT INTO memory SELECT * FROM numbers(0, 50); -- 50 rows -SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 50 in total INSERT INTO memory SELECT * FROM numbers(50, 950); -- 950 rows -SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1000 in total INSERT INTO memory SELECT * FROM numbers(2000, 70); -- 70 rows -SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1070 in total ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000; -SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1020 in total after deleting INSERT INTO memory SELECT * FROM numbers(3000, 1100); -- 1100 rows -SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1100 in total after deleting SELECT 'TESTING ADD SETTINGS'; DROP TABLE IF EXISTS memory; @@ -53,16 +53,16 @@ CREATE TABLE memory (i UInt32) ENGINE = Memory; ALTER TABLE memory MODIFY SETTING min_rows_to_keep = 100, max_rows_to_keep = 1000; INSERT INTO memory SELECT * FROM numbers(0, 50); -- 50 rows -SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 50 in total INSERT INTO memory SELECT * FROM numbers(50, 950); -- 950 rows -SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 
1000 in total INSERT INTO memory SELECT * FROM numbers(2000, 70); -- 70 rows -SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1020 in total after deleting INSERT INTO memory SELECT * FROM numbers(3000, 1100); -- 1100 rows -SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); +SELECT total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase(); -- 1100 in total after deleting SELECT 'TESTING INVALID SETTINGS'; DROP TABLE IF EXISTS memory; @@ -73,3 +73,4 @@ ALTER TABLE memory MODIFY SETTING max_rows_to_keep = 1000; ALTER TABLE memory MODIFY SETTING max_bytes_to_keep = 1000; DROP TABLE memory; + From b6dd55827fc5f19a763fa7b06e165cbd8b05fcea Mon Sep 17 00:00:00 2001 From: flynn Date: Sat, 6 Apr 2024 14:23:45 +0000 Subject: [PATCH 310/470] Fix analyzer with positional arguments in distributed query --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 8 ++++---- src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp | 4 ++++ ..._distributed_query_with_positional_arguments.reference | 3 +++ ...alyzer_distributed_query_with_positional_arguments.sql | 7 +++++++ 4 files changed, 18 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/03018_analyzer_distributed_query_with_positional_arguments.reference create mode 100644 tests/queries/0_stateless/03018_analyzer_distributed_query_with_positional_arguments.sql diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 6350266504f4..5d710a3fda37 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6443,7 +6443,7 @@ void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierR { for (auto & grouping_sets_keys_list_node : query_node_typed.getGroupBy().getNodes()) { - if (settings.enable_positional_arguments) + if (settings.enable_positional_arguments && scope.context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) replaceNodesWithPositionalArguments(grouping_sets_keys_list_node, query_node_typed.getProjection().getNodes(), scope); resolveExpressionNodeList(grouping_sets_keys_list_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); @@ -6465,7 +6465,7 @@ void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierR } else { - if (settings.enable_positional_arguments) + if (settings.enable_positional_arguments && scope.context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) replaceNodesWithPositionalArguments(query_node_typed.getGroupByNode(), query_node_typed.getProjection().getNodes(), scope); resolveExpressionNodeList(query_node_typed.getGroupByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); @@ -7823,7 +7823,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if (query_node_typed.hasOrderBy()) { - if (settings.enable_positional_arguments) + if (settings.enable_positional_arguments && scope.context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) replaceNodesWithPositionalArguments(query_node_typed.getOrderByNode(), query_node_typed.getProjection().getNodes(), scope); expandOrderByAll(query_node_typed, settings); @@ -7847,7 +7847,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if 
(query_node_typed.hasLimitBy()) { - if (settings.enable_positional_arguments) + if (settings.enable_positional_arguments && scope.context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) replaceNodesWithPositionalArguments(query_node_typed.getLimitByNode(), query_node_typed.getProjection().getNodes(), scope); resolveExpressionNodeList(query_node_typed.getLimitByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); diff --git a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp index c8d230c87d92..d45454824770 100644 --- a/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp +++ b/src/Processors/QueryPlan/DistributedCreateLocalPlan.cpp @@ -68,6 +68,10 @@ std::unique_ptr createLocalPlan( if (context->getSettingsRef().allow_experimental_analyzer) { + /// For Analyzer, identifier in GROUP BY/ORDER BY/LIMIT BY lists has been resolved to + /// ConstantNode in QueryTree if it is an alias of a constant, so we should not replace + /// ConstantNode with ProjectionNode again(https://github.com/ClickHouse/ClickHouse/issues/62289). + new_context->setSetting("enable_positional_arguments", Field(false)); auto interpreter = InterpreterSelectQueryAnalyzer(query_ast, new_context, select_query_options); query_plan = std::make_unique(std::move(interpreter).extractQueryPlan()); } diff --git a/tests/queries/0_stateless/03018_analyzer_distributed_query_with_positional_arguments.reference b/tests/queries/0_stateless/03018_analyzer_distributed_query_with_positional_arguments.reference new file mode 100644 index 000000000000..bb0b1cf658d1 --- /dev/null +++ b/tests/queries/0_stateless/03018_analyzer_distributed_query_with_positional_arguments.reference @@ -0,0 +1,3 @@ +0 +0 +0 diff --git a/tests/queries/0_stateless/03018_analyzer_distributed_query_with_positional_arguments.sql b/tests/queries/0_stateless/03018_analyzer_distributed_query_with_positional_arguments.sql new file mode 100644 index 000000000000..16ba3b155948 --- /dev/null +++ b/tests/queries/0_stateless/03018_analyzer_distributed_query_with_positional_arguments.sql @@ -0,0 +1,7 @@ +select 0 as x +from remote('127.0.0.{1,2}', system.one) +group by x; + +select 0 as x +from remote('127.0.0.{1,2}', system.one) +order by x; From 933d6e86dab5d0c489134d7c215b4e40f0c19ed0 Mon Sep 17 00:00:00 2001 From: alesapin Date: Sat, 6 Apr 2024 19:33:54 +0200 Subject: [PATCH 311/470] Remove config --- src/Common/ThreadStatus.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Common/ThreadStatus.h b/src/Common/ThreadStatus.h index 02bf82e2da39..0c02ab8fdb0c 100644 --- a/src/Common/ThreadStatus.h +++ b/src/Common/ThreadStatus.h @@ -1,6 +1,5 @@ #pragma once -#include "config.h" #include #include #include From a6dbaf540882b18950e28ec5f1c9155e34aeadf1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 6 Apr 2024 21:45:26 +0200 Subject: [PATCH 312/470] dhparams are not enabled by default --- programs/install/Install.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 0ff0faff6248..20c1a0ad4a86 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -662,7 +662,6 @@ int mainEntryClickHouseInstall(int argc, char ** argv) " \n" " " << (config_dir / "server.crt").string() << "\n" " " << (config_dir / "server.key").string() << "\n" - " " << (config_dir / "dhparam.pem").string() << "\n" " \n" " \n" "\n"; From 664823463b23d00d2aa4293bdea763112b652ddb Mon Sep 17 00:00:00 
2001 From: Alexey Milovidov Date: Sat, 6 Apr 2024 21:46:43 +0200 Subject: [PATCH 313/470] Do not create a directory for UDF in clickhouse-client if it does not exist --- .../UserDefined/UserDefinedSQLObjectsDiskStorage.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp index b083c5400839..d874612ad04a 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsDiskStorage.cpp @@ -56,7 +56,6 @@ UserDefinedSQLObjectsDiskStorage::UserDefinedSQLObjectsDiskStorage(const Context , dir_path{makeDirectoryPathCanonical(dir_path_)} , log{getLogger("UserDefinedSQLObjectsLoaderFromDisk")} { - createDirectory(); } @@ -122,7 +121,12 @@ void UserDefinedSQLObjectsDiskStorage::reloadObjects() void UserDefinedSQLObjectsDiskStorage::loadObjectsImpl() { LOG_INFO(log, "Loading user defined objects from {}", dir_path); - createDirectory(); + + if (!std::filesystem::exists(dir_path)) + { + LOG_DEBUG(log, "The directory for user defined objects ({}) does not exist: nothing to load", dir_path); + return; + } std::vector> function_names_and_queries; @@ -157,7 +161,6 @@ void UserDefinedSQLObjectsDiskStorage::loadObjectsImpl() void UserDefinedSQLObjectsDiskStorage::reloadObject(UserDefinedSQLObjectType object_type, const String & object_name) { - createDirectory(); auto ast = tryLoadObject(object_type, object_name); if (ast) setObject(object_name, *ast); @@ -185,6 +188,7 @@ bool UserDefinedSQLObjectsDiskStorage::storeObjectImpl( bool replace_if_exists, const Settings & settings) { + createDirectory(); String file_path = getFilePath(object_type, object_name); LOG_DEBUG(log, "Storing user-defined object {} to file {}", backQuote(object_name), file_path); From 982d3db5b1fa220323c3f7d911b11b375945ca93 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sat, 6 Apr 2024 21:51:28 +0200 Subject: [PATCH 314/470] disable window view with analyzer properly --- src/Storages/WindowView/StorageWindowView.cpp | 38 +++++++++++++++++-- src/Storages/WindowView/StorageWindowView.h | 4 ++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 0b822b9aab3d..f82f5b079109 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -439,6 +439,7 @@ bool StorageWindowView::optimize( bool cleanup, ContextPtr local_context) { + throwIfWindowViewIsDisabled(local_context); auto storage_ptr = getInnerTable(); auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr(); return getInnerTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, cleanup, local_context); @@ -449,6 +450,7 @@ void StorageWindowView::alter( ContextPtr local_context, AlterLockHolder &) { + throwIfWindowViewIsDisabled(local_context); auto table_id = getStorageID(); StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); @@ -508,8 +510,9 @@ void StorageWindowView::alter( startup(); } -void StorageWindowView::checkAlterIsPossible(const AlterCommands & commands, ContextPtr /*local_context*/) const +void StorageWindowView::checkAlterIsPossible(const AlterCommands & commands, ContextPtr local_context) const { + throwIfWindowViewIsDisabled(local_context); for (const auto & command : 
commands) { if (!command.isCommentAlter() && command.type != AlterCommand::MODIFY_QUERY) @@ -519,6 +522,7 @@ void StorageWindowView::checkAlterIsPossible(const AlterCommands & commands, Con std::pair StorageWindowView::getNewBlocks(UInt32 watermark) { + throwIfWindowViewIsDisabled(); UInt32 w_start = addTime(watermark, window_kind, -window_num_units, *time_zone); auto inner_table = getInnerTable(); @@ -654,6 +658,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) inline void StorageWindowView::fire(UInt32 watermark) { + throwIfWindowViewIsDisabled(); LOG_TRACE(log, "Watch streams number: {}, target table: {}", watch_streams.size(), target_table_id.empty() ? "None" : target_table_id.getNameForLogs()); @@ -722,6 +727,7 @@ inline void StorageWindowView::fire(UInt32 watermark) ASTPtr StorageWindowView::getSourceTableSelectQuery() { + throwIfWindowViewIsDisabled(); auto query = select_query->clone(); auto & modified_select = query->as(); @@ -947,6 +953,7 @@ UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) void StorageWindowView::addFireSignal(std::set & signals) { + throwIfWindowViewIsDisabled(); std::lock_guard lock(fire_signal_mutex); for (const auto & signal : signals) fire_signal.push_back(signal); @@ -962,6 +969,7 @@ void StorageWindowView::updateMaxTimestamp(UInt32 timestamp) void StorageWindowView::updateMaxWatermark(UInt32 watermark) { + throwIfWindowViewIsDisabled(); if (is_proctime) { max_watermark = watermark; @@ -1014,6 +1022,7 @@ void StorageWindowView::cleanup() void StorageWindowView::threadFuncCleanup() { + throwIfWindowViewIsDisabled(); if (shutdown_called) return; @@ -1033,6 +1042,7 @@ void StorageWindowView::threadFuncCleanup() void StorageWindowView::threadFuncFireProc() { + throwIfWindowViewIsDisabled(); if (shutdown_called) return; @@ -1069,6 +1079,7 @@ void StorageWindowView::threadFuncFireProc() void StorageWindowView::threadFuncFireEvent() { + throwIfWindowViewIsDisabled(); std::lock_guard lock(fire_signal_mutex); LOG_TRACE(log, "Fire events: {}", fire_signal.size()); @@ -1100,6 +1111,7 @@ void StorageWindowView::read( const size_t max_block_size, const size_t num_streams) { + throwIfWindowViewIsDisabled(local_context); if (target_table_id.empty()) return; @@ -1140,6 +1152,7 @@ Pipe StorageWindowView::watch( size_t /*max_block_size*/, const size_t /*num_streams*/) { + throwIfWindowViewIsDisabled(local_context); ASTWatchQuery & query = typeid_cast(*query_info.query); bool has_limit = false; @@ -1178,8 +1191,10 @@ StorageWindowView::StorageWindowView( , clean_interval_usec(context_->getSettingsRef().window_view_clean_interval.totalMicroseconds()) { if (context_->getSettingsRef().allow_experimental_analyzer) - throw Exception(ErrorCodes::UNSUPPORTED_METHOD, - "Experimental WINDOW VIEW feature is not supported with new infrastructure for query analysis (the setting 'allow_experimental_analyzer')"); + disabled_due_to_analyzer = true; + + if (mode <= LoadingStrictnessLevel::CREATE) + throwIfWindowViewIsDisabled(); if (!query.select) throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); @@ -1243,6 +1258,9 @@ StorageWindowView::StorageWindowView( } } + if (disabled_due_to_analyzer) + return; + clean_cache_task = getContext()->getSchedulePool().createTask(getStorageID().getFullTableName(), [this] { threadFuncCleanup(); }); fire_task = getContext()->getSchedulePool().createTask( getStorageID().getFullTableName(), [this] { is_proctime ? 
threadFuncFireProc() : threadFuncFireEvent(); }); @@ -1400,6 +1418,7 @@ void StorageWindowView::eventTimeParser(const ASTCreateQuery & query) void StorageWindowView::writeIntoWindowView( StorageWindowView & window_view, const Block & block, ContextPtr local_context) { + window_view.throwIfWindowViewIsDisabled(local_context); while (window_view.modifying_query) std::this_thread::sleep_for(std::chrono::milliseconds(100)); @@ -1589,6 +1608,9 @@ void StorageWindowView::writeIntoWindowView( void StorageWindowView::startup() { + if (disabled_due_to_analyzer) + return; + DatabaseCatalog::instance().addViewDependency(select_table_id, getStorageID()); fire_task->activate(); @@ -1602,6 +1624,8 @@ void StorageWindowView::startup() void StorageWindowView::shutdown(bool) { shutdown_called = true; + if (disabled_due_to_analyzer) + return; fire_condition.notify_all(); @@ -1657,6 +1681,7 @@ Block StorageWindowView::getInputHeader() const const Block & StorageWindowView::getOutputHeader() const { + throwIfWindowViewIsDisabled(); std::lock_guard lock(sample_block_lock); if (!output_header) { @@ -1681,6 +1706,13 @@ StoragePtr StorageWindowView::getTargetTable() const return DatabaseCatalog::instance().getTable(target_table_id, getContext()); } +void StorageWindowView::throwIfWindowViewIsDisabled(ContextPtr local_context) const +{ + if (disabled_due_to_analyzer || (local_context && local_context->getSettingsRef().allow_experimental_analyzer)) + throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Experimental WINDOW VIEW feature is not supported " + "with new infrastructure for query analysis (the setting 'allow_experimental_analyzer')"); +} + void registerStorageWindowView(StorageFactory & factory) { factory.registerStorage("WindowView", [](const StorageFactory::Arguments & args) diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 0b7cd54e3a7f..f79867df424a 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -271,5 +271,9 @@ class StorageWindowView final : public IStorage, WithContext StoragePtr getSourceTable() const; StoragePtr getInnerTable() const; StoragePtr getTargetTable() const; + + bool disabled_due_to_analyzer = false; + + void throwIfWindowViewIsDisabled(ContextPtr local_context = nullptr) const; }; } From c5e47bbe70e232188e36d0599e29605db4905861 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 6 Apr 2024 21:52:04 +0200 Subject: [PATCH 315/470] Add a test --- .../03033_analyzer_query_parameters.sh | 4 ++-- ...udf_user_defined_directory_in_client.reference | 1 + .../03036_udf_user_defined_directory_in_client.sh | 15 +++++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.reference create mode 100755 tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.sh diff --git a/tests/queries/0_stateless/03033_analyzer_query_parameters.sh b/tests/queries/0_stateless/03033_analyzer_query_parameters.sh index c821791e4378..cf46067df993 100755 --- a/tests/queries/0_stateless/03033_analyzer_query_parameters.sh +++ b/tests/queries/0_stateless/03033_analyzer_query_parameters.sh @@ -4,5 +4,5 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -clickhouse-local --param_rounding 1 --query "SELECT 1 AS x ORDER BY x WITH FILL STEP {rounding:UInt32} SETTINGS allow_experimental_analyzer = 1" -clickhouse-local --param_rounding 1 --query "SELECT 1 AS x ORDER BY x WITH FILL STEP {rounding:UInt32} SETTINGS allow_experimental_analyzer = 0" +${CLICKHOUSE_LOCAL} --param_rounding 1 --query "SELECT 1 AS x ORDER BY x WITH FILL STEP {rounding:UInt32} SETTINGS allow_experimental_analyzer = 1" +${CLICKHOUSE_LOCAL} --param_rounding 1 --query "SELECT 1 AS x ORDER BY x WITH FILL STEP {rounding:UInt32} SETTINGS allow_experimental_analyzer = 0" diff --git a/tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.reference b/tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.reference new file mode 100644 index 000000000000..251d054748a5 --- /dev/null +++ b/tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.reference @@ -0,0 +1 @@ +Unknown function diff --git a/tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.sh b/tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.sh new file mode 100755 index 000000000000..e0a145d84566 --- /dev/null +++ b/tests/queries/0_stateless/03036_udf_user_defined_directory_in_client.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS test" +${CLICKHOUSE_CLIENT} --query "CREATE TABLE test (s String) ENGINE = Memory" + +# Calling an unknown function should not lead to creation of a 'user_defined' directory in the current directory +${CLICKHOUSE_CLIENT} --query "INSERT INTO test VALUES (xyz('abc'))" 2>&1 | grep -o -F 'Unknown function' + +ls -ld user_defined 2> /dev/null + +${CLICKHOUSE_CLIENT} --query "DROP TABLE test" From 97281203b8e5009412f58338ff7e7fbab34b431a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 7 Apr 2024 09:51:45 +0000 Subject: [PATCH 316/470] Don't access static members through instance, pt. 
II clang-tidy rightfully complains (-readability-static-accessed-through-instance) that static member functions are accessed through non-static members --- .clang-tidy | 1 - programs/disks/DisksApp.cpp | 2 +- programs/keeper-client/KeeperClient.cpp | 2 +- programs/local/LocalServer.cpp | 2 +- ...regateFunctionExponentialMovingAverage.cpp | 14 +++++----- ...AggregateFunctionKolmogorovSmirnovTest.cpp | 14 +++++----- src/AggregateFunctions/DDSketch.h | 4 +++ src/AggregateFunctions/DDSketch/Store.h | 4 +++ src/Analyzer/Passes/QueryAnalysisPass.cpp | 7 +++-- src/Client/Connection.cpp | 2 +- src/Columns/ColumnObject.cpp | 6 ++--- .../tests/gtest_compressionCodec.cpp | 2 +- src/Daemon/BaseDaemon.cpp | 2 +- src/DataTypes/DataTypeDate32.cpp | 2 +- .../tests/gtest_json_parser.cpp | 4 +-- .../IO/CachedOnDiskReadBufferFromFile.cpp | 2 +- src/Functions/FunctionsConversion.cpp | 4 +-- .../UserDefinedSQLFunctionFactory.cpp | 4 +-- src/IO/MMapReadBufferFromFileWithCache.cpp | 2 +- src/Interpreters/ActionsVisitor.cpp | 4 +-- src/Interpreters/Aggregator.cpp | 6 ++--- src/Interpreters/InterpreterAlterQuery.cpp | 4 +-- .../InterpreterCreateIndexQuery.cpp | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 6 ++--- src/Interpreters/Set.cpp | 2 +- src/Interpreters/TreeOptimizer.cpp | 2 +- src/Interpreters/executeQuery.cpp | 2 +- src/Interpreters/tests/gtest_filecache.cpp | 16 ++++++------ src/Loggers/Loggers.cpp | 4 +++ src/Processors/Merges/Algorithms/Graphite.cpp | 26 +++++++++---------- src/Storages/FileLog/StorageFileLog.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- src/Storages/StorageDictionary.cpp | 2 +- src/Storages/StorageFuzzJSON.cpp | 2 +- .../System/StorageSystemFunctions.cpp | 2 +- src/TableFunctions/TableFunctionExplain.cpp | 2 +- 36 files changed, 88 insertions(+), 78 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index c98bee71d1ac..dc1cebe94304 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -119,7 +119,6 @@ Checks: [ '-readability-named-parameter', '-readability-redundant-declaration', '-readability-simplify-boolean-expr', - '-readability-static-accessed-through-instance', '-readability-suspicious-call-argument', '-readability-uppercase-literal-suffix', '-readability-use-anyofallof', diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index b7c3c7f5c97a..6c7687992216 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -166,7 +166,7 @@ int DisksApp::main(const std::vector & /*args*/) { String config_path = config().getString("config-file", getDefaultConfigFileName()); ConfigProcessor config_processor(config_path, false, false); - config_processor.setConfigPath(fs::path(config_path).parent_path()); + ConfigProcessor::setConfigPath(fs::path(config_path).parent_path()); auto loaded_config = config_processor.loadConfig(); config().add(loaded_config.configuration.duplicate(), false, false); } diff --git a/programs/keeper-client/KeeperClient.cpp b/programs/keeper-client/KeeperClient.cpp index 8297fab5ed98..52d825f30e6b 100644 --- a/programs/keeper-client/KeeperClient.cpp +++ b/programs/keeper-client/KeeperClient.cpp @@ -368,7 +368,7 @@ int KeeperClient::main(const std::vector & /* args */) DB::ConfigProcessor config_processor(config().getString("config-file", "config.xml")); /// This will handle a situation when clickhouse is running on the embedded config, but config.d folder is also present. 
- config_processor.registerEmbeddedConfig("config.xml", ""); + ConfigProcessor::registerEmbeddedConfig("config.xml", ""); auto clickhouse_config = config_processor.loadConfig(); Poco::Util::AbstractConfiguration::Keys keys; diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 05c9830ee2c2..72920fbd8559 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -122,7 +122,7 @@ void LocalServer::initialize(Poco::Util::Application & self) { const auto config_path = config().getString("config-file", "config.xml"); ConfigProcessor config_processor(config_path, false, true); - config_processor.setConfigPath(fs::path(config_path).parent_path()); + ConfigProcessor::setConfigPath(fs::path(config_path).parent_path()); auto loaded_config = config_processor.loadConfig(); config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false); } diff --git a/src/AggregateFunctions/AggregateFunctionExponentialMovingAverage.cpp b/src/AggregateFunctions/AggregateFunctionExponentialMovingAverage.cpp index 8582c8c56fc2..3d7d6eff6084 100644 --- a/src/AggregateFunctions/AggregateFunctionExponentialMovingAverage.cpp +++ b/src/AggregateFunctions/AggregateFunctionExponentialMovingAverage.cpp @@ -54,30 +54,30 @@ class AggregateFunctionExponentialMovingAverage final { const auto & value = columns[0]->getFloat64(row_num); const auto & time = columns[1]->getFloat64(row_num); - this->data(place).add(value, time, half_decay); + data(place).add(value, time, half_decay); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena *) const override { - this->data(place).merge(this->data(rhs), half_decay); + data(place).merge(data(rhs), half_decay); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - writeBinary(this->data(place).value, buf); - writeBinary(this->data(place).time, buf); + writeBinary(data(place).value, buf); + writeBinary(data(place).time, buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena *) const override { - readBinary(this->data(place).value, buf); - readBinary(this->data(place).time, buf); + readBinary(data(place).value, buf); + readBinary(data(place).time, buf); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { auto & column = assert_cast &>(to); - column.getData().push_back(this->data(place).get(half_decay)); + column.getData().push_back(data(place).get(half_decay)); } }; diff --git a/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp b/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp index 882150325be3..e1224fae2fba 100644 --- a/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp +++ b/src/AggregateFunctions/AggregateFunctionKolmogorovSmirnovTest.cpp @@ -293,32 +293,32 @@ class AggregateFunctionKolmogorovSmirnov final: Float64 value = columns[0]->getFloat64(row_num); UInt8 is_second = columns[1]->getUInt(row_num); if (is_second) - this->data(place).addY(value, arena); + data(place).addY(value, arena); else - this->data(place).addX(value, arena); + data(place).addX(value, arena); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override { - this->data(place).merge(this->data(rhs), arena); + data(place).merge(data(rhs), arena); } void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional /* version */) const override { - 
this->data(place).write(buf); + data(place).write(buf); } void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional /* version */, Arena * arena) const override { - this->data(place).read(buf, arena); + data(place).read(buf, arena); } void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override { - if (!this->data(place).size_x || !this->data(place).size_y) + if (!data(place).size_x || !data(place).size_y) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Aggregate function {} require both samples to be non empty", getName()); - auto [d_statistic, p_value] = this->data(place).getResult(alternative, method); + auto [d_statistic, p_value] = data(place).getResult(alternative, method); /// Because p-value is a probability. p_value = std::min(1.0, std::max(0.0, p_value)); diff --git a/src/AggregateFunctions/DDSketch.h b/src/AggregateFunctions/DDSketch.h index 65ce73226966..dae935bd43d3 100644 --- a/src/AggregateFunctions/DDSketch.h +++ b/src/AggregateFunctions/DDSketch.h @@ -147,6 +147,8 @@ class DDSketchDenseLogarithmic negative_store->merge(other.negative_store.get()); } + /// NOLINTBEGIN(readability-static-accessed-through-instance) + void serialize(WriteBuffer& buf) const { // Write the mapping @@ -201,6 +203,8 @@ class DDSketchDenseLogarithmic count = static_cast(negative_store->count + zero_count + store->count); } + /// NOLINTEND(readability-static-accessed-through-instance) + private: std::unique_ptr mapping; std::unique_ptr store; diff --git a/src/AggregateFunctions/DDSketch/Store.h b/src/AggregateFunctions/DDSketch/Store.h index 428b2a6c9b8f..f12c3875dcf2 100644 --- a/src/AggregateFunctions/DDSketch/Store.h +++ b/src/AggregateFunctions/DDSketch/Store.h @@ -87,6 +87,8 @@ class DDSketchDenseStore count += other->count; } + /// NOLINTBEGIN(readability-static-accessed-through-instance) + void serialize(WriteBuffer& buf) const { @@ -179,6 +181,8 @@ class DDSketchDenseStore } } + /// NOLINTEND(readability-static-accessed-through-instance) + private: UInt32 chunk_size; DDSketchEncoding enc; diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index fffb8f7f2819..59c02f2b96f6 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -5793,7 +5793,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi return result_projection_names; } - FunctionOverloadResolverPtr function = UserDefinedExecutableFunctionFactory::instance().tryGet(function_name, scope.context, parameters); + FunctionOverloadResolverPtr function = UserDefinedExecutableFunctionFactory::instance().tryGet(function_name, scope.context, parameters); /// NOLINT(readability-static-accessed-through-instance) bool is_executable_udf = true; IdentifierResolveScope::ResolvedFunctionsCache * function_cache = nullptr; @@ -5823,7 +5823,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi { std::vector possible_function_names; - auto function_names = UserDefinedExecutableFunctionFactory::instance().getRegisteredNames(scope.context); + auto function_names = UserDefinedExecutableFunctionFactory::instance().getRegisteredNames(scope.context); /// NOLINT(readability-static-accessed-through-instance) possible_function_names.insert(possible_function_names.end(), function_names.begin(), function_names.end()); function_names = UserDefinedSQLFunctionFactory::instance().getAllRegisteredNames(); @@ -5841,8 +5841,7 @@ ProjectionNames 
QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi possible_function_names.push_back(name); } - NamePrompter<2> name_prompter; - auto hints = name_prompter.getHints(function_name, possible_function_names); + auto hints = NamePrompter<2>::getHints(function_name, possible_function_names); throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Function with name '{}' does not exists. In scope {}{}", diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 180942e6b838..f8607bf14c66 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -141,7 +141,7 @@ void Connection::connect(const ConnectionTimeouts & timeouts) async_callback(socket->impl()->sockfd(), connection_timeout, AsyncEventTimeoutType::CONNECT, description, AsyncTaskExecutor::READ | AsyncTaskExecutor::WRITE | AsyncTaskExecutor::ERROR); if (auto err = socket->impl()->socketError()) - socket->impl()->error(err); // Throws an exception + socket->impl()->error(err); // Throws an exception /// NOLINT(readability-static-accessed-through-instance) socket->setBlocking(true); } diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp index 0a4f90c22625..90ef974010cb 100644 --- a/src/Columns/ColumnObject.cpp +++ b/src/Columns/ColumnObject.cpp @@ -940,7 +940,7 @@ void ColumnObject::addNestedSubcolumn(const PathInData & key, const FieldInfo & if (nested_node) { /// Find any leaf of Nested subcolumn. - const auto * leaf = subcolumns.findLeaf(nested_node, [&](const auto &) { return true; }); + const auto * leaf = Subcolumns::findLeaf(nested_node, [&](const auto &) { return true; }); assert(leaf); /// Recreate subcolumn with default values and the same sizes of arrays. @@ -983,7 +983,7 @@ const ColumnObject::Subcolumns::Node * ColumnObject::getLeafOfTheSameNested(cons while (current_node) { /// Try to find the first Nested up to the current node. - const auto * node_nested = subcolumns.findParent(current_node, + const auto * node_nested = Subcolumns::findParent(current_node, [](const auto & candidate) { return candidate.isNested(); }); if (!node_nested) @@ -993,7 +993,7 @@ const ColumnObject::Subcolumns::Node * ColumnObject::getLeafOfTheSameNested(cons /// for the last rows. /// If there are no leaves, skip current node and find /// the next node up to the current. 
- leaf = subcolumns.findLeaf(node_nested, + leaf = Subcolumns::findLeaf(node_nested, [&](const auto & candidate) { return candidate.data.size() > old_size; diff --git a/src/Compression/tests/gtest_compressionCodec.cpp b/src/Compression/tests/gtest_compressionCodec.cpp index 16573e035e04..f3f6345a5b5f 100644 --- a/src/Compression/tests/gtest_compressionCodec.cpp +++ b/src/Compression/tests/gtest_compressionCodec.cpp @@ -483,7 +483,7 @@ void testTranscoding(Timer & timer, ICompressionCodec & codec, const CodecTestSe ASSERT_TRUE(EqualByteContainers(test_sequence.data_type->getSizeOfValueInMemory(), source_data, decoded)); - const auto header_size = codec.getHeaderSize(); + const auto header_size = ICompressionCodec::getHeaderSize(); const auto compression_ratio = (encoded_size - header_size) / (source_data.size() * 1.0); if (expected_compression_ratio) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index cc22db3969c4..9f10069d4523 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -665,7 +665,7 @@ void BaseDaemon::reloadConfiguration() */ config_path = config().getString("config-file", getDefaultConfigFileName()); ConfigProcessor config_processor(config_path, false, true); - config_processor.setConfigPath(fs::path(config_path).parent_path()); + ConfigProcessor::setConfigPath(fs::path(config_path).parent_path()); loaded_config = config_processor.loadConfig(/* allow_zk_includes = */ true); if (last_configuration != nullptr) diff --git a/src/DataTypes/DataTypeDate32.cpp b/src/DataTypes/DataTypeDate32.cpp index 762552bcb4cd..343e498d303c 100644 --- a/src/DataTypes/DataTypeDate32.cpp +++ b/src/DataTypes/DataTypeDate32.cpp @@ -18,7 +18,7 @@ SerializationPtr DataTypeDate32::doGetDefaultSerialization() const Field DataTypeDate32::getDefault() const { - return -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); + return -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); /// NOLINT(readability-static-accessed-through-instance) } void registerDataTypeDate32(DataTypeFactory & factory) diff --git a/src/DataTypes/Serializations/tests/gtest_json_parser.cpp b/src/DataTypes/Serializations/tests/gtest_json_parser.cpp index 9b0c8e44d023..1b5b02d579cf 100644 --- a/src/DataTypes/Serializations/tests/gtest_json_parser.cpp +++ b/src/DataTypes/Serializations/tests/gtest_json_parser.cpp @@ -34,7 +34,7 @@ TEST(JSONDataParser, ReadJSON) JSONDataParser parser; ReadBufferFromString buf(json_bad); String res; - parser.readJSON(res, buf); + JSONDataParser::readJSON(res, buf); ASSERT_EQ(json1, res); } @@ -44,7 +44,7 @@ TEST(JSONDataParser, ReadJSON) JSONDataParser parser; ReadBufferFromString buf(json_bad); String res; - parser.readJSON(res, buf); + JSONDataParser::readJSON(res, buf); ASSERT_EQ(json2, res); } } diff --git a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp index 1e108b481ee8..1fe369832acb 100644 --- a/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp +++ b/src/Disks/IO/CachedOnDiskReadBufferFromFile.cpp @@ -346,7 +346,7 @@ CachedOnDiskReadBufferFromFile::getReadBufferForFileSegment(FileSegment & file_s } auto downloader_id = file_segment.getOrSetDownloader(); - if (downloader_id == file_segment.getCallerId()) + if (downloader_id == FileSegment::getCallerId()) { if (canStartFromCache(file_offset_of_buffer_end, file_segment)) { diff --git a/src/Functions/FunctionsConversion.cpp b/src/Functions/FunctionsConversion.cpp index cc2ec4fb0454..5e072d406adf 100644 --- a/src/Functions/FunctionsConversion.cpp +++ 
b/src/Functions/FunctionsConversion.cpp @@ -279,7 +279,7 @@ struct ToDate32Transform32Or64Signed static NO_SANITIZE_UNDEFINED Int32 execute(const FromType & from, const DateLUTImpl & time_zone) { - static const Int32 daynum_min_offset = -static_cast(time_zone.getDayNumOffsetEpoch()); + static const Int32 daynum_min_offset = -static_cast(DateLUTImpl::getDayNumOffsetEpoch()); if constexpr (date_time_overflow_behavior == FormatSettings::DateTimeOverflowBehavior::Throw) { @@ -1092,7 +1092,7 @@ struct ConvertThroughParsing { if constexpr (std::is_same_v) { - vec_to[i] = -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); + vec_to[i] = -static_cast(DateLUT::instance().getDayNumOffsetEpoch()); /// NOLINT(readability-static-accessed-through-instance) } else { diff --git a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp index e22cd6d0022b..e6796874e506 100644 --- a/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLFunctionFactory.cpp @@ -106,7 +106,7 @@ void UserDefinedSQLFunctionFactory::checkCanBeRegistered(const ContextPtr & cont if (AggregateFunctionFactory::instance().hasNameOrAlias(function_name)) throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "The aggregate function '{}' already exists", function_name); - if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context)) + if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context)) /// NOLINT(readability-static-accessed-through-instance) throw Exception(ErrorCodes::FUNCTION_ALREADY_EXISTS, "User defined executable function '{}' already exists", function_name); validateFunction(assert_cast(create_function_query).function_core, function_name); @@ -118,7 +118,7 @@ void UserDefinedSQLFunctionFactory::checkCanBeUnregistered(const ContextPtr & co AggregateFunctionFactory::instance().hasNameOrAlias(function_name)) throw Exception(ErrorCodes::CANNOT_DROP_FUNCTION, "Cannot drop system function '{}'", function_name); - if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context)) + if (UserDefinedExecutableFunctionFactory::instance().has(function_name, context)) /// NOLINT(readability-static-accessed-through-instance) throw Exception(ErrorCodes::CANNOT_DROP_FUNCTION, "Cannot drop user defined executable function '{}'", function_name); } diff --git a/src/IO/MMapReadBufferFromFileWithCache.cpp b/src/IO/MMapReadBufferFromFileWithCache.cpp index 68c0c7227ca8..0cfb60d6527e 100644 --- a/src/IO/MMapReadBufferFromFileWithCache.cpp +++ b/src/IO/MMapReadBufferFromFileWithCache.cpp @@ -37,7 +37,7 @@ MMapReadBufferFromFileWithCache::MMapReadBufferFromFileWithCache( MMapReadBufferFromFileWithCache::MMapReadBufferFromFileWithCache( MMappedFileCache & cache, const std::string & file_name, size_t offset) { - mapped = cache.getOrSet(cache.hash(file_name, offset, -1), [&] + mapped = cache.getOrSet(MMappedFileCache::hash(file_name, offset, -1), [&] { return std::make_shared(file_name, offset); }); diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 093c266c785a..504b72575630 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -1071,7 +1071,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & auto current_context = data.getContext(); - if (UserDefinedExecutableFunctionFactory::instance().has(node.name, current_context)) + if 
(UserDefinedExecutableFunctionFactory::instance().has(node.name, current_context)) /// NOLINT(readability-static-accessed-through-instance) { Array parameters; if (node.parameters) @@ -1087,7 +1087,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & } } - function_builder = UserDefinedExecutableFunctionFactory::instance().tryGet(node.name, current_context, parameters); + function_builder = UserDefinedExecutableFunctionFactory::instance().tryGet(node.name, current_context, parameters); /// NOLINT(readability-static-accessed-through-instance) } if (!function_builder) diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index bd78c7a1bc16..2db07bb77f66 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -1056,7 +1056,7 @@ void NO_INLINE Aggregator::executeImplBatch( /// During processing of row #i we will prefetch HashTable cell for row #(i + prefetch_look_ahead). PrefetchingHelper prefetching; - size_t prefetch_look_ahead = prefetching.getInitialLookAheadValue(); + size_t prefetch_look_ahead = PrefetchingHelper::getInitialLookAheadValue(); /// Optimization for special case when there are no aggregate functions. if (params.aggregates_size == 0) @@ -1077,7 +1077,7 @@ void NO_INLINE Aggregator::executeImplBatch( { if constexpr (prefetch && HasPrefetchMemberFunc) { - if (i == row_begin + prefetching.iterationsToMeasure()) + if (i == row_begin + PrefetchingHelper::iterationsToMeasure()) prefetch_look_ahead = prefetching.calcPrefetchLookAhead(); if (i + prefetch_look_ahead < row_end) @@ -1163,7 +1163,7 @@ void NO_INLINE Aggregator::executeImplBatch( if constexpr (prefetch && HasPrefetchMemberFunc) { - if (i == key_start + prefetching.iterationsToMeasure()) + if (i == key_start + PrefetchingHelper::iterationsToMeasure()) prefetch_look_ahead = prefetching.calcPrefetchLookAhead(); if (i + prefetch_look_ahead < row_end) diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 7acaf95beccb..2115dc57126d 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -56,7 +56,7 @@ InterpreterAlterQuery::InterpreterAlterQuery(const ASTPtr & query_ptr_, ContextP BlockIO InterpreterAlterQuery::execute() { - FunctionNameNormalizer().visit(query_ptr.get()); + FunctionNameNormalizer::visit(query_ptr.get()); const auto & alter = query_ptr->as(); if (alter.alter_object == ASTAlterQuery::AlterObjectType::DATABASE) { @@ -131,7 +131,7 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) if (modify_query) { // Expand CTE before filling default database - ApplyWithSubqueryVisitor().visit(*modify_query); + ApplyWithSubqueryVisitor::visit(*modify_query); } /// Add default database to table identifiers that we can encounter in e.g. default expressions, mutation expression, etc. 
diff --git a/src/Interpreters/InterpreterCreateIndexQuery.cpp b/src/Interpreters/InterpreterCreateIndexQuery.cpp index 6045b5d2e24a..a439cb672c82 100644 --- a/src/Interpreters/InterpreterCreateIndexQuery.cpp +++ b/src/Interpreters/InterpreterCreateIndexQuery.cpp @@ -25,7 +25,7 @@ namespace ErrorCodes BlockIO InterpreterCreateIndexQuery::execute() { - FunctionNameNormalizer().visit(query_ptr.get()); + FunctionNameNormalizer::visit(query_ptr.get()); auto current_context = getContext(); const auto & create_index = query_ptr->as(); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 7c3bed7388c8..c0da510a68b7 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1114,7 +1114,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) // Table SQL definition is available even if the table is detached (even permanently) auto query = database->getCreateTableQuery(create.getTable(), getContext()); - FunctionNameNormalizer().visit(query.get()); + FunctionNameNormalizer::visit(query.get()); auto create_query = query->as(); if (!create.is_dictionary && create_query.is_dictionary) @@ -1184,7 +1184,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) if (create.select && create.isView()) { // Expand CTE before filling default database - ApplyWithSubqueryVisitor().visit(*create.select); + ApplyWithSubqueryVisitor::visit(*create.select); AddDefaultDatabaseVisitor visitor(getContext(), current_database); visitor.visit(*create.select); } @@ -1763,7 +1763,7 @@ BlockIO InterpreterCreateQuery::executeQueryOnCluster(ASTCreateQuery & create) BlockIO InterpreterCreateQuery::execute() { - FunctionNameNormalizer().visit(query_ptr.get()); + FunctionNameNormalizer::visit(query_ptr.get()); auto & create = query_ptr->as(); bool is_create_database = create.database && !create.table; diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index 8f11754b3bef..d1520c92dbc2 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -168,7 +168,7 @@ void Set::setHeader(const ColumnsWithTypeAndName & header) } /// Choose data structure to use for the set. - data.init(data.chooseMethod(key_columns, key_sizes)); + data.init(SetVariants::chooseMethod(key_columns, key_sizes)); } void Set::fillSetElements() diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index a341dae32fa1..7b9790881702 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -144,7 +144,7 @@ void optimizeGroupBy(ASTSelectQuery * select_query, ContextPtr context) } else { - FunctionOverloadResolverPtr function_builder = UserDefinedExecutableFunctionFactory::instance().tryGet(function->name, context); + FunctionOverloadResolverPtr function_builder = UserDefinedExecutableFunctionFactory::instance().tryGet(function->name, context); /// NOLINT(readability-static-accessed-through-instance) if (!function_builder) function_builder = function_factory.get(function->name, context); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index ea2f69bd2b15..c11ddc3467a4 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -923,7 +923,7 @@ static std::tuple executeQueryImpl( /// Propagate WITH statement to children ASTSelect. 
if (settings.enable_global_with_statement) { - ApplyWithGlobalVisitor().visit(ast); + ApplyWithGlobalVisitor::visit(ast); } { diff --git a/src/Interpreters/tests/gtest_filecache.cpp b/src/Interpreters/tests/gtest_filecache.cpp index 09afb01d7ffb..41191ba16055 100644 --- a/src/Interpreters/tests/gtest_filecache.cpp +++ b/src/Interpreters/tests/gtest_filecache.cpp @@ -364,7 +364,7 @@ TEST_F(FileCacheTest, LRUPolicy) std::cerr << "Step 1\n"; auto cache = DB::FileCache("1", settings); cache.initialize(); - auto key = cache.createKeyForPath("key1"); + auto key = DB::FileCache::createKeyForPath("key1"); auto get_or_set = [&](size_t offset, size_t size) { @@ -728,7 +728,7 @@ TEST_F(FileCacheTest, LRUPolicy) auto cache2 = DB::FileCache("2", settings); cache2.initialize(); - auto key = cache2.createKeyForPath("key1"); + auto key = DB::FileCache::createKeyForPath("key1"); /// Get [2, 29] assertEqual( @@ -747,7 +747,7 @@ TEST_F(FileCacheTest, LRUPolicy) fs::create_directories(settings2.base_path); auto cache2 = DB::FileCache("3", settings2); cache2.initialize(); - auto key = cache2.createKeyForPath("key1"); + auto key = DB::FileCache::createKeyForPath("key1"); /// Get [0, 24] assertEqual( @@ -762,7 +762,7 @@ TEST_F(FileCacheTest, LRUPolicy) auto cache = FileCache("4", settings); cache.initialize(); - const auto key = cache.createKeyForPath("key10"); + const auto key = FileCache::createKeyForPath("key10"); const auto key_path = cache.getKeyPath(key, user); cache.removeAllReleasable(user.user_id); @@ -786,7 +786,7 @@ TEST_F(FileCacheTest, LRUPolicy) auto cache = DB::FileCache("5", settings); cache.initialize(); - const auto key = cache.createKeyForPath("key10"); + const auto key = FileCache::createKeyForPath("key10"); const auto key_path = cache.getKeyPath(key, user); cache.removeAllReleasable(user.user_id); @@ -823,7 +823,7 @@ TEST_F(FileCacheTest, writeBuffer) segment_settings.kind = FileSegmentKind::Temporary; segment_settings.unbounded = true; - auto cache_key = cache.createKeyForPath(key); + auto cache_key = FileCache::createKeyForPath(key); auto holder = cache.set(cache_key, 0, 3, segment_settings, user); /// The same is done in TemporaryDataOnDisk::createStreamToCacheFile. 
std::filesystem::create_directories(cache.getKeyPath(cache_key, user)); @@ -949,7 +949,7 @@ TEST_F(FileCacheTest, temporaryData) const auto user = FileCache::getCommonUser(); auto tmp_data_scope = std::make_shared(nullptr, &file_cache, TemporaryDataOnDiskSettings{}); - auto some_data_holder = file_cache.getOrSet(file_cache.createKeyForPath("some_data"), 0, 5_KiB, 5_KiB, CreateFileSegmentSettings{}, 0, user); + auto some_data_holder = file_cache.getOrSet(FileCache::createKeyForPath("some_data"), 0, 5_KiB, 5_KiB, CreateFileSegmentSettings{}, 0, user); { ASSERT_EQ(some_data_holder->size(), 5); @@ -1199,7 +1199,7 @@ TEST_F(FileCacheTest, SLRUPolicy) { auto cache = DB::FileCache(std::to_string(++file_cache_name), settings); cache.initialize(); - auto key = cache.createKeyForPath("key1"); + auto key = FileCache::createKeyForPath("key1"); auto add_range = [&](size_t offset, size_t size) { diff --git a/src/Loggers/Loggers.cpp b/src/Loggers/Loggers.cpp index c5862b82f34a..f794ad336e26 100644 --- a/src/Loggers/Loggers.cpp +++ b/src/Loggers/Loggers.cpp @@ -53,6 +53,8 @@ static std::string renderFileNameTemplate(time_t now, const std::string & file_p return path.replace_filename(ss.str()); } +/// NOLINTBEGIN(readability-static-accessed-through-instance) + void Loggers::buildLoggers(Poco::Util::AbstractConfiguration & config, Poco::Logger & logger /*_root*/, const std::string & cmd_name) { auto current_logger = config.getString("logger", ""); @@ -393,6 +395,8 @@ void Loggers::updateLevels(Poco::Util::AbstractConfiguration & config, Poco::Log } } +/// NOLINTEND(readability-static-accessed-through-instance) + void Loggers::closeLogs(Poco::Logger & logger) { if (log_file) diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp index d175ec8d44b1..0865ec8c0bb3 100644 --- a/src/Processors/Merges/Algorithms/Graphite.cpp +++ b/src/Processors/Merges/Algorithms/Graphite.cpp @@ -86,7 +86,7 @@ static const Graphite::Pattern undef_pattern = .regexp_str = "", .function = nullptr, .retentions = Graphite::Retentions(), - .type = undef_pattern.TypeUndef, + .type = Graphite::Pattern::TypeUndef, }; inline static const Patterns & selectPatternsForMetricType(const Graphite::Params & params, std::string_view path) @@ -118,18 +118,18 @@ Graphite::RollupRule selectPatternForPath( if (!pattern.regexp) { /// Default pattern - if (first_match->type == first_match->TypeUndef && pattern.type == pattern.TypeAll) + if (first_match->type == Graphite::Pattern::TypeUndef && pattern.type == Graphite::Pattern::TypeAll) { /// There is only default pattern for both retention and aggregation return {&pattern, &pattern}; } if (pattern.type != first_match->type) { - if (first_match->type == first_match->TypeRetention) + if (first_match->type == Graphite::Pattern::TypeRetention) { return {first_match, &pattern}; } - if (first_match->type == first_match->TypeAggregation) + if (first_match->type == Graphite::Pattern::TypeAggregation) { return {&pattern, first_match}; } @@ -140,23 +140,23 @@ Graphite::RollupRule selectPatternForPath( if (pattern.regexp->match(path.data(), path.size())) { /// General pattern with matched path - if (pattern.type == pattern.TypeAll) + if (pattern.type == Graphite::Pattern::TypeAll) { /// Only for not default patterns with both function and retention parameters return {&pattern, &pattern}; } - if (first_match->type == first_match->TypeUndef) + if (first_match->type == Graphite::Pattern::TypeUndef) { first_match = &pattern; continue; } if (pattern.type != 
first_match->type) { - if (first_match->type == first_match->TypeRetention) + if (first_match->type == Graphite::Pattern::TypeRetention) { return {first_match, &pattern}; } - if (first_match->type == first_match->TypeAggregation) + if (first_match->type == Graphite::Pattern::TypeAggregation) { return {&pattern, first_match}; } @@ -415,24 +415,24 @@ static const Pattern & appendGraphitePattern( if (!pattern.function) { - pattern.type = pattern.TypeRetention; + pattern.type = Graphite::Pattern::TypeRetention; } else if (pattern.retentions.empty()) { - pattern.type = pattern.TypeAggregation; + pattern.type = Graphite::Pattern::TypeAggregation; } else { - pattern.type = pattern.TypeAll; + pattern.type = Graphite::Pattern::TypeAll; } - if (pattern.type & pattern.TypeAggregation) /// TypeAggregation or TypeAll + if (pattern.type & Graphite::Pattern::TypeAggregation) /// TypeAggregation or TypeAll if (pattern.function->allocatesMemoryInArena()) throw Exception(DB::ErrorCodes::NOT_IMPLEMENTED, "Aggregate function {} isn't supported in GraphiteMergeTree", pattern.function->getName()); /// retention should be in descending order of age. - if (pattern.type & pattern.TypeRetention) /// TypeRetention or TypeAll + if (pattern.type & Graphite::Pattern::TypeRetention) /// TypeRetention or TypeAll ::sort(pattern.retentions.begin(), pattern.retentions.end(), compareRetentions); patterns.emplace_back(pattern); diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 7b0cfdf6a6cc..f24b2d4f2783 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -467,7 +467,7 @@ void StorageFileLog::openFilesAndSetPos() auto & reader = file_ctx.reader.value(); assertStreamGood(reader); - reader.seekg(0, reader.end); + reader.seekg(0, reader.end); /// NOLINT(readability-static-accessed-through-instance) assertStreamGood(reader); auto file_end = reader.tellg(); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 461d9a31eaa9..a301c229c77d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2994,7 +2994,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context commands.apply(new_metadata, local_context); - if (commands.hasInvertedIndex(new_metadata) && !settings.allow_experimental_inverted_index) + if (AlterCommands::hasInvertedIndex(new_metadata) && !settings.allow_experimental_inverted_index) throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Experimental Inverted Index feature is not enabled (turn on setting 'allow_experimental_inverted_index')"); diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index a0c4156a7048..447fd87cdc96 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -116,7 +116,7 @@ StorageDictionary::StorageDictionary( : StorageDictionary( table_id, table_id.getFullNameNotQuoted(), - context_->getExternalDictionariesLoader().getDictionaryStructure(*dictionary_configuration), + context_->getExternalDictionariesLoader().getDictionaryStructure(*dictionary_configuration), /// NOLINT(readability-static-accessed-through-instance) dictionary_configuration->getString("dictionary.comment", ""), Location::SameDatabaseAndNameAsDictionary, context_) diff --git a/src/Storages/StorageFuzzJSON.cpp b/src/Storages/StorageFuzzJSON.cpp index 918f54b16722..fbfc67f4c7c1 100644 --- a/src/Storages/StorageFuzzJSON.cpp +++ 
b/src/Storages/StorageFuzzJSON.cpp @@ -437,7 +437,7 @@ void fuzzJSONObject( bool first = true; for (const auto & ptr : node_list) { - if (node_count >= config.value_number_limit) + if (node_count >= StorageFuzzJSON::Configuration::value_number_limit) break; WriteBufferFromOwnString child_out; diff --git a/src/Storages/System/StorageSystemFunctions.cpp b/src/Storages/System/StorageSystemFunctions.cpp index 967132e4d4a7..c5c912948011 100644 --- a/src/Storages/System/StorageSystemFunctions.cpp +++ b/src/Storages/System/StorageSystemFunctions.cpp @@ -179,7 +179,7 @@ void StorageSystemFunctions::fillData(MutableColumns & res_columns, ContextPtr c } const auto & user_defined_executable_functions_factory = UserDefinedExecutableFunctionFactory::instance(); - const auto & user_defined_executable_functions_names = user_defined_executable_functions_factory.getRegisteredNames(context); + const auto & user_defined_executable_functions_names = user_defined_executable_functions_factory.getRegisteredNames(context); /// NOLINT(readability-static-accessed-through-instance) for (const auto & function_name : user_defined_executable_functions_names) { fillRow(res_columns, function_name, 0, {0}, "", FunctionOrigin::EXECUTABLE_USER_DEFINED, user_defined_executable_functions_factory); diff --git a/src/TableFunctions/TableFunctionExplain.cpp b/src/TableFunctions/TableFunctionExplain.cpp index 8607597fa678..df2835dd6307 100644 --- a/src/TableFunctions/TableFunctionExplain.cpp +++ b/src/TableFunctions/TableFunctionExplain.cpp @@ -136,7 +136,7 @@ void TableFunctionExplain::parseArguments(const ASTPtr & ast_function, ContextPt ColumnsDescription TableFunctionExplain::getActualTableStructure(ContextPtr context, bool /*is_insert_query*/) const { - Block sample_block = getInterpreter(context).getSampleBlock(query->as()->getKind()); + Block sample_block = getInterpreter(context).getSampleBlock(query->as()->getKind()); /// NOLINT(readability-static-accessed-through-instance) ColumnsDescription columns_description; for (const auto & column : sample_block.getColumnsWithTypeAndName()) columns_description.add(ColumnDescription(column.name, column.type)); From 12813be9b859f21f9b2eb6424b1cdc3d95b32949 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 3 Apr 2024 13:08:52 +0000 Subject: [PATCH 317/470] Ignore queries involving system tables by query cache Fixes #59071 --- docs/en/operations/query-cache.md | 7 ++- docs/en/operations/settings/settings.md | 12 +++++ src/Common/ErrorCodes.cpp | 1 + src/Core/Settings.h | 1 + src/Core/SettingsEnums.cpp | 4 ++ src/Core/SettingsEnums.h | 9 ++++ src/Interpreters/Cache/QueryCache.cpp | 42 ++++++++++++++++ src/Interpreters/Cache/QueryCache.h | 3 ++ src/Interpreters/executeQuery.cpp | 14 +++++- .../02494_query_cache_system_tables.reference | 13 +++++ .../02494_query_cache_system_tables.sql | 48 +++++++++++++++++++ 11 files changed, 151 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02494_query_cache_system_tables.reference create mode 100644 tests/queries/0_stateless/02494_query_cache_system_tables.sql diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index a8532bc22b7e..7a920671fc23 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -67,8 +67,7 @@ SETTINGS use_query_cache = true, enable_writes_to_query_cache = false; For maximum control, it is generally recommended to provide settings `use_query_cache`, `enable_writes_to_query_cache` and `enable_reads_from_query_cache` only with 
specific queries. It is also possible to enable caching at user or profile level (e.g. via `SET -use_query_cache = true`) but one should keep in mind that all `SELECT` queries including monitoring or debugging queries to system tables -may return cached results then. +use_query_cache = true`) but one should keep in mind that all `SELECT` queries may return cached results then. The query cache can be cleared using statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in system table [system.query_cache](system-tables/query_cache.md). The number of query cache hits and misses since database start are shown as events @@ -175,6 +174,10 @@ Also, results of queries with non-deterministic functions are not cached by defa To force caching of results of queries with non-deterministic functions regardless, use setting [query_cache_nondeterministic_function_handling](settings/settings.md#query-cache-nondeterministic-function-handling). +Results of queries that involve system tables, e.g. `system.processes` or `information_schema.tables`, are not cached by default. To force +caching of results of queries with system tables regardless, use setting +[query_cache_system_table_handling](settings/settings.md#query-cache-system-table-handling). + :::note Prior to ClickHouse v23.11, setting 'query_cache_store_results_of_queries_with_nondeterministic_functions = 0 / 1' controlled whether results of queries with non-deterministic results were cached. In newer ClickHouse versions, this setting is obsolete and has no effect. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index e4e7be83f7da..a95c83fad3aa 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1689,6 +1689,18 @@ Possible values: Default value: `throw`. +## query_cache_system_table_handling {#query-cache-system-table-handling} + +Controls how the [query cache](../query-cache.md) handles `SELECT` queries against system tables, i.e. tables in databases `system.*` and `information_schema.*`. + +Possible values: + +- `'throw'` - Throw an exception and don't cache the query result. +- `'save'` - Cache the query result. +- `'ignore'` - Don't cache the query result and don't throw an exception. + +Default value: `throw`. + ## query_cache_min_query_runs {#query-cache-min-query-runs} Minimum number of times a `SELECT` query must run before its result is stored in the [query cache](../query-cache.md). diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index af609fabb8f3..97a339b2bace 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -597,6 +597,7 @@ M(716, CANNOT_FORGET_PARTITION) \ M(717, EXPERIMENTAL_FEATURE_ERROR) \ M(718, TOO_SLOW_PARSING) \ + M(719, QUERY_CACHE_USED_WITH_SYSTEM_TABLE) \ \ M(900, DISTRIBUTED_CACHE_ERROR) \ M(901, CANNOT_USE_DISTRIBUTED_CACHE) \ diff --git a/src/Core/Settings.h b/src/Core/Settings.h index d5ea9534e6c8..78e482c60901 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -669,6 +669,7 @@ class IColumn; M(Bool, enable_writes_to_query_cache, true, "Enable storing results of SELECT queries in the query cache", 0) \ M(Bool, enable_reads_from_query_cache, true, "Enable reading results of SELECT queries from the query cache", 0) \ M(QueryCacheNondeterministicFunctionHandling, query_cache_nondeterministic_function_handling, QueryCacheNondeterministicFunctionHandling::Throw, "How the query cache handles queries with non-deterministic functions, e.g. 
now()", 0) \ + M(QueryCacheSystemTableHandling, query_cache_system_table_handling, QueryCacheSystemTableHandling::Throw, "How the query cache handles queries against system tables, i.e. tables in databases 'system.*' and 'information_schema.*'", 0) \ M(UInt64, query_cache_max_size_in_bytes, 0, "The maximum amount of memory (in bytes) the current user may allocate in the query cache. 0 means unlimited. ", 0) \ M(UInt64, query_cache_max_entries, 0, "The maximum number of query results the current user may store in the query cache. 0 means unlimited.", 0) \ M(UInt64, query_cache_min_query_runs, 0, "Minimum number a SELECT query must run before its result is stored in the query cache", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index e47174a3b9d1..0caf6e8d6098 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -87,6 +87,10 @@ IMPLEMENT_SETTING_ENUM(QueryCacheNondeterministicFunctionHandling, ErrorCodes::B {"save", QueryCacheNondeterministicFunctionHandling::Save}, {"ignore", QueryCacheNondeterministicFunctionHandling::Ignore}}) +IMPLEMENT_SETTING_ENUM(QueryCacheSystemTableHandling, ErrorCodes::BAD_ARGUMENTS, + {{"throw", QueryCacheSystemTableHandling::Throw}, + {"save", QueryCacheSystemTableHandling::Save}, + {"ignore", QueryCacheSystemTableHandling::Ignore}}) IMPLEMENT_SETTING_ENUM(DateTimeInputFormat, ErrorCodes::BAD_ARGUMENTS, {{"basic", FormatSettings::DateTimeInputFormat::Basic}, diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 0aa8216bb857..b17ff11d4282 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -184,6 +184,15 @@ enum class QueryCacheNondeterministicFunctionHandling DECLARE_SETTING_ENUM(QueryCacheNondeterministicFunctionHandling) +/// How the query cache handles queries against system tables, tables in databases 'system.*' and 'information_schema.*' +enum class QueryCacheSystemTableHandling +{ + Throw, + Save, + Ignore +}; + +DECLARE_SETTING_ENUM(QueryCacheSystemTableHandling) DECLARE_SETTING_ENUM_WITH_RENAME(DateTimeInputFormat, FormatSettings::DateTimeInputFormat) diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index 151f2ea06cc9..8a76378f14c3 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -2,8 +2,10 @@ #include #include +#include #include #include +#include #include #include #include @@ -52,7 +54,40 @@ struct HasNonDeterministicFunctionsMatcher } }; +struct HasSystemTablesMatcher +{ + struct Data + { + const ContextPtr context; + bool has_system_tables = false; + }; + + static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } + + static void visit(const ASTPtr & node, Data & data) + { + if (data.has_system_tables) + return; + + if (const auto * identifier = node->as()) + { + StorageID storage_id = identifier->getTableId(); + if (!storage_id.hasDatabase()) + /// The common case that a database name was not explicitly specified in the SQL. However, isPredefinedTable() is AST-based + /// and assumes that a database name was specified. This bites us in this edge situation: + /// USE SYSTEM; + /// SELECT * FROM PROCESSES; -- instead of SYSTEM.PROCESSES + /// In this case, don't call isPredefinedTable() (to avoid exceptions) and accept that the behavior is not 100% kosher. 
+ return; + bool is_predefined_table = DatabaseCatalog::instance().isPredefinedTable(storage_id); + if (is_predefined_table) + data.has_system_tables = true; + } + } +}; + using HasNonDeterministicFunctionsVisitor = InDepthNodeVisitor; +using HasSystemTablesVisitor = InDepthNodeVisitor; } @@ -63,6 +98,13 @@ bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context) return finder_data.has_non_deterministic_functions; } +bool astContainsSystemTables(ASTPtr ast, ContextPtr context) +{ + HasSystemTablesMatcher::Data finder_data{context}; + HasSystemTablesVisitor(finder_data).visit(ast); + return finder_data.has_system_tables; +} + namespace { diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h index c574f3085e30..a06f504de65b 100644 --- a/src/Interpreters/Cache/QueryCache.h +++ b/src/Interpreters/Cache/QueryCache.h @@ -17,6 +17,9 @@ namespace DB /// Does AST contain non-deterministic functions like rand() and now()? bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context); +/// Does AST contain system tables like "system.processes"? +bool astContainsSystemTables(ASTPtr ast, ContextPtr context); + /// Maps queries to query results. Useful to avoid repeated query calculation. /// /// The cache does not aim to be transactionally consistent (which is difficult to get right). For example, the cache is not invalidated diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index ea2f69bd2b15..02fced5850ba 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -97,6 +97,7 @@ namespace DB namespace ErrorCodes { extern const int QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS; + extern const int QUERY_CACHE_USED_WITH_SYSTEM_TABLE; extern const int INTO_OUTFILE_NOT_ALLOWED; extern const int INVALID_TRANSACTION; extern const int LOGICAL_ERROR; @@ -1198,15 +1199,26 @@ static std::tuple executeQueryImpl( /// top of the pipeline which stores the result in the query cache. if (can_use_query_cache && settings.enable_writes_to_query_cache) { + /// Only use the query cache if the query does not contain non-deterministic functions or system tables (which are typically non-deterministic) + const bool ast_contains_nondeterministic_functions = astContainsNonDeterministicFunctions(ast, context); + const bool ast_contains_system_tables = astContainsSystemTables(ast, context); + const QueryCacheNondeterministicFunctionHandling nondeterministic_function_handling = settings.query_cache_nondeterministic_function_handling; + const QueryCacheSystemTableHandling system_table_handling = settings.query_cache_system_table_handling; if (ast_contains_nondeterministic_functions && nondeterministic_function_handling == QueryCacheNondeterministicFunctionHandling::Throw) throw Exception(ErrorCodes::QUERY_CACHE_USED_WITH_NONDETERMINISTIC_FUNCTIONS, "The query result was not cached because the query contains a non-deterministic function." " Use setting `query_cache_nondeterministic_function_handling = 'save'` or `= 'ignore'` to cache the query result regardless or to omit caching"); - if (!ast_contains_nondeterministic_functions || nondeterministic_function_handling == QueryCacheNondeterministicFunctionHandling::Save) + if (ast_contains_system_tables && system_table_handling == QueryCacheSystemTableHandling::Throw) + throw Exception(ErrorCodes::QUERY_CACHE_USED_WITH_SYSTEM_TABLE, + "The query result was not cached because the query contains a system table." 
+ " Use setting `query_cache_system_table_handling = 'save'` or `= 'ignore'` to cache the query result regardless or to omit caching"); + + if ((!ast_contains_nondeterministic_functions || nondeterministic_function_handling == QueryCacheNondeterministicFunctionHandling::Save) + && (!ast_contains_system_tables || system_table_handling == QueryCacheSystemTableHandling::Save)) { QueryCache::Key key( ast, res.pipeline.getHeader(), diff --git a/tests/queries/0_stateless/02494_query_cache_system_tables.reference b/tests/queries/0_stateless/02494_query_cache_system_tables.reference new file mode 100644 index 000000000000..e41e365766e5 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_system_tables.reference @@ -0,0 +1,13 @@ +The Default for query_cache_system_table_handling is = throw +0 +Check behavior of query_cache_system_table_handling = throw +0 +Check behavior of query_cache_system_table_handling = save +0 +1 +Check behavior of query_cache_system_table_handling = ignore +0 +0 +Other tests +0 +0 diff --git a/tests/queries/0_stateless/02494_query_cache_system_tables.sql b/tests/queries/0_stateless/02494_query_cache_system_tables.sql new file mode 100644 index 000000000000..89ea9d2b6b79 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_system_tables.sql @@ -0,0 +1,48 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY CACHE; + +SELECT 'The Default for query_cache_system_table_handling is = throw'; +-- Test that the query cache rejects queries that involve system tables. +SELECT * FROM system.one SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } +SELECT count(*) FROM system.query_cache; + +SYSTEM DROP QUERY CACHE; + +SELECT 'Check behavior of query_cache_system_table_handling = throw'; +-- Test that the query cache rejects queries that involve system tables. +SELECT * FROM system.one SETTINGS use_query_cache = 1, query_cache_system_table_handling = 'throw'; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } +SELECT count(*) FROM system.query_cache; + +SYSTEM DROP QUERY CACHE; + +SELECT 'Check behavior of query_cache_system_table_handling = save'; +-- Test that the query cache saves the result of queries that involve system tables. +SELECT * FROM system.one SETTINGS use_query_cache = 1, query_cache_system_table_handling = 'save'; +SELECT count(*) FROM system.query_cache; + +SYSTEM DROP QUERY CACHE; + +SELECT 'Check behavior of query_cache_system_table_handling = ignore'; +-- Test that the query cache ignores the result of queries that involve system tables. +SELECT * FROM system.one SETTINGS use_query_cache = 1, query_cache_system_table_handling = 'ignore'; +SELECT count(*) FROM system.query_cache; + +SYSTEM DROP QUERY CACHE; + +SELECT 'Other tests'; + +-- Edge case which doesn't work well due to conceptual reasons (QueryCache is AST-based), test it anyways to have it documented. +USE system; +SELECT * FROM one SETTINGS use_query_cache = 1; -- doesn't throw but should + +-- This query uses system.zero internally. Since the query cache works at AST level it does not "see' system.zero and must not complain. 
+SELECT * SETTINGS use_query_cache = 1; + +-- information_schema is also treated as a system table +SELECT * FROM information_schema.tables SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } +SELECT * FROM INFORMATION_SCHEMA.TABLES SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } + +-- Cleanup +SYSTEM DROP QUERY CACHE; From a4bec47a44ebb257f298f0254be0c2a7c3f62a95 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sun, 7 Apr 2024 14:05:08 +0200 Subject: [PATCH 318/470] fix tests --- tests/queries/0_stateless/01056_window_view_proc_hop_watch.py | 2 ++ .../01059_window_view_event_hop_watch_strict_asc.py | 2 ++ .../0_stateless/01062_window_view_event_hop_watch_asc.py | 2 ++ .../0_stateless/01065_window_view_event_hop_watch_bounded.py | 2 ++ tests/queries/0_stateless/01070_window_view_watch_events.py | 2 ++ .../0_stateless/01078_window_view_alter_query_watch.py | 4 ++++ tests/queries/0_stateless/01082_window_view_watch_limit.py | 2 ++ 7 files changed, 16 insertions(+) diff --git a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py index 4c3e3ead4554..2db14fcdddf3 100755 --- a/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py +++ b/tests/queries/0_stateless/01056_window_view_proc_hop_watch.py @@ -28,6 +28,8 @@ client1.expect(prompt) client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) + client2.send("SET allow_experimental_analyzer = 0") + client2.expect(prompt) client1.send("CREATE DATABASE IF NOT EXISTS 01056_window_view_proc_hop_watch") client1.expect(prompt) diff --git a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py index 9adff06442ed..2323ee5c8381 100755 --- a/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py +++ b/tests/queries/0_stateless/01059_window_view_event_hop_watch_strict_asc.py @@ -26,6 +26,8 @@ client1.expect(prompt) client1.send("SET window_view_heartbeat_interval = 1") client1.expect(prompt) + client2.send("SET allow_experimental_analyzer = 0") + client2.expect(prompt) client1.send("CREATE DATABASE IF NOT EXISTS db_01059_event_hop_watch_strict_asc") client1.expect(prompt) diff --git a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py index bb40b1df2f07..db9e8cef6c55 100755 --- a/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py +++ b/tests/queries/0_stateless/01062_window_view_event_hop_watch_asc.py @@ -28,6 +28,8 @@ client1.expect(prompt) client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) + client2.send("SET allow_experimental_analyzer = 0") + client2.expect(prompt) client1.send("CREATE DATABASE IF NOT EXISTS 01062_window_view_event_hop_watch_asc") client1.expect(prompt) diff --git a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py index 7f00130b184e..b8d5ff02d379 100755 --- a/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py +++ b/tests/queries/0_stateless/01065_window_view_event_hop_watch_bounded.py @@ -27,6 +27,8 @@ client1.expect(prompt) client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) + client2.send("SET allow_experimental_analyzer = 0") + client2.expect(prompt) client1.send( "CREATE DATABASE 
IF NOT EXISTS 01065_window_view_event_hop_watch_bounded" diff --git a/tests/queries/0_stateless/01070_window_view_watch_events.py b/tests/queries/0_stateless/01070_window_view_watch_events.py index 8aeff041cc1a..1cf7678a014b 100755 --- a/tests/queries/0_stateless/01070_window_view_watch_events.py +++ b/tests/queries/0_stateless/01070_window_view_watch_events.py @@ -28,6 +28,8 @@ client1.expect(prompt) client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) + client2.send("SET allow_experimental_analyzer = 0") + client2.expect(prompt) client1.send("CREATE DATABASE IF NOT EXISTS 01070_window_view_watch_events") client1.expect(prompt) diff --git a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py index c32e508c5a55..3f3dfe0cda8b 100755 --- a/tests/queries/0_stateless/01078_window_view_alter_query_watch.py +++ b/tests/queries/0_stateless/01078_window_view_alter_query_watch.py @@ -28,10 +28,14 @@ client1.expect(prompt) client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) + client2.send("SET allow_experimental_analyzer = 0") + client2.expect(prompt) client3.send("SET allow_experimental_window_view = 1") client3.expect(prompt) client3.send("SET window_view_heartbeat_interval = 1") client3.expect(prompt) + client3.send("SET allow_experimental_analyzer = 0") + client3.expect(prompt) client1.send("CREATE DATABASE IF NOT EXISTS 01078_window_view_alter_query_watch") client1.expect(prompt) diff --git a/tests/queries/0_stateless/01082_window_view_watch_limit.py b/tests/queries/0_stateless/01082_window_view_watch_limit.py index 12c8d2955918..9938ebcab984 100755 --- a/tests/queries/0_stateless/01082_window_view_watch_limit.py +++ b/tests/queries/0_stateless/01082_window_view_watch_limit.py @@ -27,6 +27,8 @@ client1.expect(prompt) client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) + client2.send("SET allow_experimental_analyzer = 0") + client2.expect(prompt) client1.send("CREATE DATABASE IF NOT EXISTS 01082_window_view_watch_limit") client1.expect(prompt) From 7dacd8aa842b94811b111c35cc2b21ae6564ba54 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 7 Apr 2024 14:46:55 +0000 Subject: [PATCH 319/470] Incorporate review feedback --- src/Interpreters/Cache/QueryCache.cpp | 37 +++++++++++++------ .../02494_query_cache_system_tables.sql | 3 ++ 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index 8a76378f14c3..67fcdb8159c1 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -14,6 +15,7 @@ #include #include #include +#include #include #include /// chassert @@ -69,20 +71,31 @@ struct HasSystemTablesMatcher if (data.has_system_tables) return; - if (const auto * identifier = node->as()) + String database_table; /// or whatever else we get, e.g. just a table + + /// SELECT [...] FROM + if (const auto * table_identifier = node->as()) + { + database_table = table_identifier->name(); + } + /// SELECT [...] FROM clusterAllReplicas(,
) + else if (const auto * identifier = node->as()) { - StorageID storage_id = identifier->getTableId(); - if (!storage_id.hasDatabase()) - /// The common case that a database name was not explicitly specified in the SQL. However, isPredefinedTable() is AST-based - /// and assumes that a database name was specified. This bites us in this edge situation: - /// USE SYSTEM; - /// SELECT * FROM PROCESSES; -- instead of SYSTEM.PROCESSES - /// In this case, don't call isPredefinedTable() (to avoid exceptions) and accept that the behavior is not 100% kosher. - return; - bool is_predefined_table = DatabaseCatalog::instance().isPredefinedTable(storage_id); - if (is_predefined_table) - data.has_system_tables = true; + database_table = identifier->name(); } + /// Handle SELECT [...] FROM clusterAllReplicas(, '
') + else if (const auto * literal = node->as()) + { + const auto & value = literal->value; /// (*) + database_table = applyVisitor(FieldVisitorDump(), value); + } + + /// (*) returns table in quotes, so we can't use .starts_with() for matching + static const re2::RE2 is_system_table(String(DatabaseCatalog::TEMPORARY_DATABASE) + + "|" + DatabaseCatalog::SYSTEM_DATABASE + + "|" + DatabaseCatalog::INFORMATION_SCHEMA + + "|" + DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE); + data.has_system_tables = re2::RE2::PartialMatch(database_table, is_system_table); } }; diff --git a/tests/queries/0_stateless/02494_query_cache_system_tables.sql b/tests/queries/0_stateless/02494_query_cache_system_tables.sql index 89ea9d2b6b79..935011a6bb0c 100644 --- a/tests/queries/0_stateless/02494_query_cache_system_tables.sql +++ b/tests/queries/0_stateless/02494_query_cache_system_tables.sql @@ -44,5 +44,8 @@ SELECT * SETTINGS use_query_cache = 1; SELECT * FROM information_schema.tables SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } SELECT * FROM INFORMATION_SCHEMA.TABLES SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } +SELECT * FROM clusterAllReplicas('test_shard_localhost', system.one) SETTINGS use_query_cache = 1; -- {serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } +SELECT * FROM clusterAllReplicas('test_shard_localhost', 'system.one') SETTINGS use_query_cache = 1; -- {serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } + -- Cleanup SYSTEM DROP QUERY CACHE; From 285dbc39f7b3c7c82122a3ecf908bba47ecc3132 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Sun, 7 Apr 2024 17:12:52 +0000 Subject: [PATCH 320/470] Fix tests --- .../0_stateless/02494_query_cache_eligible_queries.sql | 1 + tests/queries/0_stateless/02494_query_cache_explain.sql | 1 + .../queries/0_stateless/02494_query_cache_sparse_columns.sql | 5 ++--- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02494_query_cache_eligible_queries.sql b/tests/queries/0_stateless/02494_query_cache_eligible_queries.sql index 5c45ee8aedd4..f7ddb8f8bda3 100644 --- a/tests/queries/0_stateless/02494_query_cache_eligible_queries.sql +++ b/tests/queries/0_stateless/02494_query_cache_eligible_queries.sql @@ -7,6 +7,7 @@ DROP TABLE IF EXISTS eligible_test2; -- enable query cache session-wide but also force it individually in each of below statements SET use_query_cache = true; +SET query_cache_system_table_handling = 'save'; -- check that SELECT statements create entries in the query cache ... 
SELECT 1 SETTINGS use_query_cache = true; diff --git a/tests/queries/0_stateless/02494_query_cache_explain.sql b/tests/queries/0_stateless/02494_query_cache_explain.sql index d12938181c2e..bf376b47fdb0 100644 --- a/tests/queries/0_stateless/02494_query_cache_explain.sql +++ b/tests/queries/0_stateless/02494_query_cache_explain.sql @@ -2,6 +2,7 @@ -- Tag no-parallel: Messes with internal cache SET allow_experimental_analyzer = 1; +SET query_cache_system_table_handling = 'save'; SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02494_query_cache_sparse_columns.sql b/tests/queries/0_stateless/02494_query_cache_sparse_columns.sql index 4344d139d60b..6266996ac2eb 100644 --- a/tests/queries/0_stateless/02494_query_cache_sparse_columns.sql +++ b/tests/queries/0_stateless/02494_query_cache_sparse_columns.sql @@ -12,11 +12,10 @@ SYSTEM STOP MERGES t_cache_sparse; INSERT INTO t_cache_sparse SELECT number, number FROM numbers(10000); INSERT INTO t_cache_sparse SELECT number, 0 FROM numbers(10000); -SET use_query_cache = 1; SET max_threads = 1; -SELECT v FROM t_cache_sparse FORMAT Null; -SELECT v FROM t_cache_sparse FORMAT Null; +SELECT v FROM t_cache_sparse SETTINGS use_query_cache = 1, max_threads = 1 FORMAT Null; +SELECT v FROM t_cache_sparse SETTINGS use_query_cache = 1, max_threads = 1 FORMAT Null; SELECT count() FROM system.query_cache WHERE query LIKE 'SELECT v FROM t_cache_sparse%'; DROP TABLE t_cache_sparse; From bbe8e2d751e2445afc07d97e18625d53868b2235 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sun, 7 Apr 2024 20:35:27 +0300 Subject: [PATCH 321/470] Arrow schema to ClickHouse schema Nullable fix --- .../Formats/Impl/ArrowBlockInputFormat.cpp | 2 +- .../Formats/Impl/ArrowColumnToCHColumn.cpp | 365 ++++++++++++------ .../Formats/Impl/ArrowColumnToCHColumn.h | 23 +- .../Formats/Impl/ORCBlockInputFormat.cpp | 4 +- .../Formats/Impl/ParquetBlockInputFormat.cpp | 2 +- .../DataLakes/DeltaLakeMetadataParser.cpp | 3 +- 6 files changed, 264 insertions(+), 135 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 206e244c75f6..fc9a827be667 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -86,7 +86,7 @@ Chunk ArrowBlockInputFormat::read() /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? &block_missing_values : nullptr; - arrow_column_to_ch_column->arrowTableToCHChunk(res, *table_result, (*table_result)->num_rows(), block_missing_values_ptr); + res = arrow_column_to_ch_column->arrowTableToCHChunk(*table_result, (*table_result)->num_rows(), block_missing_values_ptr); /// There is no easy way to get original record batch size from Arrow metadata. /// Let's just use the number of bytes read from read buffer. 
diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 65704c85026c..ec2d17d73cb4 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -78,7 +78,7 @@ namespace ErrorCodes /// Inserts numeric data right into internal column data to reduce an overhead template > -static ColumnWithTypeAndName readColumnWithNumericData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithNumericData(const std::shared_ptr & arrow_column, const String & column_name) { auto internal_type = std::make_shared>(); auto internal_column = internal_type->createColumn(); @@ -103,7 +103,7 @@ static ColumnWithTypeAndName readColumnWithNumericData(std::shared_ptr -static ColumnWithTypeAndName readColumnWithStringData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithStringData(const std::shared_ptr & arrow_column, const String & column_name) { auto internal_type = std::make_shared(); auto internal_column = internal_type->createColumn(); @@ -147,7 +147,7 @@ static ColumnWithTypeAndName readColumnWithStringData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithFixedStringData(const std::shared_ptr & arrow_column, const String & column_name) { const auto * fixed_type = assert_cast(arrow_column->type().get()); size_t fixed_len = fixed_type->byte_width(); @@ -166,7 +166,7 @@ static ColumnWithTypeAndName readColumnWithFixedStringData(std::shared_ptr -static ColumnWithTypeAndName readColumnWithBigIntegerFromFixedBinaryData(std::shared_ptr & arrow_column, const String & column_name, const DataTypePtr & column_type) +static ColumnWithTypeAndName readColumnWithBigIntegerFromFixedBinaryData(const std::shared_ptr & arrow_column, const String & column_name, const DataTypePtr & column_type) { const auto * fixed_type = assert_cast(arrow_column->type().get()); size_t fixed_len = fixed_type->byte_width(); @@ -193,7 +193,7 @@ static ColumnWithTypeAndName readColumnWithBigIntegerFromFixedBinaryData(std::sh } template -static ColumnWithTypeAndName readColumnWithBigNumberFromBinaryData(std::shared_ptr & arrow_column, const String & column_name, const DataTypePtr & column_type) +static ColumnWithTypeAndName readColumnWithBigNumberFromBinaryData(const std::shared_ptr & arrow_column, const String & column_name, const DataTypePtr & column_type) { size_t total_size = 0; for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) @@ -229,7 +229,7 @@ static ColumnWithTypeAndName readColumnWithBigNumberFromBinaryData(std::shared_p return {std::move(internal_column), column_type, column_name}; } -static ColumnWithTypeAndName readColumnWithBooleanData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithBooleanData(const std::shared_ptr & arrow_column, const String & column_name) { auto internal_type = DataTypeFactory::instance().get("Bool"); auto internal_column = internal_type->createColumn(); @@ -248,7 +248,7 @@ static ColumnWithTypeAndName readColumnWithBooleanData(std::shared_ptr & arrow_column, const String & column_name, +static ColumnWithTypeAndName readColumnWithDate32Data(const std::shared_ptr & arrow_column, const String & column_name, const DataTypePtr & type_hint, FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior) { DataTypePtr 
internal_type; @@ -310,7 +310,7 @@ static ColumnWithTypeAndName readColumnWithDate32Data(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithDate64Data(const std::shared_ptr & arrow_column, const String & column_name) { auto internal_type = std::make_shared(); auto internal_column = internal_type->createColumn(); @@ -329,7 +329,7 @@ static ColumnWithTypeAndName readColumnWithDate64Data(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithTimestampData(const std::shared_ptr & arrow_column, const String & column_name) { const auto & arrow_type = static_cast(*(arrow_column->type())); const UInt8 scale = arrow_type.unit() * 3; @@ -350,7 +350,7 @@ static ColumnWithTypeAndName readColumnWithTimestampData(std::shared_ptr -static ColumnWithTypeAndName readColumnWithTimeData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithTimeData(const std::shared_ptr & arrow_column, const String & column_name) { const auto & arrow_type = static_cast(*(arrow_column->type())); const UInt8 scale = arrow_type.unit() * 3; @@ -373,18 +373,18 @@ static ColumnWithTypeAndName readColumnWithTimeData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithTime32Data(const std::shared_ptr & arrow_column, const String & column_name) { return readColumnWithTimeData(arrow_column, column_name); } -static ColumnWithTypeAndName readColumnWithTime64Data(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithTime64Data(const std::shared_ptr & arrow_column, const String & column_name) { return readColumnWithTimeData(arrow_column, column_name); } template -static ColumnWithTypeAndName readColumnWithDecimalDataImpl(std::shared_ptr & arrow_column, const String & column_name, DataTypePtr internal_type) +static ColumnWithTypeAndName readColumnWithDecimalDataImpl(const std::shared_ptr & arrow_column, const String & column_name, DataTypePtr internal_type) { auto internal_column = internal_type->createColumn(); auto & column = assert_cast &>(*internal_column); @@ -403,7 +403,7 @@ static ColumnWithTypeAndName readColumnWithDecimalDataImpl(std::shared_ptr -static ColumnWithTypeAndName readColumnWithDecimalData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readColumnWithDecimalData(const std::shared_ptr & arrow_column, const String & column_name) { const auto * arrow_decimal_type = static_cast(arrow_column->type().get()); size_t precision = arrow_decimal_type->precision(); @@ -418,7 +418,7 @@ static ColumnWithTypeAndName readColumnWithDecimalData(std::shared_ptr & arrow_column) +static ColumnPtr readByteMapFromArrowColumn(const std::shared_ptr & arrow_column) { if (!arrow_column->null_count()) return ColumnUInt8::create(arrow_column->length(), 0); @@ -453,7 +453,7 @@ struct ArrowOffsetArray }; template -static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr & arrow_column) +static ColumnPtr readOffsetsFromArrowListColumn(const std::shared_ptr & arrow_column) { auto offsets_column = ColumnUInt64::create(); ColumnArray::Offsets & offsets_data = assert_cast &>(*offsets_column).getData(); @@ -463,7 +463,7 @@ static ColumnPtr readOffsetsFromArrowListColumn(std::shared_ptr(*(arrow_column->chunk(chunk_i))); auto arrow_offsets_array = list_chunk.offsets(); - auto & arrow_offsets = dynamic_cast::type &>(*arrow_offsets_array); + auto & arrow_offsets = 
dynamic_cast::type &>(*arrow_offsets_array); /* * CH uses element size as "offsets", while arrow uses actual offsets as offsets. @@ -620,7 +620,7 @@ static ColumnPtr readColumnWithIndexesData(std::shared_ptr } template -static std::shared_ptr getNestedArrowColumn(std::shared_ptr & arrow_column) +static std::shared_ptr getNestedArrowColumn(const std::shared_ptr & arrow_column) { arrow::ArrayVector array_vector; array_vector.reserve(arrow_column->num_chunks()); @@ -648,7 +648,7 @@ static std::shared_ptr getNestedArrowColumn(std::shared_ptr return std::make_shared(array_vector); } -static ColumnWithTypeAndName readIPv6ColumnFromBinaryData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readIPv6ColumnFromBinaryData(const std::shared_ptr & arrow_column, const String & column_name) { size_t total_size = 0; for (int chunk_i = 0, num_chunks = arrow_column->num_chunks(); chunk_i < num_chunks; ++chunk_i) @@ -684,7 +684,7 @@ static ColumnWithTypeAndName readIPv6ColumnFromBinaryData(std::shared_ptr & arrow_column, const String & column_name) +static ColumnWithTypeAndName readIPv4ColumnWithInt32Data(const std::shared_ptr & arrow_column, const String & column_name) { auto internal_type = std::make_shared(); auto internal_column = internal_type->createColumn(); @@ -705,35 +705,31 @@ static ColumnWithTypeAndName readIPv4ColumnWithInt32Data(std::shared_ptr & arrow_column, - const std::string & column_name, - const std::string & format_name, - bool is_nullable, - std::unordered_map & dictionary_infos, - bool allow_null_type, - bool skip_columns_with_unsupported_types, - bool & skipped, - FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore, - DataTypePtr type_hint = nullptr, - bool is_map_nested = false) +struct ReadColumnFromArrowColumnSettings { - if (!is_nullable && (arrow_column->null_count() || (type_hint && type_hint->isNullable())) && arrow_column->type()->id() != arrow::Type::LIST - && arrow_column->type()->id() != arrow::Type::MAP && arrow_column->type()->id() != arrow::Type::STRUCT && - arrow_column->type()->id() != arrow::Type::DICTIONARY) - { - DataTypePtr nested_type_hint; - if (type_hint) - nested_type_hint = removeNullable(type_hint); - auto nested_column = readColumnFromArrowColumn(arrow_column, column_name, format_name, true, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, date_time_overflow_behavior, nested_type_hint); - if (skipped) - return {}; - auto nullmap_column = readByteMapFromArrowColumn(arrow_column); - auto nullable_type = std::make_shared(std::move(nested_column.type)); - auto nullable_column = ColumnNullable::create(nested_column.column, nullmap_column); - return {std::move(nullable_column), std::move(nullable_type), column_name}; - } + std::string format_name; + FormatSettings::DateTimeOverflowBehavior date_time_overflow_behavior; + bool allow_arrow_null_type; + bool skip_columns_with_unsupported_types; +}; +static ColumnWithTypeAndName readColumnFromArrowColumn( + const std::shared_ptr & arrow_column, + std::string column_name, + std::unordered_map dictionary_infos, + DataTypePtr type_hint, + bool is_nullable_column, + bool is_map_nested_column, + const ReadColumnFromArrowColumnSettings & settings); + +static ColumnWithTypeAndName readNonNullableColumnFromArrowColumn( + const std::shared_ptr & arrow_column, + std::string column_name, + std::unordered_map dictionary_infos, + DataTypePtr type_hint, + bool is_map_nested_column, + const 
ReadColumnFromArrowColumnSettings & settings) +{ switch (arrow_column->type()->id()) { case arrow::Type::STRING: @@ -790,7 +786,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( case arrow::Type::BOOL: return readColumnWithBooleanData(arrow_column, column_name); case arrow::Type::DATE32: - return readColumnWithDate32Data(arrow_column, column_name, type_hint, date_time_overflow_behavior); + return readColumnWithDate32Data(arrow_column, column_name, type_hint, settings.date_time_overflow_behavior); case arrow::Type::DATE64: return readColumnWithDate64Data(arrow_column, column_name); // ClickHouse writes Date as arrow UINT16 and DateTime as arrow UINT32, @@ -837,9 +833,16 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( key_type_hint = map_type_hint->getKeyType(); } } + auto arrow_nested_column = getNestedArrowColumn(arrow_column); - auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, date_time_overflow_behavior, nested_type_hint, true); - if (skipped) + auto nested_column = readColumnFromArrowColumn(arrow_nested_column, + column_name, + dictionary_infos, + nested_type_hint, + false /*is_nullable_column*/, + true /*is_map_nested_column*/, + settings); + if (!nested_column.column) return {}; auto offsets_column = readOffsetsFromArrowListColumn(arrow_column); @@ -866,7 +869,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( case arrow::Type::LIST: case arrow::Type::LARGE_LIST: { - bool is_large = arrow_column->type()->id() == arrow::Type::LARGE_LIST; + bool is_large_list = arrow_column->type()->id() == arrow::Type::LARGE_LIST; DataTypePtr nested_type_hint; if (type_hint) { @@ -874,12 +877,33 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( if (array_type_hint) nested_type_hint = array_type_hint->getNestedType(); } - auto arrow_nested_column = is_large ? getNestedArrowColumn(arrow_column) : getNestedArrowColumn(arrow_column); - auto nested_column = readColumnFromArrowColumn(arrow_nested_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, date_time_overflow_behavior, nested_type_hint); - if (skipped) + + bool is_nested_nullable_column = false; + if (is_large_list) + { + auto * arrow_large_list_type = assert_cast(arrow_column->type().get()); + is_nested_nullable_column = arrow_large_list_type->value_field()->nullable(); + } + else + { + auto * arrow_list_type = assert_cast(arrow_column->type().get()); + is_nested_nullable_column = arrow_list_type->value_field()->nullable(); + } + + auto arrow_nested_column = is_large_list ? getNestedArrowColumn(arrow_column) : getNestedArrowColumn(arrow_column); + auto nested_column = readColumnFromArrowColumn(arrow_nested_column, + column_name, + dictionary_infos, + nested_type_hint, + is_nested_nullable_column, + false /*is_map_nested_column*/, + settings); + if (!nested_column.column) return {}; - auto offsets_column = is_large ? readOffsetsFromArrowListColumn(arrow_column) : readOffsetsFromArrowListColumn(arrow_column); + + auto offsets_column = is_large_list ? 
readOffsetsFromArrowListColumn(arrow_column) : readOffsetsFromArrowListColumn(arrow_column); auto array_column = ColumnArray::create(nested_column.column, offsets_column); + DataTypePtr array_type; /// If type hint is Nested, we should return Nested type, /// because we differentiate Nested and simple Array(Tuple) @@ -913,11 +937,13 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( for (int i = 0; i != arrow_struct_type->num_fields(); ++i) { - auto field_name = arrow_struct_type->field(i)->name(); + const auto & field = arrow_struct_type->field(i); + const auto & field_name = field->name(); + DataTypePtr nested_type_hint; if (tuple_type_hint) { - if (tuple_type_hint->haveExplicitNames() && !is_map_nested) + if (tuple_type_hint->haveExplicitNames() && !is_map_nested_column) { auto pos = tuple_type_hint->tryGetPositionByName(field_name); if (pos) @@ -926,13 +952,21 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( else if (size_t(i) < tuple_type_hint->getElements().size()) nested_type_hint = tuple_type_hint->getElement(i); } + auto nested_arrow_column = std::make_shared(nested_arrow_columns[i]); - auto element = readColumnFromArrowColumn(nested_arrow_column, field_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, date_time_overflow_behavior, nested_type_hint); - if (skipped) + auto column_with_type_and_name = readColumnFromArrowColumn(nested_arrow_column, + field_name, + dictionary_infos, + nested_type_hint, + field->nullable(), + false /*is_map_nested_column*/, + settings); + if (!column_with_type_and_name.column) return {}; - tuple_elements.emplace_back(std::move(element.column)); - tuple_types.emplace_back(std::move(element.type)); - tuple_names.emplace_back(std::move(element.name)); + + tuple_elements.emplace_back(std::move(column_with_type_and_name.column)); + tuple_types.emplace_back(std::move(column_with_type_and_name.type)); + tuple_names.emplace_back(std::move(column_with_type_and_name.name)); } auto tuple_column = ColumnTuple::create(std::move(tuple_elements)); @@ -953,8 +987,19 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( arrow::DictionaryArray & dict_chunk = dynamic_cast(*(arrow_column->chunk(chunk_i))); dict_array.emplace_back(dict_chunk.dictionary()); } + auto arrow_dict_column = std::make_shared(dict_array); - auto dict_column = readColumnFromArrowColumn(arrow_dict_column, column_name, format_name, false, dictionary_infos, allow_null_type, skip_columns_with_unsupported_types, skipped, date_time_overflow_behavior); + auto dict_column = readColumnFromArrowColumn(arrow_dict_column, + column_name, + dictionary_infos, + nullptr /*nested_type_hint*/, + false /*is_nullable_column*/, + false /*is_map_nested_column*/, + settings); + + if (!dict_column.column) + return {}; + for (size_t i = 0; i != dict_column.column->size(); ++i) { if (dict_column.column->isDefaultAt(i)) @@ -963,6 +1008,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( break; } } + auto lc_type = std::make_shared(is_lc_nullable ? makeNullable(dict_column.type) : dict_column.type); auto tmp_lc_column = lc_type->createColumn(); auto tmp_dict_column = IColumn::mutate(assert_cast(tmp_lc_column.get())->getDictionaryPtr()); @@ -1002,7 +1048,7 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( // TODO: read UUID as a string? 
case arrow::Type::NA: { - if (allow_null_type) + if (settings.allow_arrow_null_type) { auto type = std::make_shared(); auto column = ColumnNothing::create(arrow_column->length()); @@ -1012,11 +1058,8 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( } default: { - if (skip_columns_with_unsupported_types) - { - skipped = true; + if (settings.skip_columns_with_unsupported_types) return {}; - } throw Exception( ErrorCodes::UNKNOWN_TYPE, @@ -1024,14 +1067,59 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( "If it happens during schema inference and you want to skip columns with " "unsupported types, you can enable setting input_format_{}" "_skip_columns_with_unsupported_types_in_schema_inference", - format_name, + settings.format_name, arrow_column->type()->name(), column_name, - boost::algorithm::to_lower_copy(format_name)); + boost::algorithm::to_lower_copy(settings.format_name)); } } } +static ColumnWithTypeAndName readColumnFromArrowColumn( + const std::shared_ptr & arrow_column, + std::string column_name, + std::unordered_map dictionary_infos, + DataTypePtr type_hint, + bool is_nullable_column, + bool is_map_nested_column, + const ReadColumnFromArrowColumnSettings & settings) +{ + bool read_as_nullable_column = arrow_column->null_count() || is_nullable_column || (type_hint && type_hint->isNullable()); + if (read_as_nullable_column && + arrow_column->type()->id() != arrow::Type::LIST && + arrow_column->type()->id() != arrow::Type::LARGE_LIST && + arrow_column->type()->id() != arrow::Type::MAP && + arrow_column->type()->id() != arrow::Type::STRUCT && + arrow_column->type()->id() != arrow::Type::DICTIONARY) + { + DataTypePtr nested_type_hint; + if (type_hint) + nested_type_hint = removeNullable(type_hint); + + auto nested_column = readNonNullableColumnFromArrowColumn(arrow_column, + column_name, + dictionary_infos, + nested_type_hint, + is_map_nested_column, + settings); + + if (!nested_column.column) + return {}; + + auto nullmap_column = readByteMapFromArrowColumn(arrow_column); + auto nullable_type = std::make_shared(std::move(nested_column.type)); + auto nullable_column = ColumnNullable::create(nested_column.column, nullmap_column); + + return {std::move(nullable_column), std::move(nullable_type), column_name}; + } + + return readNonNullableColumnFromArrowColumn(arrow_column, + column_name, + dictionary_infos, + type_hint, + is_map_nested_column, + settings); +} // Creating CH header by arrow schema. Will be useful in task about inserting // data from file without knowing table structure. 
@@ -1042,44 +1130,56 @@ static void checkStatus(const arrow::Status & status, const String & column_name throw Exception{ErrorCodes::UNKNOWN_EXCEPTION, "Error with a {} column '{}': {}.", format_name, column_name, status.ToString()}; } +/// Create empty arrow column using specified field +static std::shared_ptr createArrowColumn(const std::shared_ptr & field, const String & format_name) +{ + arrow::MemoryPool * pool = arrow::default_memory_pool(); + std::unique_ptr array_builder; + arrow::Status status = MakeBuilder(pool, field->type(), &array_builder); + checkStatus(status, field->name(), format_name); + + std::shared_ptr arrow_array; + status = array_builder->Finish(&arrow_array); + checkStatus(status, field->name(), format_name); + + return std::make_shared(arrow::ArrayVector{arrow_array}); +} Block ArrowColumnToCHColumn::arrowSchemaToCHHeader( - const arrow::Schema & schema, const std::string & format_name, - bool skip_columns_with_unsupported_types, const Block * hint_header, bool ignore_case) + const arrow::Schema & schema, + const std::string & format_name, + bool skip_columns_with_unsupported_types) { + ReadColumnFromArrowColumnSettings settings + { + .format_name = format_name, + .date_time_overflow_behavior = FormatSettings::DateTimeOverflowBehavior::Ignore, + .allow_arrow_null_type = false, + .skip_columns_with_unsupported_types = skip_columns_with_unsupported_types + }; + ColumnsWithTypeAndName sample_columns; - std::unordered_set nested_table_names; - if (hint_header) - nested_table_names = Nested::getAllTableNames(*hint_header, ignore_case); for (const auto & field : schema.fields()) { - if (hint_header && !hint_header->has(field->name(), ignore_case) - && !nested_table_names.contains(ignore_case ? boost::to_lower_copy(field->name()) : field->name())) - continue; - /// Create empty arrow column by it's type and convert it to ClickHouse column. 
- arrow::MemoryPool * pool = arrow::default_memory_pool(); - std::unique_ptr array_builder; - arrow::Status status = MakeBuilder(pool, field->type(), &array_builder); - checkStatus(status, field->name(), format_name); + auto arrow_column = createArrowColumn(field, format_name); - std::shared_ptr arrow_array; - status = array_builder->Finish(&arrow_array); - checkStatus(status, field->name(), format_name); - - arrow::ArrayVector array_vector = {arrow_array}; - auto arrow_column = std::make_shared(array_vector); std::unordered_map dict_infos; - bool skipped = false; - bool allow_null_type = false; - if (hint_header && hint_header->has(field->name()) && hint_header->getByName(field->name()).type->isNullable()) - allow_null_type = true; - ColumnWithTypeAndName sample_column = readColumnFromArrowColumn( - arrow_column, field->name(), format_name, false, dict_infos, allow_null_type, skip_columns_with_unsupported_types, skipped); - if (!skipped) + + auto sample_column = readColumnFromArrowColumn( + arrow_column, + field->name(), + dict_infos, + nullptr /*nested_type_hint*/, + field->nullable() /*is_nullable_column*/, + false /*is_map_nested_column*/, + settings); + + if (sample_column.column) sample_columns.emplace_back(std::move(sample_column)); } + return Block(std::move(sample_columns)); } @@ -1101,30 +1201,43 @@ ArrowColumnToCHColumn::ArrowColumnToCHColumn( { } -void ArrowColumnToCHColumn::arrowTableToCHChunk(Chunk & res, std::shared_ptr & table, size_t num_rows, BlockMissingValues * block_missing_values) +Chunk ArrowColumnToCHColumn::arrowTableToCHChunk(const std::shared_ptr & table, size_t num_rows, BlockMissingValues * block_missing_values) { - NameToColumnPtr name_to_column_ptr; + NameToArrowColumn name_to_arrow_column; + for (auto column_name : table->ColumnNames()) { - std::shared_ptr arrow_column = table->GetColumnByName(column_name); + auto arrow_column = table->GetColumnByName(column_name); if (!arrow_column) throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column '{}' is duplicated", column_name); + auto arrow_field = table->schema()->GetFieldByName(column_name); + if (case_insensitive_matching) boost::to_lower(column_name); - name_to_column_ptr[std::move(column_name)] = arrow_column; + + name_to_arrow_column[std::move(column_name)] = {std::move(arrow_column), std::move(arrow_field)}; } - arrowColumnsToCHChunk(res, name_to_column_ptr, num_rows, block_missing_values); + return arrowColumnsToCHChunk(name_to_arrow_column, num_rows, block_missing_values); } -void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows, BlockMissingValues * block_missing_values) +Chunk ArrowColumnToCHColumn::arrowColumnsToCHChunk(const NameToArrowColumn & name_to_arrow_column, size_t num_rows, BlockMissingValues * block_missing_values) { - Columns columns_list; - columns_list.reserve(header.columns()); + ReadColumnFromArrowColumnSettings settings + { + .format_name = format_name, + .date_time_overflow_behavior = date_time_overflow_behavior, + .allow_arrow_null_type = true, + .skip_columns_with_unsupported_types = false + }; + + Columns columns; + columns.reserve(header.columns()); + std::unordered_map>> nested_tables; - bool skipped = false; - for (size_t column_i = 0, columns = header.columns(); column_i < columns; ++column_i) + + for (size_t column_i = 0, header_columns = header.columns(); column_i < header_columns; ++column_i) { const ColumnWithTypeAndName & header_column = header.getByPosition(column_i); @@ -1133,15 +1246,17 @@ void 
ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & boost::to_lower(search_column_name); ColumnWithTypeAndName column; - if (!name_to_column_ptr.contains(search_column_name)) + if (!name_to_arrow_column.contains(search_column_name)) { bool read_from_nested = false; + /// Check if it's a subcolumn from some struct. String nested_table_name = Nested::extractTableName(header_column.name); String search_nested_table_name = nested_table_name; if (case_insensitive_matching) boost::to_lower(search_nested_table_name); - if (name_to_column_ptr.contains(search_nested_table_name)) + + if (name_to_arrow_column.contains(search_nested_table_name)) { if (!nested_tables.contains(search_nested_table_name)) { @@ -1153,10 +1268,19 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & } auto nested_table_type = Nested::collect(nested_columns).front().type; - std::shared_ptr arrow_column = name_to_column_ptr[search_nested_table_name]; - ColumnsWithTypeAndName cols = { - readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_infos, true, false, - skipped, date_time_overflow_behavior, nested_table_type)}; + const auto & arrow_column = name_to_arrow_column.find(search_nested_table_name)->second; + + ColumnsWithTypeAndName cols = + { + readColumnFromArrowColumn(arrow_column.column, + nested_table_name, + dictionary_infos, + nested_table_type, + arrow_column.field->nullable() /*is_nullable_column*/, + false /*is_map_nested_column*/, + settings) + }; + BlockPtr block_ptr = std::make_shared(cols); auto column_extractor = std::make_shared(*block_ptr, case_insensitive_matching); nested_tables[search_nested_table_name] = {block_ptr, column_extractor}; @@ -1180,7 +1304,7 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & column.name = header_column.name; column.type = header_column.type; column.column = header_column.column->cloneResized(num_rows); - columns_list.push_back(std::move(column.column)); + columns.push_back(std::move(column.column)); if (block_missing_values) block_missing_values->setBits(column_i, num_rows); continue; @@ -1189,9 +1313,14 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & } else { - auto arrow_column = name_to_column_ptr[search_column_name]; - column = readColumnFromArrowColumn( - arrow_column, header_column.name, format_name, false, dictionary_infos, true, false, skipped, date_time_overflow_behavior, header_column.type); + const auto & arrow_column = name_to_arrow_column.find(search_column_name)->second; + column = readColumnFromArrowColumn(arrow_column.column, + header_column.name, + dictionary_infos, + header_column.type, + arrow_column.field->nullable(), + false /*is_map_nested_column*/, + settings); } if (null_as_default) @@ -1216,10 +1345,10 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & } column.type = header_column.type; - columns_list.push_back(std::move(column.column)); + columns.push_back(std::move(column.column)); } - res.setColumns(columns_list, num_rows); + return Chunk(std::move(columns), num_rows); } } diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h index 079e03749176..27e9afdf763a 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.h @@ -19,8 +19,6 @@ class Chunk; class ArrowColumnToCHColumn { public: - using NameToColumnPtr = std::unordered_map>; - 
ArrowColumnToCHColumn( const Block & header_, const std::string & format_name_, @@ -30,18 +28,13 @@ class ArrowColumnToCHColumn bool case_insensitive_matching_ = false, bool is_stream_ = false); - void arrowTableToCHChunk(Chunk & res, std::shared_ptr & table, size_t num_rows, BlockMissingValues * block_missing_values = nullptr); - - void arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & name_to_column_ptr, size_t num_rows, BlockMissingValues * block_missing_values = nullptr); + Chunk arrowTableToCHChunk(const std::shared_ptr & table, size_t num_rows, BlockMissingValues * block_missing_values = nullptr); - /// Transform arrow schema to ClickHouse header. If hint_header is provided, - /// we will skip columns in schema that are not in hint_header. + /// Transform arrow schema to ClickHouse header static Block arrowSchemaToCHHeader( const arrow::Schema & schema, const std::string & format_name, - bool skip_columns_with_unsupported_types = false, - const Block * hint_header = nullptr, - bool ignore_case = false); + bool skip_columns_with_unsupported_types = false); struct DictionaryInfo { @@ -52,6 +45,16 @@ class ArrowColumnToCHColumn private: + struct ArrowColumn + { + std::shared_ptr column; + std::shared_ptr field; + }; + + using NameToArrowColumn = std::unordered_map; + + Chunk arrowColumnsToCHChunk(const NameToArrowColumn & name_to_arrow_column, size_t num_rows, BlockMissingValues * block_missing_values); + const Block & header; const std::string format_name; /// If false, throw exception if some columns in header not exists in arrow table. diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index a41eacf26b7a..aa83b87b2d27 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -71,12 +71,10 @@ Chunk ORCBlockInputFormat::read() approx_bytes_read_for_chunk = file_reader->GetRawORCReader()->getStripe(stripe_current)->getDataLength(); ++stripe_current; - Chunk res; /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? &block_missing_values : nullptr; - arrow_column_to_ch_column->arrowTableToCHChunk(res, table, num_rows, block_missing_values_ptr); - return res; + return arrow_column_to_ch_column->arrowTableToCHChunk(table, num_rows, block_missing_values_ptr); } void ORCBlockInputFormat::resetParser() diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 62e576d49535..d41cb3447deb 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -601,7 +601,7 @@ void ParquetBlockInputFormat::decodeOneChunk(size_t row_group_batch_idx, std::un /// If defaults_for_omitted_fields is true, calculate the default values from default expression for omitted fields. /// Otherwise fill the missing columns with zero values of its type. BlockMissingValues * block_missing_values_ptr = format_settings.defaults_for_omitted_fields ? 
&res.block_missing_values : nullptr; - row_group_batch.arrow_column_to_ch_column->arrowTableToCHChunk(res.chunk, *tmp_table, (*tmp_table)->num_rows(), block_missing_values_ptr); + res.chunk = row_group_batch.arrow_column_to_ch_column->arrowTableToCHChunk(*tmp_table, (*tmp_table)->num_rows(), block_missing_values_ptr); lock.lock(); diff --git a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp index 3584f137225d..14a912a180de 100644 --- a/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp +++ b/src/Storages/DataLakes/DeltaLakeMetadataParser.cpp @@ -282,11 +282,10 @@ struct DeltaLakeMetadataParser::Impl format_settings.date_time_overflow_behavior, /* case_insensitive_column_matching */false); - Chunk res; std::shared_ptr table; THROW_ARROW_NOT_OK(reader->ReadTable(&table)); - column_reader.arrowTableToCHChunk(res, table, reader->parquet_reader()->metadata()->num_rows()); + Chunk res = column_reader.arrowTableToCHChunk(table, reader->parquet_reader()->metadata()->num_rows()); const auto & res_columns = res.getColumns(); if (res_columns.size() != 2) From 1e05d9ed3f4a960ca6a219514fd56fa13c644efc Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Sun, 7 Apr 2024 20:36:02 +0300 Subject: [PATCH 322/470] Added tests --- .../03036_parquet_arrow_nullable.reference | 40 ++++++++++++ .../03036_parquet_arrow_nullable.sh | 63 +++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 tests/queries/0_stateless/03036_parquet_arrow_nullable.reference create mode 100755 tests/queries/0_stateless/03036_parquet_arrow_nullable.sh diff --git a/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference b/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference new file mode 100644 index 000000000000..8820bb7cb9f7 --- /dev/null +++ b/tests/queries/0_stateless/03036_parquet_arrow_nullable.reference @@ -0,0 +1,40 @@ +Parquet +a UInt64 +a_nullable Nullable(UInt64) +Arrow +a UInt64 +a_nullable Nullable(UInt64) +Parquet +b Array(Nullable(UInt64)) +b_nullable Array(Nullable(UInt64)) +Arrow +b Array(Nullable(UInt64)) +b_nullable Array(Nullable(UInt64)) +Parquet +c Tuple(\n a UInt64,\n b String) +c_nullable Tuple(\n a Nullable(UInt64),\n b Nullable(String)) +Arrow +c Tuple(\n a UInt64,\n b String) +c_nullable Tuple(\n a Nullable(UInt64),\n b Nullable(String)) +Parquet +d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String)))) +Arrow +d Tuple(\n a UInt64,\n b Tuple(\n a UInt64,\n b String),\n d_nullable Tuple(\n a UInt64,\n b Tuple(\n a Nullable(UInt64),\n b Nullable(String)))) +Parquet +e Map(UInt64, Nullable(String)) +e_nullable Map(UInt64, Nullable(String)) +Arrow +e Map(UInt64, Nullable(String)) +e_nullable Map(UInt64, Nullable(String)) +Parquet +f Map(UInt64, Map(UInt64, Nullable(String))) +f_nullables Map(UInt64, Map(UInt64, Nullable(String))) +Arrow +f Map(UInt64, Map(UInt64, Nullable(String))) +f_nullables Map(UInt64, Map(UInt64, Nullable(String))) +Parquet +g String +g_nullable Nullable(String) +Arrow +g LowCardinality(String) +g_nullable LowCardinality(String) diff --git a/tests/queries/0_stateless/03036_parquet_arrow_nullable.sh b/tests/queries/0_stateless/03036_parquet_arrow_nullable.sh new file mode 100755 index 000000000000..bdd641e2b943 --- /dev/null +++ b/tests/queries/0_stateless/03036_parquet_arrow_nullable.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck 
source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.data + +formats="Parquet Arrow" + +for format in $formats +do + echo $format + $CLICKHOUSE_LOCAL -q "select * from generateRandom('a UInt64, a_nullable Nullable(UInt64)', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" +done + +for format in $formats +do + echo $format + $CLICKHOUSE_LOCAL -q "select * from generateRandom('b Array(UInt64), b_nullable Array(Nullable(UInt64))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" +done + +for format in $formats +do + echo $format + $CLICKHOUSE_LOCAL -q "select * from generateRandom('c Tuple(a UInt64, b String), c_nullable Tuple(a Nullable(UInt64), b Nullable(String))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" +done + +for format in $formats +do + echo $format + $CLICKHOUSE_LOCAL -q "select * from generateRandom('d Tuple(a UInt64, b Tuple(a UInt64, b String), d_nullable Tuple(a UInt64, b Tuple(a Nullable(UInt64), b Nullable(String))))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" +done + +for format in $formats +do + echo $format + $CLICKHOUSE_LOCAL -q "select * from generateRandom('e Map(UInt64, String), e_nullable Map(UInt64, Nullable(String))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" +done + +for format in $formats +do + echo $format + $CLICKHOUSE_LOCAL -q "select * from generateRandom('f Map(UInt64, Map(UInt64, String)), f_nullables Map(UInt64, Map(UInt64, Nullable(String)))', 42) limit 10 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" +done + +for format in $formats +do + echo $format + $CLICKHOUSE_LOCAL -q "select * from generateRandom('g LowCardinality(String), g_nullable LowCardinality(Nullable(String))', 42) limit 10 settings output_format_arrow_low_cardinality_as_dictionary=1, allow_suspicious_low_cardinality_types=1 format $format" > $DATA_FILE + $CLICKHOUSE_LOCAL -q "desc file('$DATA_FILE') SETTINGS schema_inference_make_columns_nullable = 0" +done + +rm $DATA_FILE + From 373bf4968408e41392f5dc7e6791fc61143939f9 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sun, 7 Apr 2024 20:05:41 +0200 Subject: [PATCH 323/470] fix --- src/Storages/WindowView/StorageWindowView.cpp | 2 +- .../queries/0_stateless/01069_window_view_proc_tumble_watch.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index f82f5b079109..04c26053dbab 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -1710,7 +1710,7 @@ void StorageWindowView::throwIfWindowViewIsDisabled(ContextPtr local_context) co { if (disabled_due_to_analyzer || (local_context && local_context->getSettingsRef().allow_experimental_analyzer)) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Experimental WINDOW VIEW feature is not supported " - "with new infrastructure for query analysis (the setting 'allow_experimental_analyzer')"); + "in the current 
infrastructure for query analysis (the setting 'allow_experimental_analyzer')"); } void registerStorageWindowView(StorageFactory & factory) diff --git a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py index eb31b2ccbcf1..21c2e831afc5 100755 --- a/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py +++ b/tests/queries/0_stateless/01069_window_view_proc_tumble_watch.py @@ -28,6 +28,8 @@ client1.expect(prompt) client2.send("SET allow_experimental_window_view = 1") client2.expect(prompt) + client2.send("SET allow_experimental_analyzer = 0") + client2.expect(prompt) client1.send("CREATE DATABASE 01069_window_view_proc_tumble_watch") client1.expect(prompt) From 4b5db7357caf36ca797293ef1ec81df7c492e3cb Mon Sep 17 00:00:00 2001 From: Eduard Karacharov Date: Sun, 7 Apr 2024 20:48:59 +0300 Subject: [PATCH 324/470] docs: predefined query handler example fix --- docs/en/interfaces/http.md | 18 ++++++++++-------- docs/ru/interfaces/http.md | 18 ++++++++++-------- docs/zh/interfaces/http.md | 21 ++++++++++++--------- 3 files changed, 32 insertions(+), 25 deletions(-) diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md index 4eeb19cefcfa..bba5cde16f1a 100644 --- a/docs/en/interfaces/http.md +++ b/docs/en/interfaces/http.md @@ -507,16 +507,18 @@ Example: ``` xml - [^/]+)(/(?P[^/]+))?]]> + [^/]+)]]> GET TEST_HEADER_VALUE - [^/]+)(/(?P[^/]+))?]]> + [^/]+)]]> predefined_query_handler - SELECT value FROM system.settings WHERE name = {name_1:String} - SELECT name, value FROM system.settings WHERE name = {name_2:String} + + SELECT name, value FROM system.settings + WHERE name IN ({name_1:String}, {name_2:String}) + @@ -524,13 +526,13 @@ Example: ``` ``` bash -$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2' -1 -max_final_threads 2 +$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_final_threads' 'http://localhost:8123/query_param_with_url/max_threads?max_threads=1&max_final_threads=2' +max_final_threads 2 +max_threads 1 ``` :::note -In one `predefined_query_handler` only supports one `query` of an insert type. +In one `predefined_query_handler` only one `query` is supported. 
::: ### dynamic_query_handler {#dynamic_query_handler} diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index be8cfbdda6c7..5f11f1b430bd 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -434,16 +434,18 @@ $ curl -v 'http://localhost:8123/predefined_query' ``` xml - [^/]+)(/(?P[^/]+))?]]> + [^/]+)]]> GET TEST_HEADER_VALUE - [^/]+)(/(?P[^/]+))?]]> + [^/]+)]]> predefined_query_handler - SELECT value FROM system.settings WHERE name = {name_1:String} - SELECT name, value FROM system.settings WHERE name = {name_2:String} + + SELECT name, value FROM system.settings + WHERE name IN ({name_1:String}, {name_2:String}) + @@ -451,13 +453,13 @@ $ curl -v 'http://localhost:8123/predefined_query' ``` ``` bash -$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2' -1 -max_final_threads 2 +$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_final_threads' 'http://localhost:8123/query_param_with_url/max_threads?max_threads=1&max_final_threads=2' +max_final_threads 2 +max_threads 1 ``` :::note Предупреждение -В одном `predefined_query_handler` поддерживается только один запрос типа `INSERT`. +В одном `predefined_query_handler` поддерживается только один запрос. ::: ### dynamic_query_handler {#dynamic_query_handler} diff --git a/docs/zh/interfaces/http.md b/docs/zh/interfaces/http.md index 84ca5ed0c47e..f55cf41936f4 100644 --- a/docs/zh/interfaces/http.md +++ b/docs/zh/interfaces/http.md @@ -427,29 +427,32 @@ $ curl -v 'http://localhost:8123/predefined_query' ``` xml - [^/]+)(/(?P[^/]+))?]]> - GET + [^/]+)]]> + GET TEST_HEADER_VALUE - [^/]+)(/(?P[^/]+))?]]> + [^/]+)]]> predefined_query_handler - SELECT value FROM system.settings WHERE name = {name_1:String} - SELECT name, value FROM system.settings WHERE name = {name_2:String} + + SELECT name, value FROM system.settings + WHERE name IN ({name_1:String}, {name_2:String}) + + ``` ``` bash -$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:8123/query_param_with_url/1/max_threads/max_final_threads?max_threads=1&max_final_threads=2' -1 -max_final_threads 2 +$ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_final_threads' 'http://localhost:8123/query_param_with_url/max_threads?max_threads=1&max_final_threads=2' +max_final_threads 2 +max_threads 1 ``` :::warning -在一个`predefined_query_handler`中,只支持insert类型的一个`查询`。 +在一个`predefined_query_handler`中,只支持的一个`查询`。 ::: ### 动态查询 {#dynamic_query_handler} From f5e9a09d69ea0d1f961464e866c77a73c5c0e82e Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 Apr 2024 05:20:09 +0200 Subject: [PATCH 325/470] Remove support for INSERT WATCH query --- src/Client/ClientBase.cpp | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 14 +++----------- src/Interpreters/executeQuery.cpp | 11 ----------- src/Parsers/ASTInsertQuery.cpp | 7 +------ src/Parsers/ASTInsertQuery.h | 2 -- src/Parsers/ParserInsertQuery.cpp | 15 +-------------- 6 files changed, 6 insertions(+), 45 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 7a3192d1d9cf..8107bd943943 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -1964,7 +1964,7 @@ void ClientBase::processParsedSingleQuery(const String & full_query, const Strin } /// INSERT query for which data transfer is needed (not an INSERT SELECT or input()) is processed separately. 
- if (insert && (!insert->select || input_function) && !insert->watch && !is_async_insert_with_inlined_data) + if (insert && (!insert->select || input_function) && !is_async_insert_with_inlined_data) { if (input_function && insert->format.empty()) throw Exception(ErrorCodes::INVALID_USAGE_OF_INPUT, "FORMAT must be specified for function input()"); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index fc58f7b50988..35ff65c23358 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -340,13 +340,10 @@ bool InterpreterInsertQuery::shouldAddSquashingFroStorage(const StoragePtr & tab { auto context_ptr = getContext(); const Settings & settings = context_ptr->getSettingsRef(); - const ASTInsertQuery * query = nullptr; - if (query_ptr) - query = query_ptr->as(); /// Do not squash blocks if it is a sync INSERT into Distributed, since it lead to double bufferization on client and server side. /// Client-side bufferization might cause excessive timeouts (especially in case of big blocks). - return !(settings.distributed_foreground_insert && table->isRemote()) && !async_insert && !no_squash && !(query && query->watch); + return !(settings.distributed_foreground_insert && table->isRemote()) && !async_insert && !no_squash; } Chain InterpreterInsertQuery::buildPreSinkChain( @@ -429,7 +426,7 @@ BlockIO InterpreterInsertQuery::execute() std::vector presink_chains; std::vector sink_chains; - if (!distributed_pipeline || query.watch) + if (!distributed_pipeline) { /// Number of streams works like this: /// * For the SELECT, use `max_threads`, or `max_insert_threads`, or whatever @@ -560,11 +557,6 @@ BlockIO InterpreterInsertQuery::execute() } } } - else if (query.watch) - { - InterpreterWatchQuery interpreter_watch{ query.watch, getContext() }; - pipeline = interpreter_watch.buildQueryPipeline(); - } ThreadGroupPtr running_group; if (current_thread) @@ -591,7 +583,7 @@ BlockIO InterpreterInsertQuery::execute() { res.pipeline = std::move(*distributed_pipeline); } - else if (query.select || query.watch) + else if (query.select) { const auto & header = presink_chains.at(0).getInputHeader(); auto actions_dag = ActionsDAG::makeConvertingActions( diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index ea2f69bd2b15..96a9c8d8c8ef 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -644,15 +644,6 @@ void logExceptionBeforeStart( } } -static void setQuerySpecificSettings(ASTPtr & ast, ContextMutablePtr context) -{ - if (auto * ast_insert_into = ast->as()) - { - if (ast_insert_into->watch) - context->setSetting("output_format_enable_streaming", 1); - } -} - void validateAnalyzerSettings(ASTPtr ast, bool context_value) { if (ast->as()) @@ -898,8 +889,6 @@ static std::tuple executeQueryImpl( if (auto * insert_query = ast->as()) insert_query->tail = istr; - setQuerySpecificSettings(ast, context); - /// There is an option of probabilistic logging of queries. /// If it is used - do the random sampling and "collapse" the settings. 
/// It allows to consistently log queries with all the subqueries in distributed query processing diff --git a/src/Parsers/ASTInsertQuery.cpp b/src/Parsers/ASTInsertQuery.cpp index 72a569fe0471..8e3458539f3b 100644 --- a/src/Parsers/ASTInsertQuery.cpp +++ b/src/Parsers/ASTInsertQuery.cpp @@ -123,13 +123,8 @@ void ASTInsertQuery::formatImpl(const FormatSettings & settings, FormatState & s settings.ostr << delim; select->formatImpl(settings, state, frame); } - else if (watch) - { - settings.ostr << delim; - watch->formatImpl(settings, state, frame); - } - if (!select && !watch) + if (!select) { if (!format.empty()) { diff --git a/src/Parsers/ASTInsertQuery.h b/src/Parsers/ASTInsertQuery.h index b0f444ed7558..aeab0f148bec 100644 --- a/src/Parsers/ASTInsertQuery.h +++ b/src/Parsers/ASTInsertQuery.h @@ -24,7 +24,6 @@ class ASTInsertQuery : public IAST ASTPtr settings_ast; ASTPtr select; - ASTPtr watch; ASTPtr infile; ASTPtr compression; @@ -63,7 +62,6 @@ class ASTInsertQuery : public IAST if (partition_by) { res->partition_by = partition_by->clone(); res->children.push_back(res->partition_by); } if (settings_ast) { res->settings_ast = settings_ast->clone(); res->children.push_back(res->settings_ast); } if (select) { res->select = select->clone(); res->children.push_back(res->select); } - if (watch) { res->watch = watch->clone(); res->children.push_back(res->watch); } if (infile) { res->infile = infile->clone(); res->children.push_back(res->infile); } if (compression) { res->compression = compression->clone(); res->children.push_back(res->compression); } diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp index d1171dd48155..9373e6a1c936 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -36,7 +36,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_format(Keyword::FORMAT); ParserKeyword s_settings(Keyword::SETTINGS); ParserKeyword s_select(Keyword::SELECT); - ParserKeyword s_watch(Keyword::WATCH); ParserKeyword s_partition_by(Keyword::PARTITION_BY); ParserKeyword s_with(Keyword::WITH); ParserToken s_lparen(TokenType::OpeningRoundBracket); @@ -56,7 +55,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ASTPtr columns; ASTPtr format; ASTPtr select; - ASTPtr watch; ASTPtr table_function; ASTPtr settings_ast; ASTPtr partition_by_expr; @@ -143,7 +141,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) String format_str; Pos before_values = pos; - /// VALUES or FORMAT or SELECT or WITH or WATCH. + /// VALUES or FORMAT or SELECT or WITH. /// After FROM INFILE we expect FORMAT, SELECT, WITH or nothing. if (!infile && s_values.ignore(pos, expected)) { @@ -175,14 +173,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) tryGetIdentifierNameInto(format, format_str); } - else if (!infile && s_watch.ignore(pos, expected)) - { - /// If WATCH is defined, return to position before WATCH and parse - /// rest of query as WATCH query. 
- pos = before_values; - ParserWatchQuery watch_p; - watch_p.parse(pos, watch, expected); - } else if (!infile) { /// If all previous conditions were false and it's not FROM INFILE, query is incorrect @@ -286,7 +276,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) query->columns = columns; query->format = std::move(format_str); query->select = select; - query->watch = watch; query->settings_ast = settings_ast; query->data = data != end ? data : nullptr; query->end = end; @@ -295,8 +284,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) query->children.push_back(columns); if (select) query->children.push_back(select); - if (watch) - query->children.push_back(watch); if (settings_ast) query->children.push_back(settings_ast); From fb31ad1736aa6bfc758f40a86dcac5afbc07a01b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 8 Apr 2024 06:57:56 +0000 Subject: [PATCH 326/470] Fix another test --- src/Core/SettingsChangesHistory.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index f43ca154d56f..7fa12780c8c0 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -87,6 +87,7 @@ static std::map sett { {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, + {"query_cache_system_table_handling", QueryCacheSystemTableHandling::Save, QueryCacheSystemTableHandling::Throw, "The query cache no longer caches results of queries against system tables"}, }}, {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, From b8f6217168b56f0c896d3508ec827e0ec94a3bd0 Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 8 Apr 2024 09:57:33 +0000 Subject: [PATCH 327/470] Use function isNotDistinctFrom only in join key --- src/Analyzer/Passes/CrossToInnerJoinPass.cpp | 50 ++----------------- .../Passes/LogicalExpressionOptimizerPass.cpp | 23 +++++++-- src/Analyzer/Utils.cpp | 48 ++++++++++++++++++ src/Analyzer/Utils.h | 3 ++ ...11_join_on_nullsafe_optimization.reference | 8 +++ .../02911_join_on_nullsafe_optimization.sql | 3 ++ 6 files changed, 87 insertions(+), 48 deletions(-) diff --git a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp index d0a5656d3344..3e2a2055fdba 100644 --- a/src/Analyzer/Passes/CrossToInnerJoinPass.cpp +++ b/src/Analyzer/Passes/CrossToInnerJoinPass.cpp @@ -15,6 +15,7 @@ #include #include +#include namespace DB @@ -61,47 +62,7 @@ const QueryTreeNodePtr & getEquiArgument(const QueryTreeNodePtr & cond, size_t i return func->getArguments().getNodes()[index]; } - -/// Check that node has only one source and return it. 
-/// {_, false} - multiple sources -/// {nullptr, true} - no sources -/// {source, true} - single source -std::pair getExpressionSource(const QueryTreeNodePtr & node) -{ - if (const auto * column = node->as()) - { - auto source = column->getColumnSourceOrNull(); - if (!source) - return {nullptr, false}; - return {source.get(), true}; - } - - if (const auto * func = node->as()) - { - const IQueryTreeNode * source = nullptr; - const auto & args = func->getArguments().getNodes(); - for (const auto & arg : args) - { - auto [arg_source, is_ok] = getExpressionSource(arg); - if (!is_ok) - return {nullptr, false}; - - if (!source) - source = arg_source; - else if (arg_source && !source->isEqual(*arg_source)) - return {nullptr, false}; - } - return {source, true}; - - } - - if (node->as()) - return {nullptr, true}; - - return {nullptr, false}; -} - -bool findInTableExpression(const IQueryTreeNode * source, const QueryTreeNodePtr & table_expression) +bool findInTableExpression(const QueryTreeNodePtr & source, const QueryTreeNodePtr & table_expression) { if (!source) return true; @@ -115,7 +76,6 @@ bool findInTableExpression(const IQueryTreeNode * source, const QueryTreeNodePtr || findInTableExpression(source, join_node->getRightTableExpression()); } - return false; } @@ -169,10 +129,10 @@ class CrossToInnerJoinVisitor : public InDepthQueryTreeVisitorWithContext; - explicit JoinOnLogicalExpressionOptimizerVisitor(ContextPtr context) + explicit JoinOnLogicalExpressionOptimizerVisitor(const JoinNode * join_node_, ContextPtr context) : Base(std::move(context)) + , join_node(join_node_) {} void enterImpl(QueryTreeNodePtr & node) @@ -55,10 +56,11 @@ class JoinOnLogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWi } private: + const JoinNode * join_node; bool need_rerun_resolve = false; /// Returns true if type of some operand is changed and parent function needs to be re-resolved - static bool tryOptimizeIsNotDistinctOrIsNull(QueryTreeNodePtr & node, const ContextPtr & context) + bool tryOptimizeIsNotDistinctOrIsNull(QueryTreeNodePtr & node, const ContextPtr & context) { auto & function_node = node->as(); chassert(function_node.getFunctionName() == "or"); @@ -93,6 +95,21 @@ class JoinOnLogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWi const auto & func_name = argument_function->getFunctionName(); if (func_name == "equals" || func_name == "isNotDistinctFrom") { + const auto & argument_nodes = argument_function->getArguments().getNodes(); + if (argument_nodes.size() != 2) + continue; + /// We can rewrite to a <=> b only if we are joining on a and b, + /// because the function is not yet implemented for other cases. 
+ auto first_src = getExpressionSource(argument_nodes[0]); + auto second_src = getExpressionSource(argument_nodes[1]); + if (!first_src || !second_src) + continue; + const auto & lhs_join = *join_node->getLeftTableExpression(); + const auto & rhs_join = *join_node->getRightTableExpression(); + bool arguments_from_both_sides = (first_src->isEqual(lhs_join) && second_src->isEqual(rhs_join)) || + (first_src->isEqual(rhs_join) && second_src->isEqual(lhs_join)); + if (!arguments_from_both_sides) + continue; equals_functions_indices.push_back(or_operands.size() - 1); } else if (func_name == "and") @@ -231,7 +248,7 @@ class LogicalExpressionOptimizerVisitor : public InDepthQueryTreeVisitorWithCont /// Operator <=> is not supported outside of JOIN ON section if (join_node->hasJoinExpression()) { - JoinOnLogicalExpressionOptimizerVisitor join_on_visitor(getContext()); + JoinOnLogicalExpressionOptimizerVisitor join_on_visitor(join_node, getContext()); join_on_visitor.visit(join_node->getJoinExpression()); } return; diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index 8ccf95deadc7..71f4cd350c33 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -760,4 +760,52 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_ty return function_node; } +/** Returns: + * {_, false} - multiple sources + * {nullptr, true} - no sources (for constants) + * {source, true} - single source + */ +std::pair getExpressionSourceImpl(const QueryTreeNodePtr & node) +{ + if (const auto * column = node->as()) + { + auto source = column->getColumnSourceOrNull(); + if (!source) + return {nullptr, false}; + return {source, true}; + } + + if (const auto * func = node->as()) + { + QueryTreeNodePtr source = nullptr; + const auto & args = func->getArguments().getNodes(); + for (const auto & arg : args) + { + auto [arg_source, is_ok] = getExpressionSourceImpl(arg); + if (!is_ok) + return {nullptr, false}; + + if (!source) + source = arg_source; + else if (arg_source && !source->isEqual(*arg_source)) + return {nullptr, false}; + } + return {source, true}; + + } + + if (node->as()) + return {nullptr, true}; + + return {nullptr, false}; +} + +QueryTreeNodePtr getExpressionSource(const QueryTreeNodePtr & node) +{ + auto [source, is_ok] = getExpressionSourceImpl(node); + if (!is_ok) + return nullptr; + return source; +} + } diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index 8e32ef0464c1..b708796a2962 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -105,4 +105,7 @@ NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node); /// Wrap node into `_CAST` function QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context); +/// Checks that node has only one source and returns it +QueryTreeNodePtr getExpressionSource(const QueryTreeNodePtr & node); + } diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference index 976c1503b028..5b6c14ca24f4 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.reference @@ -8,6 +8,14 @@ SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t1.y <=> t2.y AND 2 2 2 2 3 3 3 33 \N \N \N \N +SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) ORDER BY t1.x; +2 2 2 2 +3 3 3 33 +\N \N \N \N +SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x 
IS NULL AND t1.y IS NULL) ORDER BY t1.x; +2 2 2 2 +3 3 3 33 +\N \N \N \N SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) AND t1.y <=> t2.y ORDER BY t1.x NULLS LAST; 2 2 2 2 \N \N \N \N diff --git a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql index 6a98a7bb57bb..5458370db8c8 100644 --- a/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql +++ b/tests/queries/0_stateless/02911_join_on_nullsafe_optimization.sql @@ -14,6 +14,9 @@ SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR (t1.x IS NULL AND t2.x IS NULL)) O SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.x IS NULL AND t1.y <=> t2.y AND t2.x IS NULL) ORDER BY t1.x NULLS LAST; +SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) ORDER BY t1.x; +SELECT * FROM t1 JOIN t2 ON t1.x <=> t2.x AND (t1.x = t1.y OR t1.x IS NULL AND t1.y IS NULL) ORDER BY t1.x; + SELECT * FROM t1 JOIN t2 ON (t1.x = t2.x OR t1.x IS NULL AND t2.x IS NULL) AND t1.y <=> t2.y ORDER BY t1.x NULLS LAST; SELECT * FROM t1 JOIN t2 ON (t1.x <=> t2.x OR t1.y <=> t2.y OR (t1.x IS NULL AND t1.y IS NULL AND t2.x IS NULL AND t2.y IS NULL)) ORDER BY t1.x NULLS LAST; From 057893c3107a3e14f4ec2d9c54a2664129125de4 Mon Sep 17 00:00:00 2001 From: peter279k Date: Mon, 8 Apr 2024 14:32:46 +0800 Subject: [PATCH 328/470] Add checksum to validate the downloaded archive --- docs/en/getting-started/example-datasets/menus.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/en/getting-started/example-datasets/menus.md b/docs/en/getting-started/example-datasets/menus.md index 32fe62865d4e..5a35c1d45bc2 100644 --- a/docs/en/getting-started/example-datasets/menus.md +++ b/docs/en/getting-started/example-datasets/menus.md @@ -18,6 +18,9 @@ Run the command: ```bash wget https://s3.amazonaws.com/menusdata.nypl.org/gzips/2021_08_01_07_01_17_data.tgz +# Option: Validate the checksum +md5sum 2021_08_01_07_01_17_data.tgz +# Checksum should be equal to: db6126724de939a5481e3160a2d67d15 ``` Replace the link to the up to date link from http://menus.nypl.org/data if needed. 
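The dataset guide above verifies the archive by running `md5sum` and comparing the printed hash by eye. A minimal non-interactive sketch of the same check, assuming GNU coreutils' `md5sum --check` is available — the file name and expected hash are the ones quoted in the guide, while the `--check` invocation itself is only an illustration and not part of the patch:

```bash
# Fail loudly (non-zero exit status) if the downloaded archive does not match
# the published MD5 hash. Note the two spaces between hash and file name,
# which is the standard md5sum text-mode checklist format.
echo "db6126724de939a5481e3160a2d67d15  2021_08_01_07_01_17_data.tgz" | md5sum --check -
```

Because the command's exit status reflects the comparison, it can be dropped into a scripted download without any manual inspection of the hash.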
From b369291d47acb9e2e37cd91ea7063d13087259fa Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 1 Apr 2024 01:09:52 +0200 Subject: [PATCH 329/470] Rich syntax highlighting in the client --- src/Client/ClientBaseHelpers.cpp | 121 ++++++++++------------- src/Parsers/ASTOrderByElement.cpp | 1 - src/Parsers/CommonParsers.h | 2 + src/Parsers/ExpressionElementParsers.cpp | 2 +- src/Parsers/ExpressionElementParsers.h | 14 ++- src/Parsers/ExpressionListParsers.cpp | 83 ++++++++++------ src/Parsers/IParser.cpp | 23 +++++ src/Parsers/IParser.h | 37 +++++++ src/Parsers/IParserBase.cpp | 15 ++- 9 files changed, 193 insertions(+), 105 deletions(-) diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index b08626962957..22f6c8912b12 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -1,11 +1,11 @@ #include "ClientBaseHelpers.h" - #include #include -#include +#include #include + namespace DB { @@ -96,77 +96,64 @@ void highlight(const String & query, std::vector & colors { using namespace replxx; - static const std::unordered_map token_to_color - = {{TokenType::Whitespace, Replxx::Color::DEFAULT}, - {TokenType::Comment, Replxx::Color::GRAY}, - {TokenType::BareWord, Replxx::Color::DEFAULT}, - {TokenType::Number, Replxx::Color::GREEN}, - {TokenType::StringLiteral, Replxx::Color::CYAN}, - {TokenType::QuotedIdentifier, Replxx::Color::MAGENTA}, - {TokenType::OpeningRoundBracket, Replxx::Color::BROWN}, - {TokenType::ClosingRoundBracket, Replxx::Color::BROWN}, - {TokenType::OpeningSquareBracket, Replxx::Color::BROWN}, - {TokenType::ClosingSquareBracket, Replxx::Color::BROWN}, - {TokenType::DoubleColon, Replxx::Color::BROWN}, - {TokenType::OpeningCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::ClosingCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)}, - - {TokenType::Comma, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Semicolon, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::VerticalDelimiter, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Dot, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Asterisk, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::HereDoc, Replxx::Color::CYAN}, - {TokenType::Plus, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Minus, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Slash, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Percent, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Arrow, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::QuestionMark, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Colon, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Equals, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::NotEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Less, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Greater, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::LessOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::GreaterOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Spaceship, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::Concatenation, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::At, replxx::color::bold(Replxx::Color::DEFAULT)}, - {TokenType::DoubleAt, Replxx::Color::MAGENTA}, - - {TokenType::EndOfStream, Replxx::Color::DEFAULT}, - - {TokenType::Error, Replxx::Color::RED}, - 
{TokenType::ErrorMultilineCommentIsNotClosed, Replxx::Color::RED}, - {TokenType::ErrorSingleQuoteIsNotClosed, Replxx::Color::RED}, - {TokenType::ErrorDoubleQuoteIsNotClosed, Replxx::Color::RED}, - {TokenType::ErrorSinglePipeMark, Replxx::Color::RED}, - {TokenType::ErrorWrongNumber, Replxx::Color::RED}, - {TokenType::ErrorMaxQuerySizeExceeded, Replxx::Color::RED}}; - - const Replxx::Color unknown_token_color = Replxx::Color::RED; - - Lexer lexer(query.data(), query.data() + query.size()); - size_t pos = 0; + if (colors.empty()) + return; - for (Token token = lexer.nextToken(); !token.isEnd(); token = lexer.nextToken()) + static const std::unordered_map type_to_color = + { + {Highlight::keyword, replxx::color::bold(Replxx::Color::DEFAULT)}, + {Highlight::identifier, Replxx::Color::CYAN}, + {Highlight::function, Replxx::Color::BROWN}, + {Highlight::alias, Replxx::Color::MAGENTA}, + {Highlight::substitution, Replxx::Color::MAGENTA}, + {Highlight::number, Replxx::Color::BRIGHTGREEN}, + {Highlight::string, Replxx::Color::GREEN}, + }; + + const char * begin = query.data(); + const char * end = begin + query.size(); + Tokens tokens(begin, end, 1000, true); + IParser::Pos token_iterator(tokens, static_cast(1000), static_cast(10000)); + Expected expected; + ParserQuery parser(end); + ASTPtr ast; + bool parse_res = false; + + try + { + parse_res = parser.parse(token_iterator, ast, expected); + } + catch (...) { - if (token.type == TokenType::Semicolon || token.type == TokenType::VerticalDelimiter) - ReplxxLineReader::setLastIsDelimiter(true); - else if (token.type != TokenType::Whitespace) - ReplxxLineReader::setLastIsDelimiter(false); + return; + } - size_t utf8_len = UTF8::countCodePoints(reinterpret_cast(token.begin), token.size()); - for (size_t code_point_index = 0; code_point_index < utf8_len; ++code_point_index) + size_t pos = 0; + const char * prev = begin; + for (const auto & range : expected.highlights) + { + auto it = type_to_color.find(range.highlight); + if (it != type_to_color.end()) { - if (token_to_color.find(token.type) != token_to_color.end()) - colors[pos + code_point_index] = token_to_color.at(token.type); - else - colors[pos + code_point_index] = unknown_token_color; + pos += UTF8::countCodePoints(reinterpret_cast(prev), range.begin - prev); + size_t utf8_len = UTF8::countCodePoints(reinterpret_cast(range.begin), range.end - range.begin); + + for (size_t code_point_index = 0; code_point_index < utf8_len; ++code_point_index) + colors[pos + code_point_index] = it->second; + + pos += utf8_len; + prev = range.end; } + } + + if (!parse_res) + { + pos += UTF8::countCodePoints(reinterpret_cast(prev), expected.max_parsed_pos - prev); + + if (pos >= colors.size()) + pos = colors.size() - 1; - pos += utf8_len; + colors[pos] = Replxx::Color::BRIGHTRED; } } #endif diff --git a/src/Parsers/ASTOrderByElement.cpp b/src/Parsers/ASTOrderByElement.cpp index be0416359a18..09193a8b5e16 100644 --- a/src/Parsers/ASTOrderByElement.cpp +++ b/src/Parsers/ASTOrderByElement.cpp @@ -1,4 +1,3 @@ -#include #include #include #include diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index 49964b5c7281..2277e348b0f2 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -601,6 +601,8 @@ class ParserKeyword : public IParserBase constexpr const char * getName() const override { return s.data(); } + Highlight highlight() const override { return Highlight::keyword; } + protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git 
a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 2c8ab65d1fc6..d5a67c09f9fc 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -278,7 +278,7 @@ bool ParserTableAsStringLiteralIdentifier::parseImpl(Pos & pos, ASTPtr & node, E bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr id_list; - if (!ParserList(std::make_unique(allow_query_parameter), std::make_unique(TokenType::Dot), false) + if (!ParserList(std::make_unique(allow_query_parameter, highlight_type), std::make_unique(TokenType::Dot), false) .parse(pos, id_list, expected)) return false; diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index b29f5cc42510..61e35690938b 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -25,12 +25,15 @@ class ParserSubquery : public IParserBase class ParserIdentifier : public IParserBase { public: - explicit ParserIdentifier(bool allow_query_parameter_ = false) : allow_query_parameter(allow_query_parameter_) {} + explicit ParserIdentifier(bool allow_query_parameter_ = false, Highlight highlight_type_ = Highlight::identifier) + : allow_query_parameter(allow_query_parameter_), highlight_type(highlight_type_) {} + Highlight highlight() const override { return highlight_type; } protected: const char * getName() const override { return "identifier"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool allow_query_parameter; + Highlight highlight_type; }; @@ -53,8 +56,8 @@ class ParserTableAsStringLiteralIdentifier : public IParserBase class ParserCompoundIdentifier : public IParserBase { public: - explicit ParserCompoundIdentifier(bool table_name_with_optional_uuid_ = false, bool allow_query_parameter_ = false) - : table_name_with_optional_uuid(table_name_with_optional_uuid_), allow_query_parameter(allow_query_parameter_) + explicit ParserCompoundIdentifier(bool table_name_with_optional_uuid_ = false, bool allow_query_parameter_ = false, Highlight highlight_type_ = Highlight::identifier) + : table_name_with_optional_uuid(table_name_with_optional_uuid_), allow_query_parameter(allow_query_parameter_), highlight_type(highlight_type_) { } @@ -63,6 +66,7 @@ class ParserCompoundIdentifier : public IParserBase bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool table_name_with_optional_uuid; bool allow_query_parameter; + Highlight highlight_type; }; /** *, t.*, db.table.*, COLUMNS('') APPLY(...) or EXCEPT(...) or REPLACE(...) @@ -253,6 +257,7 @@ class ParserNumber : public IParserBase protected: const char * getName() const override { return "number"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + Highlight highlight() const override { return Highlight::number; } }; /** Unsigned integer, used in right hand side of tuple access operator (x.1). 
@@ -273,6 +278,7 @@ class ParserStringLiteral : public IParserBase protected: const char * getName() const override { return "string literal"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + Highlight highlight() const override { return Highlight::string; } }; @@ -363,6 +369,7 @@ class ParserAlias : public IParserBase const char * getName() const override { return "alias"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + Highlight highlight() const override { return Highlight::alias; } }; @@ -385,6 +392,7 @@ class ParserSubstitution : public IParserBase protected: const char * getName() const override { return "substitution"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; + Highlight highlight() const override { return Highlight::substitution; } }; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 05691529f430..276b4e820742 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -441,6 +441,21 @@ bool ParserKeyValuePairsList::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return parser.parse(pos, node, expected); } +namespace +{ + /// This wrapper is needed to highlight function names differently. + class ParserFunctionName : public IParserBase + { + protected: + const char * getName() const override { return "function name"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + ParserCompoundIdentifier parser(false, true, Highlight::function); + return parser.parse(pos, node, expected); + } + }; +} + enum class Action { @@ -809,6 +824,7 @@ struct ParserExpressionImpl static const Operator finish_between_operator; + ParserFunctionName function_name_parser; ParserCompoundIdentifier identifier_parser{false, true}; ParserNumber number_parser; ParserAsterisk asterisk_parser; @@ -2359,7 +2375,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr identifier; - if (ParserCompoundIdentifier(false,true).parse(pos, identifier, expected) + if (ParserFunctionName().parse(pos, identifier, expected) && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) { auto start = getFunctionLayer(identifier, is_table_function, allow_function_parameters); @@ -2497,7 +2513,7 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos { if (typeid_cast(layers.back().get()) || typeid_cast(layers.back().get())) { - if (identifier_parser.parse(pos, tmp, expected) + if (function_name_parser.parse(pos, tmp, expected) && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) { layers.push_back(getFunctionLayer(tmp, layers.front()->is_table_function)); @@ -2629,49 +2645,52 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos { layers.back()->pushOperand(std::move(tmp)); } - else if (identifier_parser.parse(pos, tmp, expected)) + else { - if (pos->type == TokenType::OpeningRoundBracket) + old_pos = pos; + if (function_name_parser.parse(pos, tmp, expected) && pos->type == TokenType::OpeningRoundBracket) { ++pos; layers.push_back(getFunctionLayer(tmp, layers.front()->is_table_function)); return Action::OPERAND; } - else + pos = old_pos; + + if (identifier_parser.parse(pos, tmp, expected)) { layers.back()->pushOperand(std::move(tmp)); } - } - else if (substitution_parser.parse(pos, tmp, expected)) - { - layers.back()->pushOperand(std::move(tmp)); - } - else if (pos->type == TokenType::OpeningRoundBracket) - { - - 
if (subquery_parser.parse(pos, tmp, expected)) + else if (substitution_parser.parse(pos, tmp, expected)) { layers.back()->pushOperand(std::move(tmp)); - return Action::OPERATOR; } + else if (pos->type == TokenType::OpeningRoundBracket) + { - ++pos; - layers.push_back(std::make_unique()); - return Action::OPERAND; - } - else if (pos->type == TokenType::OpeningSquareBracket) - { - ++pos; - layers.push_back(std::make_unique()); - return Action::OPERAND; - } - else if (mysql_global_variable_parser.parse(pos, tmp, expected)) - { - layers.back()->pushOperand(std::move(tmp)); - } - else - { - return Action::NONE; + if (subquery_parser.parse(pos, tmp, expected)) + { + layers.back()->pushOperand(std::move(tmp)); + return Action::OPERATOR; + } + + ++pos; + layers.push_back(std::make_unique()); + return Action::OPERAND; + } + else if (pos->type == TokenType::OpeningSquareBracket) + { + ++pos; + layers.push_back(std::make_unique()); + return Action::OPERAND; + } + else if (mysql_global_variable_parser.parse(pos, tmp, expected)) + { + layers.back()->pushOperand(std::move(tmp)); + } + else + { + return Action::NONE; + } } return Action::OPERATOR; diff --git a/src/Parsers/IParser.cpp b/src/Parsers/IParser.cpp index 41981a4bb8aa..eb4ddfa01d24 100644 --- a/src/Parsers/IParser.cpp +++ b/src/Parsers/IParser.cpp @@ -9,6 +9,7 @@ namespace ErrorCodes extern const int TOO_SLOW_PARSING; } + IParser::Pos & IParser::Pos::operator=(const IParser::Pos & rhs) { depth = rhs.depth; @@ -32,4 +33,26 @@ IParser::Pos & IParser::Pos::operator=(const IParser::Pos & rhs) return *this; } + +template +static bool intersects(T a_begin, T a_end, T b_begin, T b_end) +{ + return (a_begin <= b_begin && b_begin < a_end) + || (b_begin <= a_begin && a_begin < b_end); +} + + +void Expected::highlight(HighlightedRange range) +{ + auto it = highlights.lower_bound(range); + while (it != highlights.end() && range.begin < it->end) + { + if (intersects(range.begin, range.end, it->begin, it->end)) + it = highlights.erase(it); + else + ++it; + } + highlights.insert(range); +} + } diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index 291f8ee7d44a..d79bc0fb9998 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -21,6 +22,30 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +enum class Highlight +{ + none = 0, + keyword, + identifier, + function, + alias, + substitution, + number, + string, +}; + +struct HighlightedRange +{ + const char * begin; + const char * end; + Highlight highlight; + + auto operator<=>(const HighlightedRange & other) const + { + return begin <=> other.begin; + } +}; + /** Collects variants, how parser could proceed further at rightmost position. */ @@ -29,6 +54,8 @@ struct Expected absl::InlinedVector variants; const char * max_parsed_pos = nullptr; + std::set highlights; + /// 'description' should be statically allocated string. ALWAYS_INLINE void add(const char * current_pos, const char * description) { @@ -48,6 +75,8 @@ struct Expected { add(it->begin, description); } + + void highlight(HighlightedRange range); }; @@ -158,6 +187,14 @@ class IParser return parse(pos, node, expected); } + /** If the parsed fragment should be highlighted in the query editor, + * which type of highlighting to use? 
+ */ + virtual Highlight highlight() const + { + return Highlight::none; + } + virtual ~IParser() = default; }; diff --git a/src/Parsers/IParserBase.cpp b/src/Parsers/IParserBase.cpp index 0241250926dc..1293dd7dacb2 100644 --- a/src/Parsers/IParserBase.cpp +++ b/src/Parsers/IParserBase.cpp @@ -10,8 +10,21 @@ bool IParserBase::parse(Pos & pos, ASTPtr & node, Expected & expected) return wrapParseImpl(pos, IncreaseDepthTag{}, [&] { + const char * begin = pos->begin; bool res = parseImpl(pos, node, expected); - if (!res) + if (res) + { + Highlight type = highlight(); + if (type != Highlight::none) + { + HighlightedRange range; + range.begin = begin; + range.end = pos->begin; + range.highlight = type; + expected.highlight(range); + } + } + else node = nullptr; return res; }); From b9406f79d168f6a18881bbabc89826de36a155a1 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 4 Apr 2024 23:10:34 +0200 Subject: [PATCH 330/470] Better highlighting --- src/Client/ClientBaseHelpers.cpp | 4 +++- src/Parsers/IParserBase.cpp | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index 22f6c8912b12..ce4ee9a0559a 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -146,7 +146,9 @@ void highlight(const String & query, std::vector & colors } } - if (!parse_res) + Token last_token = token_iterator.max(); + + if (!parse_res || last_token.isError() || (!token_iterator->isEnd() && token_iterator->type != TokenType::Semicolon)) { pos += UTF8::countCodePoints(reinterpret_cast(prev), expected.max_parsed_pos - prev); diff --git a/src/Parsers/IParserBase.cpp b/src/Parsers/IParserBase.cpp index 1293dd7dacb2..9d39056a8f16 100644 --- a/src/Parsers/IParserBase.cpp +++ b/src/Parsers/IParserBase.cpp @@ -15,12 +15,16 @@ bool IParserBase::parse(Pos & pos, ASTPtr & node, Expected & expected) if (res) { Highlight type = highlight(); - if (type != Highlight::none) + if (pos->begin > begin && type != Highlight::none) { + Pos prev_token = pos; + --prev_token; + HighlightedRange range; range.begin = begin; - range.end = pos->begin; + range.end = prev_token->end; range.highlight = type; + expected.highlight(range); } } From c8d6dc47c3f66a1bb6bae2c6d0f2f13d64173fff Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 4 Apr 2024 23:18:18 +0200 Subject: [PATCH 331/470] Fix test --- src/Client/ClientBaseHelpers.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index ce4ee9a0559a..018185894993 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -125,6 +125,8 @@ void highlight(const String & query, std::vector & colors } catch (...) { + /// Skip highlighting in the case of exceptions during parsing. + /// It is ok to ignore unknown exceptions here. 
return; } @@ -157,6 +159,11 @@ void highlight(const String & query, std::vector & colors colors[pos] = Replxx::Color::BRIGHTRED; } + + if (last_token.type == TokenType::Semicolon || last_token.type == TokenType::VerticalDelimiter) + ReplxxLineReader::setLastIsDelimiter(true); + else if (last_token.type != TokenType::Whitespace) + ReplxxLineReader::setLastIsDelimiter(false); } #endif From fb664fa99f400d9a9e5ef861fb6a91b62a5d35a5 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 6 Apr 2024 01:47:47 +0200 Subject: [PATCH 332/470] Slightly better --- src/Client/ClientBaseHelpers.cpp | 13 +++++++-- src/Parsers/parseQuery.cpp | 49 ++++++++++++++++++-------------- src/Parsers/parseQuery.h | 5 ++++ 3 files changed, 43 insertions(+), 24 deletions(-) diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index 018185894993..c08e015e1c53 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -149,8 +150,11 @@ void highlight(const String & query, std::vector & colors } Token last_token = token_iterator.max(); + /// Raw data in INSERT queries, which is not necessarily tokenized. + const char * insert_data = ast ? getInsertData(ast) : nullptr; - if (!parse_res || last_token.isError() || (!token_iterator->isEnd() && token_iterator->type != TokenType::Semicolon)) + if ((!parse_res || last_token.isError() || (!token_iterator->isEnd() && token_iterator->type != TokenType::Semicolon)) + && !(insert_data && expected.max_parsed_pos >= insert_data)) { pos += UTF8::countCodePoints(reinterpret_cast(prev), expected.max_parsed_pos - prev); @@ -160,10 +164,15 @@ void highlight(const String & query, std::vector & colors colors[pos] = Replxx::Color::BRIGHTRED; } - if (last_token.type == TokenType::Semicolon || last_token.type == TokenType::VerticalDelimiter) + if (last_token.type == TokenType::Semicolon || last_token.type == TokenType::VerticalDelimiter + || query.ends_with(';') || query.ends_with("\\G")) /// This is for raw data in INSERT queries, which is not necessarily tokenized. + { ReplxxLineReader::setLastIsDelimiter(true); + } else if (last_token.type != TokenType::Whitespace) + { ReplxxLineReader::setLastIsDelimiter(false); + } } #endif diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index 51878efa7067..c6727a369958 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -226,6 +226,29 @@ std::string getUnmatchedParenthesesErrorMessage( } +const char * getInsertData(const ASTPtr & ast) +{ + /// Either it is INSERT or EXPLAIN INSERT. + + ASTInsertQuery * insert = nullptr; + if (auto * explain = ast->as()) + { + if (auto explained_query = explain->getExplainedQuery()) + { + insert = explained_query->as(); + } + } + else + { + insert = ast->as(); + } + + if (insert) + return insert->data; + return nullptr; +} + + ASTPtr tryParseQuery( IParser & parser, const char * & _out_query_end, /* also query begin as input parameter */ @@ -270,29 +293,11 @@ ASTPtr tryParseQuery( if (res && max_parser_depth) res->checkDepth(max_parser_depth); - ASTInsertQuery * insert = nullptr; - if (parse_res) - { - if (auto * explain = res->as()) - { - if (auto explained_query = explain->getExplainedQuery()) - { - insert = explained_query->as(); - } - } - else - { - insert = res->as(); - } - } - - // If parsed query ends at data for insertion. Data for insertion could be - // in any format and not necessary be lexical correct, so we can't perform - // most of the checks. 
- if (insert && insert->data) - { + /// If parsed query ends at data for insertion. Data for insertion could be + /// in any format and not necessary be lexical correct, so we can't perform + /// most of the checks. + if (res && getInsertData(res)) return res; - } // More granular checks for queries other than INSERT w/inline data. /// Lexical error diff --git a/src/Parsers/parseQuery.h b/src/Parsers/parseQuery.h index 93c1a4652671..564415d0b85c 100644 --- a/src/Parsers/parseQuery.h +++ b/src/Parsers/parseQuery.h @@ -71,4 +71,9 @@ std::pair splitMultipartQuery( size_t max_parser_backtracks, bool allow_settings_after_format_in_insert); +/** If the query contains raw data part, such as INSERT ... FORMAT ..., return a pointer to it. + * The SQL parser stops at the raw data part, which is parsed by a separate parser. + */ +const char * getInsertData(const ASTPtr & ast); + } From 0ff26d2d777b2bd861fc93d8a337993eb292a260 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sun, 7 Apr 2024 23:23:36 +0200 Subject: [PATCH 333/470] A little better --- src/Client/ClientBaseHelpers.cpp | 6 +++--- src/Parsers/ExpressionElementParsers.cpp | 2 +- src/Parsers/ExpressionElementParsers.h | 1 - 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index c08e015e1c53..4dcd025d9fc1 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -105,9 +105,9 @@ void highlight(const String & query, std::vector & colors {Highlight::keyword, replxx::color::bold(Replxx::Color::DEFAULT)}, {Highlight::identifier, Replxx::Color::CYAN}, {Highlight::function, Replxx::Color::BROWN}, - {Highlight::alias, Replxx::Color::MAGENTA}, + {Highlight::alias, replxx::color::rgb666(0, 4, 4)}, {Highlight::substitution, Replxx::Color::MAGENTA}, - {Highlight::number, Replxx::Color::BRIGHTGREEN}, + {Highlight::number, replxx::color::rgb666(0, 4, 0)}, {Highlight::string, Replxx::Color::GREEN}, }; @@ -161,7 +161,7 @@ void highlight(const String & query, std::vector & colors if (pos >= colors.size()) pos = colors.size() - 1; - colors[pos] = Replxx::Color::BRIGHTRED; + colors[pos] = replxx::color::bg(replxx::color::rgb666(5, 3, 3)); } if (last_token.type == TokenType::Semicolon || last_token.type == TokenType::VerticalDelimiter diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index d5a67c09f9fc..dce0bc62d5b5 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -1491,7 +1491,7 @@ const char * ParserAlias::restricted_keywords[] = bool ParserAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_as(Keyword::AS); - ParserIdentifier id_p; + ParserIdentifier id_p(false, Highlight::alias); bool has_as_word = s_as.ignore(pos, expected); if (!allow_alias_without_as_keyword && !has_as_word) diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 61e35690938b..6dbb75450edd 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -369,7 +369,6 @@ class ParserAlias : public IParserBase const char * getName() const override { return "alias"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - Highlight highlight() const override { return Highlight::alias; } }; From 537f045c1ce258757eda7fce5c639461e9449114 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 Apr 2024 04:19:34 +0200 Subject: [PATCH 334/470] 
Miscellaneous --- src/Client/ClientBase.cpp | 8 ++++---- src/Client/ClientBaseHelpers.cpp | 2 +- src/Parsers/parseDatabaseAndTableName.cpp | 15 --------------- src/Parsers/parseQuery.cpp | 21 +++++++++++---------- 4 files changed, 16 insertions(+), 30 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index 7a3192d1d9cf..248448f36c6b 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2056,7 +2056,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( return MultiQueryProcessingStage::QUERIES_END; // Remove leading empty newlines and other whitespace, because they - // are annoying to filter in query log. This is mostly relevant for + // are annoying to filter in the query log. This is mostly relevant for // the tests. while (this_query_begin < all_queries_end && isWhitespaceASCII(*this_query_begin)) ++this_query_begin; @@ -2086,7 +2086,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( { parsed_query = parseQuery(this_query_end, all_queries_end, true); } - catch (Exception & e) + catch (const Exception & e) { current_exception.reset(e.clone()); return MultiQueryProcessingStage::PARSING_EXCEPTION; @@ -2111,9 +2111,9 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( // INSERT queries may have the inserted data in the query text // that follow the query itself, e.g. "insert into t format CSV 1;2". // They need special handling. First of all, here we find where the - // inserted data ends. In multy-query mode, it is delimited by a + // inserted data ends. In multi-query mode, it is delimited by a // newline. - // The VALUES format needs even more handling -- we also allow the + // The VALUES format needs even more handling - we also allow the // data to be delimited by semicolon. This case is handled later by // the format parser itself. 
// We can't do multiline INSERTs with inline data, because most diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index 4dcd025d9fc1..da164bd12263 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -161,7 +161,7 @@ void highlight(const String & query, std::vector & colors if (pos >= colors.size()) pos = colors.size() - 1; - colors[pos] = replxx::color::bg(replxx::color::rgb666(5, 3, 3)); + colors[pos] = Replxx::Color::BRIGHTRED; } if (last_token.type == TokenType::Semicolon || last_token.type == TokenType::VerticalDelimiter diff --git a/src/Parsers/parseDatabaseAndTableName.cpp b/src/Parsers/parseDatabaseAndTableName.cpp index 81660bc46008..eaf020e445bf 100644 --- a/src/Parsers/parseDatabaseAndTableName.cpp +++ b/src/Parsers/parseDatabaseAndTableName.cpp @@ -60,21 +60,6 @@ bool parseDatabaseAndTableAsAST(IParser::Pos & pos, Expected & expected, ASTPtr } -bool parseDatabase(IParser::Pos & pos, Expected & expected, String & database_str) -{ - ParserToken s_dot(TokenType::Dot); - ParserIdentifier identifier_parser; - - ASTPtr database; - database_str = ""; - - if (!identifier_parser.parse(pos, database, expected)) - return false; - - tryGetIdentifierNameInto(database, database_str); - return true; -} - bool parseDatabaseAsAST(IParser::Pos & pos, Expected & expected, ASTPtr & database) { ParserIdentifier identifier_parser(/* allow_query_parameter */true); diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index c6727a369958..2a6abc234065 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -226,24 +226,27 @@ std::string getUnmatchedParenthesesErrorMessage( } -const char * getInsertData(const ASTPtr & ast) +static ASTInsertQuery * getInsertAST(const ASTPtr & ast) { /// Either it is INSERT or EXPLAIN INSERT. 
- - ASTInsertQuery * insert = nullptr; if (auto * explain = ast->as()) { if (auto explained_query = explain->getExplainedQuery()) { - insert = explained_query->as(); + return explained_query->as(); } } else { - insert = ast->as(); + return ast->as(); } - if (insert) + return nullptr; +} + +const char * getInsertData(const ASTPtr & ast) +{ + if (const ASTInsertQuery * insert = getInsertAST(ast)) return insert->data; return nullptr; } @@ -439,11 +442,9 @@ std::pair splitMultipartQuery( ast = parseQueryAndMovePosition(parser, pos, end, "", true, max_query_size, max_parser_depth, max_parser_backtracks); - auto * insert = ast->as(); - - if (insert && insert->data) + if (ASTInsertQuery * insert = getInsertAST(ast)) { - /// Data for INSERT is broken on new line + /// Data for INSERT is broken on the new line pos = insert->data; while (*pos && *pos != '\n') ++pos; From 426104a2d61dea08ccc7c64052cb5c1a0de66d9d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 Apr 2024 04:30:22 +0200 Subject: [PATCH 335/470] Fix tests --- ..._autocomplete_word_break_characters.expect | 2 +- .../01676_clickhouse_client_autocomplete.sh | 2 +- .../01702_system_query_log.reference | 20 +++++++++---------- ...160_client_autocomplete_parse_query.expect | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect index 44f3ba9681a9..ffd3e742cec8 100755 --- a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect +++ b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect @@ -20,7 +20,7 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file --highlight=0" expect ":) " # Make a query diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh index ebd6490077e4..f04ffdae229f 100755 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh @@ -43,7 +43,7 @@ expect_after { -i \$any_spawn_id timeout { exit 1 } } -spawn bash -c "$*" +spawn bash -c "$* --highlight 0" expect ":) " # Make a query diff --git a/tests/queries/0_stateless/01702_system_query_log.reference b/tests/queries/0_stateless/01702_system_query_log.reference index c653021aa5ae..5498b5377ba5 100644 --- a/tests/queries/0_stateless/01702_system_query_log.reference +++ b/tests/queries/0_stateless/01702_system_query_log.reference @@ -43,16 +43,16 @@ Alter ALTER TABLE sqllt.table UPDATE i = i + 1 WHERE 1; Alter ALTER TABLE sqllt.table DELETE WHERE i > 65535; Select -- not done, seems to hard, so I\'ve skipped queries of ALTER-X, where X is:\n-- PARTITION\n-- ORDER BY\n-- SAMPLE BY\n-- INDEX\n-- CONSTRAINT\n-- TTL\n-- USER\n-- QUOTA\n-- ROLE\n-- ROW POLICY\n-- SETTINGS PROFILE\n\nSELECT \'SYSTEM queries\'; System SYSTEM FLUSH LOGS; -System SYSTEM STOP MERGES sqllt.table -System SYSTEM START MERGES sqllt.table -System SYSTEM STOP TTL MERGES sqllt.table -System SYSTEM START TTL MERGES sqllt.table -System SYSTEM STOP MOVES sqllt.table -System SYSTEM START MOVES sqllt.table -System SYSTEM STOP FETCHES 
sqllt.table -System SYSTEM START FETCHES sqllt.table -System SYSTEM STOP REPLICATED SENDS sqllt.table -System SYSTEM START REPLICATED SENDS sqllt.table +System SYSTEM STOP MERGES sqllt.table; +System SYSTEM START MERGES sqllt.table; +System SYSTEM STOP TTL MERGES sqllt.table; +System SYSTEM START TTL MERGES sqllt.table; +System SYSTEM STOP MOVES sqllt.table; +System SYSTEM START MOVES sqllt.table; +System SYSTEM STOP FETCHES sqllt.table; +System SYSTEM START FETCHES sqllt.table; +System SYSTEM STOP REPLICATED SENDS sqllt.table; +System SYSTEM START REPLICATED SENDS sqllt.table; Select -- SYSTEM RELOAD DICTIONARY sqllt.dictionary; -- temporary out of order: Code: 210, Connection refused (localhost:9001) (version 21.3.1.1)\n-- DROP REPLICA\n-- haha, no\n-- SYSTEM KILL;\n-- SYSTEM SHUTDOWN;\n\n-- Since we don\'t really care about the actual output, suppress it with `FORMAT Null`.\nSELECT \'SHOW queries\'; Show SHOW CREATE TABLE sqllt.table FORMAT Null; Show SHOW CREATE DICTIONARY sqllt.dictionary FORMAT Null; diff --git a/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect b/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect index 2d404b005c71..30d725e6a2a2 100755 --- a/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect +++ b/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect @@ -21,7 +21,7 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file --highlight=0" expect ":) " # Make a query From bd3f32dc0680c8581b68e5200ff9244a2cdde3a0 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 Apr 2024 05:34:41 +0200 Subject: [PATCH 336/470] Fix a test --- src/Parsers/ParserInsertQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp index d1171dd48155..3e691b13ef2c 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -41,7 +41,6 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_with(Keyword::WITH); ParserToken s_lparen(TokenType::OpeningRoundBracket); ParserToken s_rparen(TokenType::ClosingRoundBracket); - ParserToken s_semicolon(TokenType::Semicolon); ParserIdentifier name_p(true); ParserList columns_p(std::make_unique(), std::make_unique(TokenType::Comma), false); ParserFunction table_function_p{false}; @@ -149,8 +148,9 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { /// If VALUES is defined in query, everything except setting will be parsed as data, /// and if values followed by semicolon, the data should be null. 
- if (!s_semicolon.checkWithoutMoving(pos, expected)) + if (pos->type != TokenType::Semicolon) data = pos->begin; + format_str = "Values"; } else if (s_format.ignore(pos, expected)) From 28272873faa90c0b87d5db3f53df91b40ef71077 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 Apr 2024 05:49:02 +0200 Subject: [PATCH 337/470] Fix test --- ...01565_query_loop_after_client_error.expect | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect index ac69c18ce392..6253840c63cf 100755 --- a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect +++ b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect @@ -24,30 +24,21 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -mn --history_file=$history_file" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -mn --history_file=$history_file --highlight 0" expect "\n:) " -send -- "DROP TABLE IF EXISTS t01565;\n" -# NOTE: this is important for -mn mode, you should send "\r" only after reading echoed command -expect "\r\n" -send -- "\r" +send -- "DROP TABLE IF EXISTS t01565;\r" expect "\nOk." expect "\n:)" -send -- "CREATE TABLE t01565 (c0 String, c1 Int32) ENGINE = Memory() ;\n" -expect "\r\n" -send -- "\r" +send -- "CREATE TABLE t01565 (c0 String, c1 Int32) ENGINE = Memory() ;\r" expect "\nOk." expect "\n:) " -send -- "INSERT INTO t01565(c0, c1) VALUES (\"1\",1) ;\n" -expect "\r\n" -send -- "\r" +send -- "INSERT INTO t01565(c0, c1) VALUES (\"1\",1) ;\r" expect "\n:) " -send -- "INSERT INTO t01565(c0, c1) VALUES ('1', 1) ;\n" -expect "\r\n" -send -- "\r" +send -- "INSERT INTO t01565(c0, c1) VALUES ('1', 1) ;\r" expect "\nOk." 
expect "\n:) " From 064acacd93a7de86cd66bf551905b9ff365a9eef Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 Apr 2024 12:43:10 +0200 Subject: [PATCH 338/470] Update test --- .../0_stateless/02263_format_insert_settings.reference | 10 ---------- .../0_stateless/02263_format_insert_settings.sh | 2 -- 2 files changed, 12 deletions(-) diff --git a/tests/queries/0_stateless/02263_format_insert_settings.reference b/tests/queries/0_stateless/02263_format_insert_settings.reference index 2bba75f6788b..ea8b78faf8c2 100644 --- a/tests/queries/0_stateless/02263_format_insert_settings.reference +++ b/tests/queries/0_stateless/02263_format_insert_settings.reference @@ -21,10 +21,6 @@ INSERT INTO foo FORMAT Values INSERT INTO foo SELECT 1 [oneline] insert into foo select 1 INSERT INTO foo SELECT 1 -[multi] insert into foo watch bar -INSERT INTO foo WATCH bar -[oneline] insert into foo watch bar -INSERT INTO foo WATCH bar [multi] insert into foo format tsv INSERT INTO foo FORMAT tsv [oneline] insert into foo format tsv @@ -41,12 +37,6 @@ SETTINGS max_threads = 1 SELECT 1 [oneline] insert into foo settings max_threads=1 select 1 INSERT INTO foo SETTINGS max_threads = 1 SELECT 1 -[multi] insert into foo settings max_threads=1 watch bar -INSERT INTO foo -SETTINGS max_threads = 1 -WATCH bar -[oneline] insert into foo settings max_threads=1 watch bar -INSERT INTO foo SETTINGS max_threads = 1 WATCH bar [multi] insert into foo settings max_threads=1 format tsv INSERT INTO foo SETTINGS max_threads = 1 diff --git a/tests/queries/0_stateless/02263_format_insert_settings.sh b/tests/queries/0_stateless/02263_format_insert_settings.sh index 49aa56d6c0a2..808ab23ee59b 100755 --- a/tests/queries/0_stateless/02263_format_insert_settings.sh +++ b/tests/queries/0_stateless/02263_format_insert_settings.sh @@ -40,12 +40,10 @@ $CLICKHOUSE_CLIENT -q 'drop table data_02263' run_format_both 'insert into foo values' run_format_both 'insert into foo select 1' -run_format_both 'insert into foo watch bar' run_format_both 'insert into foo format tsv' run_format_both 'insert into foo settings max_threads=1 values' run_format_both 'insert into foo settings max_threads=1 select 1' -run_format_both 'insert into foo settings max_threads=1 watch bar' run_format_both 'insert into foo settings max_threads=1 format tsv' run_format_both 'insert into foo select 1 settings max_threads=1' run_format_both 'insert into foo settings max_threads=1 select 1 settings max_threads=1' From 9ebf091ae41c8fc9deda4a2fd6260dd8c47d102a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 Apr 2024 13:03:55 +0200 Subject: [PATCH 339/470] Documentation --- src/Client/ClientBaseHelpers.cpp | 20 ++++++++++++++++++++ src/Parsers/IParser.h | 2 ++ 2 files changed, 22 insertions(+) diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index da164bd12263..3f3e3b1492fa 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -97,9 +97,16 @@ void highlight(const String & query, std::vector & colors { using namespace replxx; + /// The `colors` array maps to a Unicode code point position in a string into a color. + /// A color is set for every position individually (not for a range). + + /// Empty input. if (colors.empty()) return; + /// The colors should be legible (and look gorgeous) in both dark and light themes. + /// When modifying this, check it in both themes. 
+ static const std::unordered_map type_to_color = { {Highlight::keyword, replxx::color::bold(Replxx::Color::DEFAULT)}, @@ -111,11 +118,20 @@ void highlight(const String & query, std::vector & colors {Highlight::string, Replxx::Color::GREEN}, }; + /// We set reasonably small limits for size/depth, because we don't want the CLI to be slow. + /// While syntax highlighting is unneeded for long queries, which the user couldn't read anyway. + const char * begin = query.data(); const char * end = begin + query.size(); Tokens tokens(begin, end, 1000, true); IParser::Pos token_iterator(tokens, static_cast(1000), static_cast(10000)); Expected expected; + + /// We don't do highlighting for foreign dialects, such as PRQL and Kusto. + /// Only normal ClickHouse SQL queries are highlighted. + + /// Currently we highlight only the first query in the multi-query mode. + ParserQuery parser(end); ASTPtr ast; bool parse_res = false; @@ -138,6 +154,7 @@ void highlight(const String & query, std::vector & colors auto it = type_to_color.find(range.highlight); if (it != type_to_color.end()) { + /// We have to map from byte positions to Unicode positions. pos += UTF8::countCodePoints(reinterpret_cast(prev), range.begin - prev); size_t utf8_len = UTF8::countCodePoints(reinterpret_cast(range.begin), range.end - range.begin); @@ -153,6 +170,8 @@ void highlight(const String & query, std::vector & colors /// Raw data in INSERT queries, which is not necessarily tokenized. const char * insert_data = ast ? getInsertData(ast) : nullptr; + /// Highlight the last error in red. If the parser failed or the lexer found an invalid token, + /// or if it didn't parse all the data (except, the data for INSERT query, which is legitimately unparsed) if ((!parse_res || last_token.isError() || (!token_iterator->isEnd() && token_iterator->type != TokenType::Semicolon)) && !(insert_data && expected.max_parsed_pos >= insert_data)) { @@ -164,6 +183,7 @@ void highlight(const String & query, std::vector & colors colors[pos] = Replxx::Color::BRIGHTRED; } + /// This is a callback for the client/local app to better find query end. Note: this is a kludge, remove it. if (last_token.type == TokenType::Semicolon || last_token.type == TokenType::VerticalDelimiter || query.ends_with(';') || query.ends_with("\\G")) /// This is for raw data in INSERT queries, which is not necessarily tokenized. { diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index d79bc0fb9998..f8146c0a4f6d 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -48,6 +48,8 @@ struct HighlightedRange /** Collects variants, how parser could proceed further at rightmost position. + * Also collects a mapping of parsed ranges for highlighting, + * which is accumulated through the parsing. 
*/ struct Expected { From 444016fb3ee585cae98df9f16285b9c0fff6577a Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 8 Apr 2024 11:20:30 +0000 Subject: [PATCH 340/470] CI: fix unittest issue --- tests/ci/ci.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 36e9b1838052..f60c40f58605 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -773,6 +773,7 @@ def create_from_pr_message( not pr_info.is_pr() and not debug_message ): # if commit_message is provided it's test/debug scenario - do not return # CI options can be configured in PRs only + # if debug_message is provided - it's a test return res message = debug_message or GitRunner(set_cwd_to_git_root=True).run( f"{GIT_PREFIX} log {pr_info.sha} --format=%B -n 1" @@ -790,9 +791,9 @@ def create_from_pr_message( print(f"CI tags from PR body: [{matches_pr}]") matches = list(set(matches + matches_pr)) - if "do not test" in pr_info.labels: - # do_not_test could be set in GH labels - res.do_not_test = True + if "do not test" in pr_info.labels: + # do_not_test could be set in GH labels + res.do_not_test = True for match in matches: if match.startswith("job_"): From 094f94882c972a798ace493ca9a7019255a64f7b Mon Sep 17 00:00:00 2001 From: Ilya Andreev <18560147+andreev-io@users.noreply.github.com> Date: Mon, 8 Apr 2024 11:35:03 +0100 Subject: [PATCH 341/470] Fix a typo in the documentation of the ALTER TABLE ... MODIFY QUERY statement --- docs/en/sql-reference/statements/alter/view.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/statements/alter/view.md b/docs/en/sql-reference/statements/alter/view.md index 59045afdeb60..e063b27424e7 100644 --- a/docs/en/sql-reference/statements/alter/view.md +++ b/docs/en/sql-reference/statements/alter/view.md @@ -8,7 +8,7 @@ sidebar_label: VIEW You can modify `SELECT` query that was specified when a [materialized view](../create/view.md#materialized) was created with the `ALTER TABLE … MODIFY QUERY` statement without interrupting ingestion process. -This command is created to change materialized view created with `TO [db.]name` clause. It does not change the structure of the underling storage table and it does not change the columns' definition of the materialized view, because of this the application of this command is very limited for materialized views are created without `TO [db.]name` clause. +This command is created to change materialized view created with `TO [db.]name` clause. It does not change the structure of the underlying storage table and it does not change the columns' definition of the materialized view, because of this the application of this command is very limited for materialized views are created without `TO [db.]name` clause. **Example with TO table** From 802e6e8f2fcf64d15f424f6db0c815cce56f1e37 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 2 Apr 2024 22:37:52 +0200 Subject: [PATCH 342/470] Fix parsing booleans as values of settings. 
--- src/Parsers/ParserSetQuery.cpp | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/src/Parsers/ParserSetQuery.cpp b/src/Parsers/ParserSetQuery.cpp index 13b881635cd7..f08d2b978c62 100644 --- a/src/Parsers/ParserSetQuery.cpp +++ b/src/Parsers/ParserSetQuery.cpp @@ -210,12 +210,8 @@ bool ParserSetQuery::parseNameValuePair(SettingChange & change, IParser::Pos & p if (!s_eq.ignore(pos, expected)) return false; - if (ParserKeyword(Keyword::TRUE_KEYWORD).ignore(pos, expected)) - value = std::make_shared(Field(static_cast(1))); - else if (ParserKeyword(Keyword::FALSE_KEYWORD).ignore(pos, expected)) - value = std::make_shared(Field(static_cast(0))); /// for SETTINGS disk=disk(type='s3', path='', ...) - else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") + if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") { tryGetIdentifierNameInto(name, change.name); change.value = createFieldFromAST(function_ast); @@ -276,11 +272,7 @@ bool ParserSetQuery::parseNameValuePairWithParameterOrDefault( } /// Setting - if (ParserKeyword(Keyword::TRUE_KEYWORD).ignore(pos, expected)) - node = std::make_shared(Field(static_cast(1))); - else if (ParserKeyword(Keyword::FALSE_KEYWORD).ignore(pos, expected)) - node = std::make_shared(Field(static_cast(0))); - else if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") + if (function_p.parse(pos, function_ast, expected) && function_ast->as()->name == "disk") { change.name = name; change.value = createFieldFromAST(function_ast); From 83d1f1a8769d3be8d78f48db82873b9438ac87f4 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 8 Apr 2024 11:51:59 +0000 Subject: [PATCH 343/470] CI: fix for docs only pr --- tests/ci/ci.py | 7 ++++--- tests/ci/pr_info.py | 1 + 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 36e9b1838052..f60c40f58605 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -773,6 +773,7 @@ def create_from_pr_message( not pr_info.is_pr() and not debug_message ): # if commit_message is provided it's test/debug scenario - do not return # CI options can be configured in PRs only + # if debug_message is provided - it's a test return res message = debug_message or GitRunner(set_cwd_to_git_root=True).run( f"{GIT_PREFIX} log {pr_info.sha} --format=%B -n 1" @@ -790,9 +791,9 @@ def create_from_pr_message( print(f"CI tags from PR body: [{matches_pr}]") matches = list(set(matches + matches_pr)) - if "do not test" in pr_info.labels: - # do_not_test could be set in GH labels - res.do_not_test = True + if "do not test" in pr_info.labels: + # do_not_test could be set in GH labels + res.do_not_test = True for match in matches: if match.startswith("job_"): diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index ddf59c49e1f2..204284785c9a 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -26,6 +26,7 @@ DIFF_IN_DOCUMENTATION_EXT = [ ".html", ".md", + ".mdx", ".yml", ".txt", ".css", From 259d50c57b6227b2a078effcef8de19cd23c346f Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 8 Apr 2024 14:36:07 +0200 Subject: [PATCH 344/470] Add more comments --- src/Interpreters/Cache/EvictionCandidates.cpp | 22 ++++-- src/Interpreters/Cache/EvictionCandidates.h | 8 +- src/Interpreters/Cache/FileCache.cpp | 75 ++++++++++++------- src/Interpreters/Cache/FileCacheFactory.cpp | 8 +- .../Cache/LRUFileCachePriority.cpp | 2 +- src/Interpreters/Cache/Metadata.cpp | 66 ++++++++-------- 6 files 
changed, 108 insertions(+), 73 deletions(-) diff --git a/src/Interpreters/Cache/EvictionCandidates.cpp b/src/Interpreters/Cache/EvictionCandidates.cpp index f9f9bdfe662d..d20ae77d7206 100644 --- a/src/Interpreters/Cache/EvictionCandidates.cpp +++ b/src/Interpreters/Cache/EvictionCandidates.cpp @@ -17,6 +17,11 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +EvictionCandidates::EvictionCandidates() + : log(getLogger("EvictionCandidates")) +{ +} + EvictionCandidates::~EvictionCandidates() { /// Here `queue_entries_to_invalidate` contains queue entries @@ -64,8 +69,11 @@ void EvictionCandidates::add( void EvictionCandidates::removeQueueEntries(const CachePriorityGuard::Lock & lock) { - auto log = getLogger("EvictionCandidates"); + /// Remove queue entries of eviction candidates. + /// This will release space we consider to be hold for them. + LOG_TEST(log, "Will remove {} eviction candidates", size()); + for (const auto & [key, key_candidates] : candidates) { for (const auto & candidate : key_candidates.candidates) @@ -87,6 +95,7 @@ void EvictionCandidates::evict() auto timer = DB::CurrentThread::getProfileEvents().timer(ProfileEvents::FilesystemCacheEvictMicroseconds); + /// If queue entries are already removed, then nothing to invalidate. if (!removed_queue_entries) queue_entries_to_invalidate.reserve(candidates_size); @@ -184,6 +193,12 @@ void EvictionCandidates::finalize( on_finalize.clear(); } +bool EvictionCandidates::needFinalize() const +{ + /// Do we need to call finalize()? + return !on_finalize.empty() || !queue_entries_to_invalidate.empty(); +} + void EvictionCandidates::setSpaceHolder( size_t size, size_t elements, @@ -196,9 +211,4 @@ void EvictionCandidates::setSpaceHolder( hold_space = std::make_unique(size, elements, priority, lock); } -void EvictionCandidates::insert(EvictionCandidates && other, const CachePriorityGuard::Lock &) -{ - candidates.insert(make_move_iterator(other.candidates.begin()), make_move_iterator(other.candidates.end())); -} - } diff --git a/src/Interpreters/Cache/EvictionCandidates.h b/src/Interpreters/Cache/EvictionCandidates.h index baacbc0cfae5..0dcc6bc0dda5 100644 --- a/src/Interpreters/Cache/EvictionCandidates.h +++ b/src/Interpreters/Cache/EvictionCandidates.h @@ -9,7 +9,7 @@ class EvictionCandidates : private boost::noncopyable public: using FinalizeEvictionFunc = std::function; - EvictionCandidates() = default; + EvictionCandidates(); ~EvictionCandidates(); void add( @@ -17,8 +17,6 @@ class EvictionCandidates : private boost::noncopyable LockedKey & locked_key, const CachePriorityGuard::Lock &); - void insert(EvictionCandidates && other, const CachePriorityGuard::Lock &); - void evict(); void removeQueueEntries(const CachePriorityGuard::Lock &); @@ -29,6 +27,8 @@ class EvictionCandidates : private boost::noncopyable FileCacheQueryLimit::QueryContext * query_context, const CachePriorityGuard::Lock &); + bool needFinalize() const; + size_t size() const { return candidates_size; } auto begin() const { return candidates.begin(); } @@ -57,6 +57,8 @@ class EvictionCandidates : private boost::noncopyable bool removed_queue_entries = false; IFileCachePriority::HoldSpacePtr hold_space; + + LoggerPtr log; }; using EvictionCandidatesPtr = std::unique_ptr; diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 12ea2c178bc7..29f2ebeca554 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1389,7 +1389,18 @@ void FileCache::applySettingsIfPossible(const 
FileCacheSettings & new_settings, || new_settings.max_elements != actual_settings.max_elements) { EvictionCandidates eviction_candidates; - bool limits_satisfied = false; + bool modified_size_limit = false; + + /// In order to not block cache for the duration of cache resize, + /// we do: + /// a. Take a cache lock. + /// 1. Collect eviction candidates, + /// 2. Remove queue entries of eviction candidates. + /// This will release space we consider to be hold for them, + /// so that we can safely modify size limits. + /// 3. Modify size limits of cache. + /// b. Release a cache lock. + /// 1. Do actual eviction from filesystem. { cache_is_being_resized.store(true, std::memory_order_relaxed); SCOPE_EXIT({ @@ -1399,38 +1410,45 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, auto cache_lock = lockCache(); FileCacheReserveStat stat; - limits_satisfied = main_priority->collectCandidatesForEviction( - new_settings.max_size, new_settings.max_elements, 0/* max_candidates_to_evict */, stat, eviction_candidates, cache_lock); + if (main_priority->collectCandidatesForEviction( + new_settings.max_size, new_settings.max_elements, 0/* max_candidates_to_evict */, + stat, eviction_candidates, cache_lock)) + { + /// Remove only queue entries of eviction candidates. + eviction_candidates.removeQueueEntries(cache_lock); + /// Note that (in-memory) metadata about corresponding file segments + /// (e.g. file segment info in CacheMetadata) will be removed + /// only after eviction from filesystem. This is needed to avoid + /// a race on removal of file from filesystsem and + /// addition of the same file as part of a newly cached file segment. + + /// Modify cache size limits. + /// From this point cache eviction will follow them. + main_priority->modifySizeLimits( + new_settings.max_size, new_settings.max_elements, + new_settings.slru_size_ratio, cache_lock); - eviction_candidates.removeQueueEntries(cache_lock); + modified_size_limit = true; + } + } - if (limits_satisfied) + if (modified_size_limit) + { + try { - main_priority->modifySizeLimits( - new_settings.max_size, new_settings.max_elements, new_settings.slru_size_ratio, cache_lock); + /// Do actual eviction from filesystem. + eviction_candidates.evict(); } - else + catch (...) { - LOG_WARNING(log, "Unable to modify size limit from {} to {}, " - "elements limit from {} to {}", - actual_settings.max_size, new_settings.max_size, - actual_settings.max_elements, new_settings.max_elements); + if (eviction_candidates.needFinalize()) + eviction_candidates.finalize(nullptr, lockCache()); + throw; } - } - try - { - eviction_candidates.evict(); - } - catch (...) 
- { - auto cache_lock = lockCache(); - eviction_candidates.finalize(nullptr, cache_lock); - throw; - } + if (eviction_candidates.needFinalize()) + eviction_candidates.finalize(nullptr, lockCache()); - if (limits_satisfied) - { LOG_INFO(log, "Changed max_size from {} to {}, max_elements from {} to {}", actual_settings.max_size, new_settings.max_size, actual_settings.max_elements, new_settings.max_elements); @@ -1438,6 +1456,13 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, actual_settings.max_size = new_settings.max_size; actual_settings.max_elements = new_settings.max_elements; } + else + { + LOG_WARNING(log, "Unable to modify size limit from {} to {}, " + "elements limit from {} to {}", + actual_settings.max_size, new_settings.max_size, + actual_settings.max_elements, new_settings.max_elements); + } } if (new_settings.max_file_segment_size != actual_settings.max_file_segment_size) diff --git a/src/Interpreters/Cache/FileCacheFactory.cpp b/src/Interpreters/Cache/FileCacheFactory.cpp index 747b31bff644..a7a5834f03d7 100644 --- a/src/Interpreters/Cache/FileCacheFactory.cpp +++ b/src/Interpreters/Cache/FileCacheFactory.cpp @@ -142,10 +142,8 @@ void FileCacheFactory::updateSettingsFromConfig(const Poco::Util::AbstractConfig caches_by_name_copy = caches_by_name; } - auto * log = &Poco::Logger::get("FileCacheFactory"); - std::unordered_set checked_paths; - for (const auto & [cache_name, cache_info] : caches_by_name_copy) + for (const auto & [_, cache_info] : caches_by_name_copy) { if (cache_info->config_path.empty() || checked_paths.contains(cache_info->config_path)) continue; @@ -158,10 +156,12 @@ void FileCacheFactory::updateSettingsFromConfig(const Poco::Util::AbstractConfig FileCacheSettings old_settings = cache_info->getSettings(); if (old_settings == new_settings) { - LOG_TRACE(log, "No settings changes for cache: {}", cache_name); continue; } + /// FIXME: registerDiskCache modifies `path` setting of FileCacheSettings if path is relative. + /// This can lead to calling applySettingsIfPossible even though nothing changed, which is avoidable. + // LOG_TRACE(log, "Will apply settings changes for cache {}. 
" // "Settings changes: {} (new settings: {}, old_settings: {})", // cache_name, fmt::join(new_settings.getSettingsDiff(old_settings), ", "), diff --git a/src/Interpreters/Cache/LRUFileCachePriority.cpp b/src/Interpreters/Cache/LRUFileCachePriority.cpp index e859529f5e77..1a2040f9ed2b 100644 --- a/src/Interpreters/Cache/LRUFileCachePriority.cpp +++ b/src/Interpreters/Cache/LRUFileCachePriority.cpp @@ -280,7 +280,7 @@ bool LRUFileCachePriority::collectCandidatesForEviction( auto can_fit = [&] { - return canFit(size, 1, stat.total_stat.releasable_size, stat.total_stat.releasable_count, lock); + return canFit(size, elements, stat.total_stat.releasable_size, stat.total_stat.releasable_count, lock); }; iterateForEviction(res, stat, can_fit, lock); diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 631c1aa2ae67..2cbd56ba0bc9 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -941,50 +941,48 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl( file_segment->detach(segment_lock, *this); + try { - try + const auto path = key_metadata->getFileSegmentPath(*file_segment); + if (file_segment->segment_kind == FileSegmentKind::Temporary) { - const auto path = key_metadata->getFileSegmentPath(*file_segment); - if (file_segment->segment_kind == FileSegmentKind::Temporary) - { - /// FIXME: For temporary file segment the requirement is not as strong because - /// the implementation of "temporary data in cache" creates files in advance. - if (fs::exists(path)) - fs::remove(path); - } - else if (file_segment->downloaded_size == 0) - { - chassert(!fs::exists(path)); - } - else if (fs::exists(path)) - { + /// FIXME: For temporary file segment the requirement is not as strong because + /// the implementation of "temporary data in cache" creates files in advance. + if (fs::exists(path)) fs::remove(path); + } + else if (file_segment->downloaded_size == 0) + { + chassert(!fs::exists(path)); + } + else if (fs::exists(path)) + { + fs::remove(path); - /// Clear OpenedFileCache to avoid reading from incorrect file descriptor. - int flags = file_segment->getFlagsForLocalRead(); - /// Files are created with flags from file_segment->getFlagsForLocalRead() - /// plus optionally O_DIRECT is added, depends on query setting, so remove both. - OpenedFileCache::instance().remove(path, flags); - OpenedFileCache::instance().remove(path, flags | O_DIRECT); + /// Clear OpenedFileCache to avoid reading from incorrect file descriptor. + int flags = file_segment->getFlagsForLocalRead(); + /// Files are created with flags from file_segment->getFlagsForLocalRead() + /// plus optionally O_DIRECT is added, depends on query setting, so remove both. 
+ OpenedFileCache::instance().remove(path, flags); + OpenedFileCache::instance().remove(path, flags | O_DIRECT); - LOG_TEST(key_metadata->logger(), "Removed file segment at path: {}", path); - } - else if (!can_be_broken) - { + LOG_TEST(key_metadata->logger(), "Removed file segment at path: {}", path); + } + else if (!can_be_broken) + { #ifdef ABORT_ON_LOGICAL_ERROR - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected path {} to exist", path); #else - LOG_WARNING(key_metadata->logger(), "Expected path {} to exist, while removing {}:{}", - path, getKey(), file_segment->offset()); + LOG_WARNING(key_metadata->logger(), "Expected path {} to exist, while removing {}:{}", + path, getKey(), file_segment->offset()); #endif - } - } - catch (...) - { - tryLogCurrentException(__PRETTY_FUNCTION__); - chassert(false); } } + catch (...) + { + tryLogCurrentException(__PRETTY_FUNCTION__); + chassert(false); + } return key_metadata->erase(it); } From b6eef6137823a92c6a6ba601bb4c879b2dee30fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Fri, 5 Apr 2024 17:10:01 +0200 Subject: [PATCH 345/470] Fix one phony case --- contrib/avro-cmake/CMakeLists.txt | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/contrib/avro-cmake/CMakeLists.txt b/contrib/avro-cmake/CMakeLists.txt index 63b3854eef90..c99c7dd4624d 100644 --- a/contrib/avro-cmake/CMakeLists.txt +++ b/contrib/avro-cmake/CMakeLists.txt @@ -62,9 +62,12 @@ target_link_libraries (_avrocpp PRIVATE ch_contrib::snappy) # create a symlink to include headers with set(AVRO_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include") -ADD_CUSTOM_TARGET(avro_symlink_headers ALL - COMMAND ${CMAKE_COMMAND} -E make_directory "${AVRO_INCLUDE_DIR}" - COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVRO_INCLUDE_DIR}/avro" +ADD_CUSTOM_COMMAND(OUTPUT "${AVRO_INCLUDE_DIR}" + COMMAND ${CMAKE_COMMAND} -E make_directory "${AVRO_INCLUDE_DIR}" + COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVRO_INCLUDE_DIR}/avro" + DEPENDS "${AVROCPP_ROOT_DIR}/api" ) +ADD_CUSTOM_TARGET(avro_symlink_headers ALL + DEPENDS "${AVRO_INCLUDE_DIR}") add_dependencies(_avrocpp avro_symlink_headers) target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVRO_INCLUDE_DIR}") From 31cd71f8f2e78623af980d14ab0b29b3969757da Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 8 Apr 2024 14:39:11 +0200 Subject: [PATCH 346/470] Rename test --- ...ence => 03032_dynamically_resize_filesystem_cache_2.reference} | 0 ...hardcore.sh => 03032_dynamically_resize_filesystem_cache_2.sh} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename tests/queries/0_stateless/{03032_dynamically_resize_filesystem_cache_hardcore.reference => 03032_dynamically_resize_filesystem_cache_2.reference} (100%) rename tests/queries/0_stateless/{03032_dynamically_resize_filesystem_cache_hardcore.sh => 03032_dynamically_resize_filesystem_cache_2.sh} (100%) diff --git a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_hardcore.reference b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.reference similarity index 100% rename from tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_hardcore.reference rename to tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.reference diff --git a/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_hardcore.sh 
b/tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh similarity index 100% rename from tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_hardcore.sh rename to tests/queries/0_stateless/03032_dynamically_resize_filesystem_cache_2.sh From e4b0ca5d836e14fada7592777a0443914bfbaa47 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 8 Apr 2024 12:59:20 +0000 Subject: [PATCH 347/470] Fix filter pushdown from additional_table_filters in Merge engine in analyzer --- src/Planner/PlannerJoinTree.cpp | 3 ++- src/Storages/StorageDummy.h | 6 ++++++ ...03033_analyzer_merge_engine_filter_push_down.reference | 3 +++ .../03033_analyzer_merge_engine_filter_push_down.sql | 8 ++++++++ 4 files changed, 19 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.reference create mode 100644 tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.sql diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index d2f37ff1ad4c..534080f17399 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -814,7 +814,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expres bool optimize_move_to_prewhere = settings.optimize_move_to_prewhere && (!is_final || settings.optimize_move_to_prewhere_if_final); - if (storage->supportsPrewhere() && optimize_move_to_prewhere) + auto supported_prewhere_columns = storage->supportedPrewhereColumns(); + if (storage->canMoveConditionsToPrewhere() && optimize_move_to_prewhere && (!supported_prewhere_columns || supported_prewhere_columns->contains(filter_info.column_name))) { if (!prewhere_info) prewhere_info = std::make_shared(); diff --git a/src/Storages/StorageDummy.h b/src/Storages/StorageDummy.h index e9d8f90f755f..ae9bf2483e13 100644 --- a/src/Storages/StorageDummy.h +++ b/src/Storages/StorageDummy.h @@ -19,6 +19,12 @@ class StorageDummy final : public IStorage bool supportsSampling() const override { return true; } bool supportsFinal() const override { return true; } bool supportsPrewhere() const override { return true; } + + std::optional supportedPrewhereColumns() const override + { + return original_storage_snapshot ? 
original_storage_snapshot->storage.supportedPrewhereColumns() : std::nullopt; + } + bool supportsSubcolumns() const override { return true; } bool supportsDynamicSubcolumns() const override { return true; } bool canMoveConditionsToPrewhere() const override diff --git a/tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.reference b/tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.reference new file mode 100644 index 000000000000..86a000598545 --- /dev/null +++ b/tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.reference @@ -0,0 +1,3 @@ +UInt32 1 +UInt32 2 +UInt32 3 diff --git a/tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.sql b/tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.sql new file mode 100644 index 000000000000..9be1152bbbf3 --- /dev/null +++ b/tests/queries/0_stateless/03033_analyzer_merge_engine_filter_push_down.sql @@ -0,0 +1,8 @@ +set allow_suspicious_low_cardinality_types=1; +drop table if exists test; +create table test (`x` LowCardinality(Nullable(UInt32)), `y` String) engine = MergeTree order by tuple(); +insert into test values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); +create table m_table (x UInt32, y String) engine = Merge(currentDatabase(), 'test*'); +select toTypeName(x), x FROM m_table SETTINGS additional_table_filters = {'m_table':'x != 4'}, optimize_move_to_prewhere=1, allow_experimental_analyzer=1; +drop table test; + From 5e87ecf32e9138c1cf9e249f65580dd9a9c5732c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 8 Apr 2024 12:59:59 +0000 Subject: [PATCH 348/470] Revert "Updating test." This reverts commit 1a75d3ed46d0a2fe6a596afbc27ae20ff97427fb. --- .../02802_with_cube_with_totals.reference | 30 ------------------- .../02802_with_cube_with_totals.sql | 1 - 2 files changed, 31 deletions(-) diff --git a/tests/queries/0_stateless/02802_with_cube_with_totals.reference b/tests/queries/0_stateless/02802_with_cube_with_totals.reference index 206c32e562b0..c7b7b5704560 100644 --- a/tests/queries/0_stateless/02802_with_cube_with_totals.reference +++ b/tests/queries/0_stateless/02802_with_cube_with_totals.reference @@ -1,35 +1,5 @@ ((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 ((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 
-((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 -((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 ((2147483648,(-0,1.1754943508222875e-38,2147483646,'-9223372036854775808',NULL))) 0 \N diff --git a/tests/queries/0_stateless/02802_with_cube_with_totals.sql b/tests/queries/0_stateless/02802_with_cube_with_totals.sql index 168e4d61b681..77adb68eb4b7 100644 --- a/tests/queries/0_stateless/02802_with_cube_with_totals.sql +++ b/tests/queries/0_stateless/02802_with_cube_with_totals.sql @@ -1,3 +1,2 @@ -set allow_experimental_analyzer=1; SELECT tuple((2147483648, (-0., 1.1754943508222875e-38, 2147483646, '-9223372036854775808', NULL))), toInt128(0.0001) GROUP BY ((256, toInt64(1.1754943508222875e-38), NULL), NULL, -0., ((65535, '-92233720368547758.07'), 0.9999), tuple(((1., 3.4028234663852886e38, '1', 0.5), NULL, tuple('0.1')))) WITH CUBE WITH TOTALS; SELECT NULL GROUP BY toUUID(NULL, '0', NULL, '0.0000065535'), 1 WITH CUBE WITH TOTALS; From 5e1c1b6b94d920fbfb361c0cf606728f730e149a Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 8 Apr 2024 13:41:44 +0000 Subject: [PATCH 349/470] CI: test merge queue --- tests/ci/ci.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/ci.py b/tests/ci/ci.py index f60c40f58605..c2962c5b40e1 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -318,7 +318,7 @@ def fetch_records_data(self): self.update() if self.cache_data_fetched: - # there are no record w/o underling data - no need to fetch + # there are no records without fetched data - no need to fetch return self # clean up From 44d3612d77032e2b104296840690154e53d8f073 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 8 Apr 2024 15:52:38 +0200 Subject: [PATCH 350/470] Review suggestion --- src/Interpreters/Cache/FileCache.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 29f2ebeca554..be452e43bedf 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1458,10 +1458,13 @@ void FileCache::applySettingsIfPossible(const FileCacheSettings & new_settings, } else { - LOG_WARNING(log, "Unable to modify size limit from 
{} to {}, " - "elements limit from {} to {}", - actual_settings.max_size, new_settings.max_size, - actual_settings.max_elements, new_settings.max_elements); + LOG_WARNING( + log, "Unable to modify size limit from {} to {}, elements limit from {} to {}. " + "`max_size` and `max_elements` settings will remain inconsistent with config.xml. " + "Next attempt to update them will happen on the next config reload. " + "You can trigger it with SYSTEM RELOAD CONFIG.", + actual_settings.max_size, new_settings.max_size, + actual_settings.max_elements, new_settings.max_elements); } } From 8ac9cbd80d581183edd7add2417008f531e28656 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Mon, 8 Apr 2024 14:13:43 +0000 Subject: [PATCH 351/470] Add part name to check part exception message --- src/Storages/MergeTree/DataPartsExchange.cpp | 4 +-- .../MergeTree/MergeTreeDataPartChecksum.cpp | 33 ++++++++++--------- .../MergeTree/MergeTreeDataPartChecksum.h | 8 ++--- .../ReplicatedMergeTreePartCheckThread.cpp | 2 +- src/Storages/MergeTree/checkDataPart.cpp | 2 +- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 7 files changed, 27 insertions(+), 26 deletions(-) diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 91444d76a521..c56530b97eb2 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -318,7 +318,7 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDisk( } if (!from_remote_disk && isFullPartStorage(part->getDataPartStorage())) - part->checksums.checkEqual(data_checksums, false); + part->checksums.checkEqual(data_checksums, false, part->name); return data_checksums; } @@ -906,7 +906,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( else { if (isFullPartStorage(new_data_part->getDataPartStorage())) - new_data_part->checksums.checkEqual(data_checksums, false); + new_data_part->checksums.checkEqual(data_checksums, false, new_data_part->name); LOG_DEBUG(log, "Download of part {} onto disk {} finished.", part_name, disk->getName()); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index d60f4cc73540..7c9e4a371ab5 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -28,33 +28,34 @@ namespace ErrorCodes } -void MergeTreeDataPartChecksum::checkEqual(const MergeTreeDataPartChecksum & rhs, bool have_uncompressed, const String & name) const +void MergeTreeDataPartChecksum::checkEqual(const MergeTreeDataPartChecksum & rhs, bool have_uncompressed, const String & name, const String & part_name) const { if (is_compressed && have_uncompressed) { if (!rhs.is_compressed) - throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "No uncompressed checksum for file {}", name); + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "No uncompressed checksum for file {}, data part {}", name, part_name); + if (rhs.uncompressed_size != uncompressed_size) { - throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected uncompressed size of file {} in data part ({} vs {})", - name, uncompressed_size, rhs.uncompressed_size); + throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected uncompressed size of file {} in data part {} ({} vs {})", + name, part_name, uncompressed_size, rhs.uncompressed_size); } if (rhs.uncompressed_hash != uncompressed_hash) { - throw 
Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for uncompressed file {} in data part ({} vs {})", - name, getHexUIntLowercase(uncompressed_hash), getHexUIntLowercase(rhs.uncompressed_hash)); + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for uncompressed file {} in data part {} ({} vs {})", + name, part_name, getHexUIntLowercase(uncompressed_hash), getHexUIntLowercase(rhs.uncompressed_hash)); } return; } if (rhs.file_size != file_size) { - throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected size of file {} in data part ({} vs {})", - name, file_size, rhs.file_size); + throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected size of file {} in data part {} ({} vs {})", + name, part_name, file_size, rhs.file_size); } if (rhs.file_hash != file_hash) { - throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for file {} in data part ({} vs {})", - name, getHexUIntLowercase(file_hash), getHexUIntLowercase(rhs.file_hash)); + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for file {} in data part {} ({} vs {})", + name, part_name, getHexUIntLowercase(file_hash), getHexUIntLowercase(rhs.file_hash)); } } @@ -79,7 +80,7 @@ void MergeTreeDataPartChecksum::checkSize(const IDataPartStorage & storage, cons } -void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & rhs, bool have_uncompressed) const +void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & rhs, bool have_uncompressed, const String & part_name) const { for (const auto & [name, _] : rhs.files) if (!files.contains(name)) @@ -95,7 +96,7 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r if (it == rhs.files.end()) throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No file {} in data part", name); - checksum.checkEqual(it->second, have_uncompressed, name); + checksum.checkEqual(it->second, have_uncompressed, name, part_name); } } @@ -435,19 +436,19 @@ String MinimalisticDataPartChecksums::getSerializedString(const MergeTreeDataPar return checksums.getSerializedString(); } -void MinimalisticDataPartChecksums::checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const +void MinimalisticDataPartChecksums::checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files, const String & part_name) const { if (full_checksums && rhs.full_checksums) - full_checksums->checkEqual(*rhs.full_checksums, check_uncompressed_hash_in_compressed_files); + full_checksums->checkEqual(*rhs.full_checksums, check_uncompressed_hash_in_compressed_files, part_name); // If full checksums were checked, check total checksums just in case checkEqualImpl(rhs, check_uncompressed_hash_in_compressed_files); } -void MinimalisticDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const +void MinimalisticDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files, const String & part_name) const { if (full_checksums) - full_checksums->checkEqual(rhs, check_uncompressed_hash_in_compressed_files); + full_checksums->checkEqual(rhs, check_uncompressed_hash_in_compressed_files, part_name); // If full checksums were checked, check total checksums just in case MinimalisticDataPartChecksums rhs_minimalistic; diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h 
b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h index d4980a67a43d..05178dc3a609 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h @@ -32,7 +32,7 @@ struct MergeTreeDataPartChecksum : file_size(file_size_), file_hash(file_hash_), is_compressed(true), uncompressed_size(uncompressed_size_), uncompressed_hash(uncompressed_hash_) {} - void checkEqual(const MergeTreeDataPartChecksum & rhs, bool have_uncompressed, const String & name) const; + void checkEqual(const MergeTreeDataPartChecksum & rhs, bool have_uncompressed, const String & name, const String & part_name) const; void checkSize(const IDataPartStorage & storage, const String & name) const; }; @@ -61,7 +61,7 @@ struct MergeTreeDataPartChecksums /// Checks that the set of columns and their checksums are the same. If not, throws an exception. /// If have_uncompressed, for compressed files it compares the checksums of the decompressed data. /// Otherwise, it compares only the checksums of the files. - void checkEqual(const MergeTreeDataPartChecksums & rhs, bool have_uncompressed) const; + void checkEqual(const MergeTreeDataPartChecksums & rhs, bool have_uncompressed, const String & part_name) const; static bool isBadChecksumsErrorCode(int code); @@ -132,8 +132,8 @@ struct MinimalisticDataPartChecksums String getSerializedString() const; static String getSerializedString(const MergeTreeDataPartChecksums & full_checksums, bool minimalistic); - void checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const; - void checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const; + void checkEqual(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files, const String & part_name) const; + void checkEqual(const MergeTreeDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files, const String & part_name) const; void checkEqualImpl(const MinimalisticDataPartChecksums & rhs, bool check_uncompressed_hash_in_compressed_files) const; }; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 181f54688f91..d7601e6e6387 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -359,7 +359,7 @@ ReplicatedCheckResult ReplicatedMergeTreePartCheckThread::checkPartImpl(const St if (local_part_header.getColumnsHash() != zk_part_header.getColumnsHash()) throw Exception(ErrorCodes::TABLE_DIFFERS_TOO_MUCH, "Columns of local part {} are different from ZooKeeper", part_name); - zk_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true); + zk_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true, part_name); checkDataPart( part, diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index d64568e0c3e0..81de9d5a9a15 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -350,7 +350,7 @@ static IMergeTreeDataPart::Checksums checkDataPart( return {}; if (require_checksums || !checksums_txt.files.empty()) - checksums_txt.checkEqual(checksums_data, check_uncompressed); + checksums_txt.checkEqual(checksums_data, check_uncompressed, data_part->name); return checksums_data; } diff --git a/src/Storages/StorageMergeTree.cpp 
b/src/Storages/StorageMergeTree.cpp index c9f451b6bb17..f7b6d6d3c2a0 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -2319,7 +2319,7 @@ std::optional StorageMergeTree::checkDataNext(DataValidationTasksPt try { auto calculated_checksums = checkDataPart(part, false, noop, /* is_cancelled */[]{ return false; }, /* throw_on_broken_projection */true); - calculated_checksums.checkEqual(part->checksums, true); + calculated_checksums.checkEqual(part->checksums, true, part->name); auto & part_mutable = const_cast(*part); part_mutable.writeChecksums(part->checksums, local_context->getWriteSettings()); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index ddb90f066799..f3547ec5b566 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1837,7 +1837,7 @@ bool StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps( "(it may rarely happen on race condition with KILL MUTATION).", part_name, replica); } - replica_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true); + replica_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true, part_name); break; } From f3dc77ee00008f640b4d1f47445a223bbe286000 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 8 Apr 2024 14:15:49 +0000 Subject: [PATCH 352/470] disable autofix for merge queue --- tests/ci/style_check.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 373fa7b316f1..4580f0076065 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -131,6 +131,11 @@ def main(): temp_path.mkdir(parents=True, exist_ok=True) pr_info = PRInfo() + + if pr_info.is_merge_queue() and args.push: + print("Auto style fix will be disabled for Merge Queue workflow") + args.push = False + run_cpp_check = True run_shell_check = True run_python_check = True From 82b2adef97a87de683c1a20ec696c03216416a23 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 8 Apr 2024 14:50:13 +0000 Subject: [PATCH 353/470] Fix GLOBAL IN table queries with analyzer. 
--- src/Planner/CollectSets.cpp | 52 +++++++++++-------- src/Planner/CollectSets.h | 4 ++ src/Storages/buildQueryTreeForShard.cpp | 7 ++- .../test_cluster_all_replicas/test.py | 11 ++++ 4 files changed, 49 insertions(+), 25 deletions(-) diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp index e150b8a5956b..37502828f638 100644 --- a/src/Planner/CollectSets.cpp +++ b/src/Planner/CollectSets.cpp @@ -24,6 +24,34 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; } +QueryTreeNodePtr makeExecutableSubqueryForIn(const QueryTreeNodePtr & in_second_argument, const ContextPtr & context) +{ + auto subquery_to_execute = in_second_argument; + if (auto * table_node = in_second_argument->as()) + { + auto storage_snapshot = table_node->getStorageSnapshot(); + auto columns_to_select = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::Ordinary)); + size_t columns_to_select_size = columns_to_select.size(); + auto column_nodes_to_select = std::make_shared(); + column_nodes_to_select->getNodes().reserve(columns_to_select_size); + NamesAndTypes projection_columns; + projection_columns.reserve(columns_to_select_size); + for (auto & column : columns_to_select) + { + column_nodes_to_select->getNodes().emplace_back(std::make_shared(column, subquery_to_execute)); + projection_columns.emplace_back(column.name, column.type); + } + auto subquery_for_table = std::make_shared(Context::createCopy(context)); + subquery_for_table->setIsSubquery(true); + subquery_for_table->getProjectionNode() = std::move(column_nodes_to_select); + subquery_for_table->getJoinTree() = std::move(subquery_to_execute); + subquery_for_table->resolveProjectionColumns(std::move(projection_columns)); + subquery_to_execute = std::move(subquery_for_table); + } + + return subquery_to_execute; +} + namespace { @@ -88,29 +116,7 @@ class CollectSetsVisitor : public ConstInDepthQueryTreeVisitoras()) - { - auto storage_snapshot = table_node->getStorageSnapshot(); - auto columns_to_select = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::Ordinary)); - size_t columns_to_select_size = columns_to_select.size(); - auto column_nodes_to_select = std::make_shared(); - column_nodes_to_select->getNodes().reserve(columns_to_select_size); - NamesAndTypes projection_columns; - projection_columns.reserve(columns_to_select_size); - for (auto & column : columns_to_select) - { - column_nodes_to_select->getNodes().emplace_back(std::make_shared(column, subquery_to_execute)); - projection_columns.emplace_back(column.name, column.type); - } - auto subquery_for_table = std::make_shared(Context::createCopy(planner_context.getQueryContext())); - subquery_for_table->setIsSubquery(true); - subquery_for_table->getProjectionNode() = std::move(column_nodes_to_select); - subquery_for_table->getJoinTree() = std::move(subquery_to_execute); - subquery_for_table->resolveProjectionColumns(std::move(projection_columns)); - subquery_to_execute = std::move(subquery_for_table); - } - + auto subquery_to_execute = makeExecutableSubqueryForIn(in_second_argument, planner_context.getQueryContext()); sets.addFromSubquery(set_key, std::move(subquery_to_execute), settings); } else diff --git a/src/Planner/CollectSets.h b/src/Planner/CollectSets.h index 5f9f7a5a4665..e4168c7dd49c 100644 --- a/src/Planner/CollectSets.h +++ b/src/Planner/CollectSets.h @@ -14,4 +14,8 @@ struct SelectQueryOptions; */ void collectSets(const QueryTreeNodePtr & node, PlannerContext & planner_context); +/// Build subqiery which we execute for IN function. 
+/// It is needed to support `IN table` case. +QueryTreeNodePtr makeExecutableSubqueryForIn(const QueryTreeNodePtr & in_second_argument, const ContextPtr & context); + } diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index 5284f52a7e42..09e48a93df4b 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -361,10 +362,12 @@ QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_contex { auto & in_function_subquery_node = in_function_node->getArguments().getNodes().at(1); auto in_function_node_type = in_function_subquery_node->getNodeType(); - if (in_function_node_type != QueryTreeNodeType::QUERY && in_function_node_type != QueryTreeNodeType::UNION) + if (in_function_node_type != QueryTreeNodeType::QUERY && in_function_node_type != QueryTreeNodeType::UNION && in_function_node_type != QueryTreeNodeType::TABLE) continue; - auto temporary_table_expression_node = executeSubqueryNode(in_function_subquery_node, + auto subquery_to_execute = makeExecutableSubqueryForIn(in_function_subquery_node, planner_context->getQueryContext()); + + auto temporary_table_expression_node = executeSubqueryNode(subquery_to_execute, planner_context->getMutableQueryContext(), global_in_or_join_node.subquery_depth); diff --git a/tests/integration/test_cluster_all_replicas/test.py b/tests/integration/test_cluster_all_replicas/test.py index eb406de6a8d0..15f3f36f74e2 100644 --- a/tests/integration/test_cluster_all_replicas/test.py +++ b/tests/integration/test_cluster_all_replicas/test.py @@ -42,6 +42,17 @@ def test_cluster(start_cluster): ) +def test_global_in(start_cluster): + + node1.query("CREATE TABLE u(uid Int16) ENGINE=Log as select 0"); + + assert set( + node1.query( + """SELECT hostName(), * FROM clusterAllReplicas("one_shard_two_nodes", system.one) where dummy GLOBAL IN u""" + ).splitlines() + ) == {"node1\t0", "node2\t0"} + + @pytest.mark.parametrize( "cluster", [ From bcccbe2b19bd50429ce6de1f4e0619c0677fa4a1 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 8 Apr 2024 14:54:36 +0000 Subject: [PATCH 354/470] Fixing typo --- src/Planner/CollectSets.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Planner/CollectSets.h b/src/Planner/CollectSets.h index e4168c7dd49c..0ee006f3320d 100644 --- a/src/Planner/CollectSets.h +++ b/src/Planner/CollectSets.h @@ -14,7 +14,7 @@ struct SelectQueryOptions; */ void collectSets(const QueryTreeNodePtr & node, PlannerContext & planner_context); -/// Build subqiery which we execute for IN function. +/// Build subquery which we execute for IN function. /// It is needed to support `IN table` case. 
QueryTreeNodePtr makeExecutableSubqueryForIn(const QueryTreeNodePtr & in_second_argument, const ContextPtr & context); From 7fcfbffd0cf215e87d36aa515d832577256ce2e3 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Mon, 8 Apr 2024 15:01:11 +0000 Subject: [PATCH 355/470] Automatic style fix --- tests/integration/test_cluster_all_replicas/test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/integration/test_cluster_all_replicas/test.py b/tests/integration/test_cluster_all_replicas/test.py index 15f3f36f74e2..59b41ca87afb 100644 --- a/tests/integration/test_cluster_all_replicas/test.py +++ b/tests/integration/test_cluster_all_replicas/test.py @@ -43,8 +43,7 @@ def test_cluster(start_cluster): def test_global_in(start_cluster): - - node1.query("CREATE TABLE u(uid Int16) ENGINE=Log as select 0"); + node1.query("CREATE TABLE u(uid Int16) ENGINE=Log as select 0") assert set( node1.query( From 603824748d7bec40d8dc7b30a33a988e214c3328 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 8 Apr 2024 15:03:13 +0000 Subject: [PATCH 356/470] CI: disable finish check for mq --- .github/workflows/pull_request.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index ff0adee14433..74ce8452de8b 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -157,7 +157,7 @@ jobs: ################################# Stage Final ################################# # FinishCheck: - if: ${{ !failure() && !cancelled() }} + if: ${{ !failure() && !cancelled() && github.event_name != 'merge_group' }} needs: [Tests_1, Tests_2] runs-on: [self-hosted, style-checker] steps: From f9402f3584aab08aeacaed8c278a8c26a526624e Mon Sep 17 00:00:00 2001 From: flynn Date: Mon, 8 Apr 2024 15:15:45 +0000 Subject: [PATCH 357/470] Fix --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 107 +++++++++++----------- 1 file changed, 53 insertions(+), 54 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index e29e8f2f1369..48f326521989 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2275,57 +2275,62 @@ void QueryAnalyzer::mergeWindowWithParentWindow(const QueryTreeNodePtr & window_ */ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_list, const QueryTreeNodes & projection_nodes, IdentifierResolveScope & scope) { - auto & node_list_typed = node_list->as(); - - for (auto & node : node_list_typed.getNodes()) + const auto & settings = scope.context->getSettingsRef(); + if (settings.enable_positional_arguments && scope.context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) { - auto * node_to_replace = &node; + auto & node_list_typed = node_list->as(); - if (auto * sort_node = node->as()) - node_to_replace = &sort_node->getExpression(); + for (auto & node : node_list_typed.getNodes()) + { + auto * node_to_replace = &node; - auto * constant_node = (*node_to_replace)->as(); + if (auto * sort_node = node->as()) + node_to_replace = &sort_node->getExpression(); - if (!constant_node - || (constant_node->getValue().getType() != Field::Types::UInt64 && constant_node->getValue().getType() != Field::Types::Int64)) - continue; + auto * constant_node = (*node_to_replace)->as(); - UInt64 pos; - if (constant_node->getValue().getType() == Field::Types::UInt64) - { - pos = constant_node->getValue().get(); - } - else // Int64 - { - auto value = 
constant_node->getValue().get(); - if (value > 0) - pos = value; - else + if (!constant_node + || (constant_node->getValue().getType() != Field::Types::UInt64 + && constant_node->getValue().getType() != Field::Types::Int64)) + continue; + + UInt64 pos; + if (constant_node->getValue().getType() == Field::Types::UInt64) { - if (static_cast(std::abs(value)) > projection_nodes.size()) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Negative positional argument number {} is out of bounds. Expected in range [-{}, -1]. In scope {}", - value, - projection_nodes.size(), - scope.scope_node->formatASTForErrorMessage()); - pos = projection_nodes.size() + value + 1; + pos = constant_node->getValue().get(); + } + else // Int64 + { + auto value = constant_node->getValue().get(); + if (value > 0) + pos = value; + else + { + if (static_cast(std::abs(value)) > projection_nodes.size()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Negative positional argument number {} is out of bounds. Expected in range [-{}, -1]. In scope {}", + value, + projection_nodes.size(), + scope.scope_node->formatASTForErrorMessage()); + pos = projection_nodes.size() + value + 1; + } } - } - if (!pos || pos > projection_nodes.size()) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Positional argument number {} is out of bounds. Expected in range [1, {}]. In scope {}", - pos, - projection_nodes.size(), - scope.scope_node->formatASTForErrorMessage()); + if (!pos || pos > projection_nodes.size()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Positional argument number {} is out of bounds. Expected in range [1, {}]. In scope {}", + pos, + projection_nodes.size(), + scope.scope_node->formatASTForErrorMessage()); - --pos; - *node_to_replace = projection_nodes[pos]->clone(); - if (auto it = resolved_expressions.find(projection_nodes[pos]); it != resolved_expressions.end()) - { - resolved_expressions[*node_to_replace] = it->second; + --pos; + *node_to_replace = projection_nodes[pos]->clone(); + if (auto it = resolved_expressions.find(projection_nodes[pos]); it != resolved_expressions.end()) + { + resolved_expressions[*node_to_replace] = it->second; + } } } } @@ -6674,15 +6679,12 @@ void expandTuplesInList(QueryTreeNodes & key_list) */ void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierResolveScope & scope) { - const auto & settings = scope.context->getSettingsRef(); - if (query_node_typed.isGroupByWithGroupingSets()) { QueryTreeNodes nullable_group_by_keys; for (auto & grouping_sets_keys_list_node : query_node_typed.getGroupBy().getNodes()) { - if (settings.enable_positional_arguments && scope.context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) - replaceNodesWithPositionalArguments(grouping_sets_keys_list_node, query_node_typed.getProjection().getNodes(), scope); + replaceNodesWithPositionalArguments(grouping_sets_keys_list_node, query_node_typed.getProjection().getNodes(), scope); // Remove redundant calls to `tuple` function. It simplifies checking if expression is an aggregation key. 
// It's required to support queries like: SELECT number FROM numbers(3) GROUP BY (number, number % 2) @@ -6701,8 +6703,7 @@ void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierR } else { - if (settings.enable_positional_arguments && scope.context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) - replaceNodesWithPositionalArguments(query_node_typed.getGroupByNode(), query_node_typed.getProjection().getNodes(), scope); + replaceNodesWithPositionalArguments(query_node_typed.getGroupByNode(), query_node_typed.getProjection().getNodes(), scope); // Remove redundant calls to `tuple` function. It simplifies checking if expression is an aggregation key. // It's required to support queries like: SELECT number FROM numbers(3) GROUP BY (number, number % 2) @@ -7853,8 +7854,6 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if (query_node_typed.isCTE()) cte_in_resolve_process.insert(query_node_typed.getCTEName()); - const auto & settings = scope.context->getSettingsRef(); - bool is_rollup_or_cube = query_node_typed.isGroupByWithRollup() || query_node_typed.isGroupByWithCube(); if (query_node_typed.isGroupByWithGroupingSets() @@ -8038,8 +8037,9 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if (query_node_typed.hasOrderBy()) { - if (settings.enable_positional_arguments && scope.context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) - replaceNodesWithPositionalArguments(query_node_typed.getOrderByNode(), query_node_typed.getProjection().getNodes(), scope); + replaceNodesWithPositionalArguments(query_node_typed.getOrderByNode(), query_node_typed.getProjection().getNodes(), scope); + + const auto & settings = scope.context->getSettingsRef(); expandOrderByAll(query_node_typed, settings); resolveSortNodeList(query_node_typed.getOrderByNode(), scope); @@ -8062,8 +8062,7 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier if (query_node_typed.hasLimitBy()) { - if (settings.enable_positional_arguments && scope.context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) - replaceNodesWithPositionalArguments(query_node_typed.getLimitByNode(), query_node_typed.getProjection().getNodes(), scope); + replaceNodesWithPositionalArguments(query_node_typed.getLimitByNode(), query_node_typed.getProjection().getNodes(), scope); resolveExpressionNodeList(query_node_typed.getLimitByNode(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); } From c69d8e18f245b67057f9c77c1c5e06352c805529 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 8 Apr 2024 15:38:15 +0000 Subject: [PATCH 358/470] Dont use constant database name --- ...unknown_identifier_materialized_column.sql | 10 ++++---- .../0_stateless/03053_analyzer_join_alias.sql | 16 ++++++------- ...same_table_name_in_different_databases.sql | 24 +++++++++---------- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql b/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql index 276e48458317..938f270b9e4c 100644 --- a/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql +++ b/tests/queries/0_stateless/03049_unknown_identifier_materialized_column.sql @@ -1,14 +1,14 @@ -- https://github.com/ClickHouse/ClickHouse/issues/54317 SET allow_experimental_analyzer=1; -DROP DATABASE IF EXISTS 03049_database; +DROP DATABASE IF EXISTS 
{CLICKHOUSE_DATABASE:Identifier}; -CREATE DATABASE 03049_database; -USE 03049_database; +CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier}; +USE {CLICKHOUSE_DATABASE:Identifier}; CREATE TABLE l (y String) Engine Memory; CREATE TABLE r (d Date, y String, ty UInt16 MATERIALIZED toYear(d)) Engine Memory; select * from l L left join r R on L.y = R.y where R.ty >= 2019; select * from l left join r on l.y = r.y where r.ty >= 2019; -select * from 03049_database.l left join 03049_database.r on l.y = r.y where r.ty >= 2019; +select * from {CLICKHOUSE_DATABASE:Identifier}.l left join {CLICKHOUSE_DATABASE:Identifier}.r on l.y = r.y where r.ty >= 2019; -DROP DATABASE IF EXISTS 03049_database; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; diff --git a/tests/queries/0_stateless/03053_analyzer_join_alias.sql b/tests/queries/0_stateless/03053_analyzer_join_alias.sql index ef51ec730261..894b8af7c6f4 100644 --- a/tests/queries/0_stateless/03053_analyzer_join_alias.sql +++ b/tests/queries/0_stateless/03053_analyzer_join_alias.sql @@ -1,9 +1,9 @@ -- https://github.com/ClickHouse/ClickHouse/issues/23104 SET allow_experimental_analyzer=1; -DROP DATABASE IF EXISTS test_03053; -CREATE DATABASE test_03053; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; +CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier}; -CREATE TABLE test_03053.base +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.base ( `id` UInt64, `id2` UInt64, @@ -14,7 +14,7 @@ ENGINE=MergeTree() PARTITION BY d ORDER BY (id,id2,d); -CREATE TABLE test_03053.derived1 +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.derived1 ( `id1` UInt64, `d1` UInt64, @@ -24,7 +24,7 @@ ENGINE = MergeTree() PARTITION BY d1 ORDER BY (id1, d1); -CREATE TABLE test_03053.derived2 +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.derived2 ( `id2` UInt64, `d2` UInt64, @@ -39,6 +39,6 @@ SELECT derived2.id2 AS `derived2.id2`, derived2.value2 AS `derived2.value2`, derived1.value1 AS `derived1.value1` -FROM test_03053.base AS base -LEFT JOIN test_03053.derived2 AS derived2 ON base.id2 = derived2.id2 -LEFT JOIN test_03053.derived1 AS derived1 ON base.id = derived1.id1; +FROM {CLICKHOUSE_DATABASE:Identifier}.base AS base +LEFT JOIN {CLICKHOUSE_DATABASE:Identifier}.derived2 AS derived2 ON base.id2 = derived2.id2 +LEFT JOIN {CLICKHOUSE_DATABASE:Identifier}.derived1 AS derived1 ON base.id = derived1.id1; diff --git a/tests/queries/0_stateless/03092_analyzer_same_table_name_in_different_databases.sql b/tests/queries/0_stateless/03092_analyzer_same_table_name_in_different_databases.sql index 03ad9c97d94d..10d18324c3c4 100644 --- a/tests/queries/0_stateless/03092_analyzer_same_table_name_in_different_databases.sql +++ b/tests/queries/0_stateless/03092_analyzer_same_table_name_in_different_databases.sql @@ -1,18 +1,18 @@ -- https://github.com/ClickHouse/ClickHouse/issues/61947 SET allow_experimental_analyzer=1; -DROP DATABASE IF EXISTS d3; -DROP DATABASE IF EXISTS d4; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE_1:Identifier}; -CREATE DATABASE d3; -CREATE DATABASE d4; -CREATE TABLE d3.`1-1` (field Int8) ENGINE = Memory; -CREATE TABLE d4.`2-1` (field Int8) ENGINE = Memory; -CREATE TABLE d4.`3-1` (field Int8) ENGINE = Memory; +CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier}; +CREATE DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`1-1` (field Int8) ENGINE = Memory; +CREATE TABLE {CLICKHOUSE_DATABASE_1:Identifier}.`2-1` (field Int8) ENGINE = Memory; +CREATE TABLE 
{CLICKHOUSE_DATABASE_1:Identifier}.`3-1` (field Int8) ENGINE = Memory; -INSERT INTO d3.`1-1` VALUES (1); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.`1-1` VALUES (1); -SELECT d3.`1-1`.* -FROM d3.`1-1` -LEFT JOIN d4.`2-1` ON d3.`1-1`.field = d4.`2-1`.field -LEFT JOIN d4.`3-1` ON d4.`2-1`.field = d4.`3-1`.field; +SELECT {CLICKHOUSE_DATABASE:Identifier}.`1-1`.* +FROM {CLICKHOUSE_DATABASE:Identifier}.`1-1` +LEFT JOIN {CLICKHOUSE_DATABASE_1:Identifier}.`2-1` ON {CLICKHOUSE_DATABASE:Identifier}.`1-1`.field = {CLICKHOUSE_DATABASE_1:Identifier}.`2-1`.field +LEFT JOIN {CLICKHOUSE_DATABASE_1:Identifier}.`3-1` ON {CLICKHOUSE_DATABASE_1:Identifier}.`2-1`.field = {CLICKHOUSE_DATABASE_1:Identifier}.`3-1`.field; From 6277deb166b5ca3451cc47b4a4abb71604505405 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 8 Apr 2024 17:59:54 +0200 Subject: [PATCH 359/470] small fixes --- src/Storages/MemorySettings.cpp | 4 ++-- src/Storages/StorageMemory.cpp | 20 ++++++++++---------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Storages/MemorySettings.cpp b/src/Storages/MemorySettings.cpp index 7c8c9da209b4..30ae4e126684 100644 --- a/src/Storages/MemorySettings.cpp +++ b/src/Storages/MemorySettings.cpp @@ -46,14 +46,14 @@ void MemorySettings::sanityCheck() const { if (min_bytes_to_keep > max_bytes_to_keep) throw Exception(ErrorCodes::SETTING_CONSTRAINT_VIOLATION, - "`min_bytes_to_keep` setting cannot be higher than `max_bytes_to_keep`. `min_bytes_to_keep`: {}, `max_bytes_to_keep`: {}", + "Setting `min_bytes_to_keep` cannot be higher than the `max_bytes_to_keep`. `min_bytes_to_keep`: {}, `max_bytes_to_keep`: {}", min_bytes_to_keep, max_bytes_to_keep); if (min_rows_to_keep > max_rows_to_keep) throw Exception(ErrorCodes::SETTING_CONSTRAINT_VIOLATION, - "`min_rows_to_keep` setting cannot be higher than `max_rows_to_keep`. `min_rows_to_keep`: {}, `max_rows_to_keep`: {}", + "Setting `min_rows_to_keep` cannot be higher than the `max_rows_to_keep`. `min_rows_to_keep`: {}, `max_rows_to_keep`: {}", min_rows_to_keep, max_rows_to_keep); } diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index c5feb6dad21e..f69c4adb5521 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -304,14 +304,14 @@ void StorageMemory::alter(const DB::AlterCommands & params, DB::ContextPtr conte if (params.isSettingsAlter()) { auto & settings_changes = new_metadata.settings_changes->as(); - auto copy = memory_settings; - copy.applyChanges(settings_changes.changes); - copy.sanityCheck(); + auto changed_settings = memory_settings; + changed_settings.applyChanges(settings_changes.changes); + changed_settings.sanityCheck(); /// When modifying the values of max_bytes_to_keep and max_rows_to_keep to be smaller than the old values, /// the old data needs to be removed. 
- if (!memory_settings.max_bytes_to_keep || memory_settings.max_bytes_to_keep > copy.max_bytes_to_keep - || !memory_settings.max_rows_to_keep || memory_settings.max_rows_to_keep > copy.max_rows_to_keep) + if (!memory_settings.max_bytes_to_keep || memory_settings.max_bytes_to_keep > changed_settings.max_bytes_to_keep + || !memory_settings.max_rows_to_keep || memory_settings.max_rows_to_keep > changed_settings.max_rows_to_keep) { std::lock_guard lock(mutex); @@ -319,14 +319,14 @@ void StorageMemory::alter(const DB::AlterCommands & params, DB::ContextPtr conte UInt64 new_total_rows = total_size_rows.load(std::memory_order_relaxed); UInt64 new_total_bytes = total_size_bytes.load(std::memory_order_relaxed); while (!new_data->empty() - && ((copy.max_bytes_to_keep && new_total_bytes > copy.max_bytes_to_keep) - || (copy.max_rows_to_keep && new_total_rows > copy.max_rows_to_keep))) + && ((changed_settings.max_bytes_to_keep && new_total_bytes > changed_settings.max_bytes_to_keep) + || (changed_settings.max_rows_to_keep && new_total_rows > changed_settings.max_rows_to_keep))) { Block oldest_block = new_data->front(); UInt64 rows_to_remove = oldest_block.rows(); UInt64 bytes_to_remove = oldest_block.allocatedBytes(); - if (new_total_bytes - bytes_to_remove < copy.min_bytes_to_keep - || new_total_rows - rows_to_remove < copy.min_rows_to_keep) + if (new_total_bytes - bytes_to_remove < changed_settings.min_bytes_to_keep + || new_total_rows - rows_to_remove < changed_settings.min_rows_to_keep) { break; // stop - removing next block will put us under min_bytes / min_rows threshold } @@ -341,7 +341,7 @@ void StorageMemory::alter(const DB::AlterCommands & params, DB::ContextPtr conte total_size_rows.store(new_total_rows, std::memory_order_relaxed); total_size_bytes.store(new_total_bytes, std::memory_order_relaxed); } - memory_settings = std::move(copy); + memory_settings = std::move(changed_settings); } DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); From baa62cdaeeb23aba770efe6368bba6ec97cf6214 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Mon, 8 Apr 2024 16:09:47 +0000 Subject: [PATCH 360/470] CI: no CI Running status for MQ --- tests/ci/run_check.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/ci/run_check.py b/tests/ci/run_check.py index 6187656983e8..435a5f726f25 100644 --- a/tests/ci/run_check.py +++ b/tests/ci/run_check.py @@ -201,14 +201,17 @@ def main(): ci_report_url = create_ci_report(pr_info, []) print("::notice ::Can run") - post_commit_status( - commit, - PENDING, - ci_report_url, - description, - CI_STATUS_NAME, - pr_info, - ) + + if not pr_info.is_merge_queue(): + # we need clean CI status for MQ to merge (no pending statuses) + post_commit_status( + commit, + PENDING, + ci_report_url, + description, + CI_STATUS_NAME, + pr_info, + ) if __name__ == "__main__": From 39c6188a2c0b7014136e1d9d9f16c684741fb0cb Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 8 Apr 2024 16:38:19 +0000 Subject: [PATCH 361/470] Fix logical error 'numbers_storage.step != UInt64{0}' --- src/TableFunctions/TableFunctionNumbers.cpp | 4 ++++ .../03037_zero_step_in_numbers_table_function.reference | 0 .../0_stateless/03037_zero_step_in_numbers_table_function.sql | 2 ++ 3 files changed, 6 insertions(+) create mode 100644 tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.reference create mode 100644 tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.sql diff --git 
a/src/TableFunctions/TableFunctionNumbers.cpp b/src/TableFunctions/TableFunctionNumbers.cpp index 2989eb5fbef0..16f56eab9812 100644 --- a/src/TableFunctions/TableFunctionNumbers.cpp +++ b/src/TableFunctions/TableFunctionNumbers.cpp @@ -20,6 +20,7 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int ILLEGAL_TYPE_OF_ARGUMENT; +extern const int BAD_ARGUMENTS; } namespace @@ -78,6 +79,9 @@ StoragePtr TableFunctionNumbers::executeImpl( UInt64 length = arguments.size() >= 2 ? evaluateArgument(context, arguments[1]) : evaluateArgument(context, arguments[0]); UInt64 step = arguments.size() == 3 ? evaluateArgument(context, arguments[2]) : 1; + if (!step) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function {} requires step to be a positive number", getName()); + auto res = std::make_shared( StorageID(getDatabaseName(), table_name), multithreaded, std::string{"number"}, length, offset, step); res->startup(); diff --git a/tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.reference b/tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.sql b/tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.sql new file mode 100644 index 000000000000..08fafd6ddfa8 --- /dev/null +++ b/tests/queries/0_stateless/03037_zero_step_in_numbers_table_function.sql @@ -0,0 +1,2 @@ +select * from numbers(1, 10, 0); -- {serverError BAD_ARGUMENTS} + From e5282bf39fa1a930b0d93bb4077e1baf6082f6c7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Mon, 8 Apr 2024 16:39:55 +0000 Subject: [PATCH 362/470] =?UTF-8?q?I=20=E2=99=A5=20group=5Fby=5Fuse=5Fnull?= =?UTF-8?q?s.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 14 ++++++++++++-- .../03023_group_by_use_nulls_analyzer_crashes.sql | 8 ++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 4d862639e15b..793cec912932 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6694,8 +6694,11 @@ void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierR { for (const auto & grouping_set : query_node_typed.getGroupBy().getNodes()) { - for (const auto & group_by_elem : grouping_set->as()->getNodes()) + for (auto & group_by_elem : grouping_set->as()->getNodes()) + { + group_by_elem = group_by_elem->clone(); scope.nullable_group_by_keys.insert(group_by_elem); + } } } } @@ -6713,8 +6716,15 @@ void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierR if (scope.group_by_use_nulls) { - for (const auto & group_by_elem : query_node_typed.getGroupBy().getNodes()) + for (auto & group_by_elem : query_node_typed.getGroupBy().getNodes()) + { + /// Clone is needed cause aliases share subtrees. + /// If not clone, a part of GROUP BY key could be replaced to nullable + /// by replacing a part of alias from another subtree to nullable. 
+ /// See 03023_group_by_use_nulls_analyzer_crashes + group_by_elem = group_by_elem->clone(); scope.nullable_group_by_keys.insert(group_by_elem); + } } } } diff --git a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql index 2f4c8b1c75ea..b6ca454c1cfe 100644 --- a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql +++ b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql @@ -9,3 +9,11 @@ SELECT tuple(number) AS x FROM numbers(10) GROUP BY GROUPING SETS (number) order SELECT ignore(toFixedString('Lambda as function parameter', 28), toNullable(28), ignore(8)), sum(marks) FROM system.parts WHERE database = currentDatabase() GROUP BY GROUPING SETS ((2)) FORMAT Null settings optimize_injective_functions_in_group_by=1, optimize_group_by_function_keys=1, group_by_use_nulls=1; -- { serverError ILLEGAL_AGGREGATION } SELECT toLowCardinality(materialize('a' AS key)), 'b' AS value GROUP BY key WITH CUBE SETTINGS group_by_use_nulls = 1; + +SELECT tuple(tuple(number)) AS x +FROM numbers(10) +GROUP BY (number, (toString(x), number)) + WITH CUBE +SETTINGS group_by_use_nulls = 1 FORMAT Null; + +SELECT tuple(number + 1) AS x FROM numbers(10) GROUP BY number + 1, toString(x) WITH CUBE settings group_by_use_nulls=1 FORMAT Null; From b318091528eed9db6d04d25bae115d24d3b82eb8 Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 8 Apr 2024 17:17:04 +0000 Subject: [PATCH 363/470] Don't check overflow in dotProduct in undefined sanitizer --- src/Functions/array/arrayDotProduct.cpp | 4 ++-- .../queries/0_stateless/03037_dot_product_overflow.reference | 1 + tests/queries/0_stateless/03037_dot_product_overflow.sql | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 tests/queries/0_stateless/03037_dot_product_overflow.reference create mode 100644 tests/queries/0_stateless/03037_dot_product_overflow.sql diff --git a/src/Functions/array/arrayDotProduct.cpp b/src/Functions/array/arrayDotProduct.cpp index 783843a89d51..4551140acc3e 100644 --- a/src/Functions/array/arrayDotProduct.cpp +++ b/src/Functions/array/arrayDotProduct.cpp @@ -66,13 +66,13 @@ struct DotProduct }; template - static void accumulate(State & state, Type x, Type y) + static NO_SANITIZE_UNDEFINED void accumulate(State & state, Type x, Type y) { state.sum += x * y; } template - static void combine(State & state, const State & other_state) + static NO_SANITIZE_UNDEFINED void combine(State & state, const State & other_state) { state.sum += other_state.sum; } diff --git a/tests/queries/0_stateless/03037_dot_product_overflow.reference b/tests/queries/0_stateless/03037_dot_product_overflow.reference new file mode 100644 index 000000000000..573541ac9702 --- /dev/null +++ b/tests/queries/0_stateless/03037_dot_product_overflow.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/03037_dot_product_overflow.sql b/tests/queries/0_stateless/03037_dot_product_overflow.sql new file mode 100644 index 000000000000..94d5eba62552 --- /dev/null +++ b/tests/queries/0_stateless/03037_dot_product_overflow.sql @@ -0,0 +1,2 @@ +select ignore(dotProduct(materialize([9223372036854775807, 1]), materialize([-3, 1]))); + From f426b5f12baf3ab9b49f02e26c5e111d8506ec2c Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 8 Apr 2024 19:21:44 +0200 Subject: [PATCH 364/470] Fix spell-check --- docs/en/sql-reference/functions/array-functions.md | 4 ++-- utils/check-style/aspell-ignore/en/aspell-dict.txt | 7 +++++++ 2 files 
changed, 9 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index c8bb3ee7604a..f3a031f9da74 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -987,7 +987,7 @@ Result: [[1,1,1],[2,3,2],[2]] ``` -Changing `clear_depth=2`, results in elements being enumerated seperately for each row. +Changing `clear_depth=2`, results in elements being enumerated separately for each row. Query: @@ -1703,7 +1703,7 @@ Result: [[1,1,2,3],[4,5,1,2]] ``` -Changing `clear_depth=2` results in the enumeration occuring separetely for each row anew. +Changing `clear_depth=2` results in the enumeration occurring separately for each row anew. Query: diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 8aa2a463c477..c3bf44666d2c 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -29,6 +29,13 @@ Alexey AnyEvent AppleClang Approximative +arrayDotProduct +arrayEnumerateDenseRanked +arrayEnumerateUniqRanked +arrayFirstOrNull +arrayLastOrNull +arrayPartialShuffle +arrayShuffle ArrayJoin ArrowStream AsyncInsertCacheSize From 75aff7fc1aed1c56cfd406466a99d5739895f0c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Mon, 8 Apr 2024 15:50:10 +0200 Subject: [PATCH 365/470] Remove the code --- contrib/avro-cmake/CMakeLists.txt | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/contrib/avro-cmake/CMakeLists.txt b/contrib/avro-cmake/CMakeLists.txt index c99c7dd4624d..96f740b6dd2e 100644 --- a/contrib/avro-cmake/CMakeLists.txt +++ b/contrib/avro-cmake/CMakeLists.txt @@ -59,15 +59,3 @@ target_link_libraries (_avrocpp PRIVATE boost::headers_only boost::iostreams) target_compile_definitions (_avrocpp PUBLIC SNAPPY_CODEC_AVAILABLE) target_include_directories (_avrocpp PRIVATE ${SNAPPY_INCLUDE_DIR}) target_link_libraries (_avrocpp PRIVATE ch_contrib::snappy) - -# create a symlink to include headers with -set(AVRO_INCLUDE_DIR "${CMAKE_CURRENT_BINARY_DIR}/include") -ADD_CUSTOM_COMMAND(OUTPUT "${AVRO_INCLUDE_DIR}" - COMMAND ${CMAKE_COMMAND} -E make_directory "${AVRO_INCLUDE_DIR}" - COMMAND ${CMAKE_COMMAND} -E create_symlink "${AVROCPP_ROOT_DIR}/api" "${AVRO_INCLUDE_DIR}/avro" - DEPENDS "${AVROCPP_ROOT_DIR}/api" -) -ADD_CUSTOM_TARGET(avro_symlink_headers ALL - DEPENDS "${AVRO_INCLUDE_DIR}") -add_dependencies(_avrocpp avro_symlink_headers) -target_include_directories(_avrocpp SYSTEM BEFORE PUBLIC "${AVRO_INCLUDE_DIR}") From 0697f7acb0a22b02faaa962c57214fbfc375f020 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 2 Apr 2024 22:38:02 +0200 Subject: [PATCH 366/470] Add tests --- ...inguish_bool_and_int_in_settings.reference | 35 ++++++++++ ...1_distinguish_bool_and_int_in_settings.sql | 65 +++++++++++++++++++ .../03032_async_backup_restore.reference | 5 ++ .../0_stateless/03032_async_backup_restore.sh | 56 ++++++++++++++++ 4 files changed, 161 insertions(+) create mode 100644 tests/queries/0_stateless/03031_distinguish_bool_and_int_in_settings.reference create mode 100644 tests/queries/0_stateless/03031_distinguish_bool_and_int_in_settings.sql create mode 100644 tests/queries/0_stateless/03032_async_backup_restore.reference create mode 100755 tests/queries/0_stateless/03032_async_backup_restore.sh diff --git a/tests/queries/0_stateless/03031_distinguish_bool_and_int_in_settings.reference 
b/tests/queries/0_stateless/03031_distinguish_bool_and_int_in_settings.reference new file mode 100644 index 000000000000..fc1a2052b689 --- /dev/null +++ b/tests/queries/0_stateless/03031_distinguish_bool_and_int_in_settings.reference @@ -0,0 +1,35 @@ +-- Custom settings from system.settings +custom_f1 Bool_0 Custom +custom_f2 Bool_0 Custom +custom_f3 Bool_0 Custom +custom_n0 UInt64_0 Custom +custom_n1 UInt64_1 Custom +custom_t1 Bool_1 Custom +custom_t2 Bool_1 Custom +custom_t3 Bool_1 Custom +-- Custom settings via getSetting() +custom_f1 false Bool +custom_f2 false Bool +custom_f3 false Bool +custom_n0 0 UInt8 +custom_n1 1 UInt8 +custom_t1 true Bool +custom_t2 true Bool +custom_t3 true Bool +-- Built-in settings +async_insert 0 Bool +async_insert false Bool +async_insert 0 Bool +async_insert false Bool +async_insert 0 Bool +async_insert false Bool +async_insert 0 Bool +async_insert false Bool +async_insert 1 Bool +async_insert true Bool +async_insert 1 Bool +async_insert true Bool +async_insert 1 Bool +async_insert true Bool +async_insert 1 Bool +async_insert true Bool diff --git a/tests/queries/0_stateless/03031_distinguish_bool_and_int_in_settings.sql b/tests/queries/0_stateless/03031_distinguish_bool_and_int_in_settings.sql new file mode 100644 index 000000000000..33be34a40a94 --- /dev/null +++ b/tests/queries/0_stateless/03031_distinguish_bool_and_int_in_settings.sql @@ -0,0 +1,65 @@ +-- Custom settings must remember their types - whether it's a boolean or an integer. + +-- Different ways to set a boolean. +SET custom_f1 = false; +SET custom_f2 = False; +SET custom_f3 = FALSE; + +SET custom_n0 = 0; +SET custom_n1 = 1; + +SET custom_t1 = true; +SET custom_t2 = True; +SET custom_t3 = TRUE; + +SELECT '-- Custom settings from system.settings'; + +SELECT name, value, type FROM system.settings WHERE startsWith(name, 'custom_') ORDER BY name; + +SELECT '-- Custom settings via getSetting()'; + +SELECT 'custom_f1' AS name, getSetting(name) AS value, toTypeName(value); +SELECT 'custom_f2' AS name, getSetting(name) AS value, toTypeName(value); +SELECT 'custom_f3' AS name, getSetting(name) AS value, toTypeName(value); + +SELECT 'custom_n0' AS name, getSetting(name) AS value, toTypeName(value); +SELECT 'custom_n1' AS name, getSetting(name) AS value, toTypeName(value); + +SELECT 'custom_t1' AS name, getSetting(name) AS value, toTypeName(value); +SELECT 'custom_t2' AS name, getSetting(name) AS value, toTypeName(value); +SELECT 'custom_t3' AS name, getSetting(name) AS value, toTypeName(value); + +-- Built-in settings have hardcoded types. 
+SELECT '-- Built-in settings'; + +SET async_insert = false; +SELECT name, value, type FROM system.settings WHERE name = 'async_insert'; +SELECT 'async_insert' AS name, getSetting(name) AS value, toTypeName(value); + +SET async_insert = False; +SELECT name, value, type FROM system.settings WHERE name = 'async_insert'; +SELECT 'async_insert' AS name, getSetting(name) AS value, toTypeName(value); + +SET async_insert = FALSE; +SELECT name, value, type FROM system.settings WHERE name = 'async_insert'; +SELECT 'async_insert' AS name, getSetting(name) AS value, toTypeName(value); + +SET async_insert = 0; +SELECT name, value, type FROM system.settings WHERE name = 'async_insert'; +SELECT 'async_insert' AS name, getSetting(name) AS value, toTypeName(value); + +SET async_insert = 1; +SELECT name, value, type FROM system.settings WHERE name = 'async_insert'; +SELECT 'async_insert' AS name, getSetting(name) AS value, toTypeName(value); + +SET async_insert = true; +SELECT name, value, type FROM system.settings WHERE name = 'async_insert'; +SELECT 'async_insert' AS name, getSetting(name) AS value, toTypeName(value); + +SET async_insert = True; +SELECT name, value, type FROM system.settings WHERE name = 'async_insert'; +SELECT 'async_insert' AS name, getSetting(name) AS value, toTypeName(value); + +SET async_insert = TRUE; +SELECT name, value, type FROM system.settings WHERE name = 'async_insert'; +SELECT 'async_insert' AS name, getSetting(name) AS value, toTypeName(value); diff --git a/tests/queries/0_stateless/03032_async_backup_restore.reference b/tests/queries/0_stateless/03032_async_backup_restore.reference new file mode 100644 index 000000000000..de99716769b0 --- /dev/null +++ b/tests/queries/0_stateless/03032_async_backup_restore.reference @@ -0,0 +1,5 @@ +BACKUP_CREATED +RESTORED +2 +80 +-12345 diff --git a/tests/queries/0_stateless/03032_async_backup_restore.sh b/tests/queries/0_stateless/03032_async_backup_restore.sh new file mode 100755 index 000000000000..81fe12bb0f1b --- /dev/null +++ b/tests/queries/0_stateless/03032_async_backup_restore.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} -nm --query " +DROP TABLE IF EXISTS tbl; +DROP TABLE IF EXISTS tbl2; +CREATE TABLE tbl (a Int32) ENGINE = MergeTree() ORDER BY tuple(); +INSERT INTO tbl VALUES (2), (80), (-12345); +" + +function start_async() +{ + local command="$1" + local first_column="s/^\([^\t]*\)\t.*/\1/" + echo $(${CLICKHOUSE_CLIENT} --query "$command" | sed "${first_column}") +} + +function wait_status() +{ + local operation_id="$1" + local expected_status="$2" + local timeout=60 + local start=$EPOCHSECONDS + while true; do + local current_status=$(${CLICKHOUSE_CLIENT} --query "SELECT status FROM system.backups WHERE id='${operation_id}'") + if [ "${current_status}" == "${expected_status}" ]; then + echo "${current_status}" + break + fi + if ((EPOCHSECONDS-start > timeout )); then + echo "Timeout while waiting for operation ${operation_id} to come to status ${expected_status}. The current status is ${current_status}." + exit 1 + fi + sleep 0.1 + done +} + +# Making a backup. +backup_name="Disk('backups', '${CLICKHOUSE_TEST_UNIQUE_NAME}')" +backup_operation_id=$(start_async "BACKUP TABLE tbl TO ${backup_name} ASYNC") +wait_status ${backup_operation_id} "BACKUP_CREATED" + +# Restoring from that backup. 
+restore_operation_id=$(start_async "RESTORE TABLE tbl AS tbl2 FROM ${backup_name} ASYNC") +wait_status ${restore_operation_id} "RESTORED" + +# Check the result of that restoration. +${CLICKHOUSE_CLIENT} --query "SELECT * FROM tbl2" + +${CLICKHOUSE_CLIENT} -nm --query " +DROP TABLE tbl; +DROP TABLE tbl2; +" From bfb470535a4b50555fe5b59a1e5615a691757af3 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 8 Apr 2024 15:28:42 +0200 Subject: [PATCH 367/470] Allow conversions in Field::safeGet(): int64 <-> uint64 and int64 <-> bool and uint64 <-> bool --- src/Core/Field.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Core/Field.h b/src/Core/Field.h index eb01be6c43db..4424d669c4dc 100644 --- a/src/Core/Field.h +++ b/src/Core/Field.h @@ -898,11 +898,13 @@ NearestFieldType> & Field::get() template auto & Field::safeGet() { - const Types::Which requested = TypeToEnum>>::value; + const Types::Which target = TypeToEnum>>::value; - if (which != requested) + /// We allow converting int64 <-> uint64, int64 <-> bool, uint64 <-> bool in safeGet(). + if (target != which + && (!isInt64OrUInt64orBoolFieldType(target) || !isInt64OrUInt64orBoolFieldType(which))) throw Exception(ErrorCodes::BAD_GET, - "Bad get: has {}, requested {}", getTypeName(), requested); + "Bad get: has {}, requested {}", getTypeName(), target); return get(); } From ade0a311cf25869f84adddb8739491095b91ea89 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 8 Apr 2024 20:03:12 +0200 Subject: [PATCH 368/470] Fix tests --- tests/integration/test_distributed_config/test.py | 2 +- tests/queries/0_stateless/02494_query_cache_secrets.reference | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_distributed_config/test.py b/tests/integration/test_distributed_config/test.py index bf4bb5a4335c..e551e69b93f4 100644 --- a/tests/integration/test_distributed_config/test.py +++ b/tests/integration/test_distributed_config/test.py @@ -31,7 +31,7 @@ def test_distibuted_settings(start_cluster): DETACH TABLE dist_1; """ ) - assert "flush_on_detach = 1" in node.query("SHOW CREATE dist_1") + assert "flush_on_detach = true" in node.query("SHOW CREATE dist_1") # flush_on_detach=true, so data_1 should have 1 row assert int(node.query("SELECT count() FROM data_1")) == 1 diff --git a/tests/queries/0_stateless/02494_query_cache_secrets.reference b/tests/queries/0_stateless/02494_query_cache_secrets.reference index 306374eed4b1..82833f28369a 100644 --- a/tests/queries/0_stateless/02494_query_cache_secrets.reference +++ b/tests/queries/0_stateless/02494_query_cache_secrets.reference @@ -1,2 +1,2 @@ A2193552DCF8A9F99AC35F86BC4D2FFD -SELECT hex(encrypt(\'aes-128-ecb\', \'[HIDDEN]\')) SETTINGS use_query_cache = 1 +SELECT hex(encrypt(\'aes-128-ecb\', \'[HIDDEN]\')) SETTINGS use_query_cache = true From 75e46f865c1d445c23d55e918f1bb83669250b46 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 8 Apr 2024 19:19:18 +0200 Subject: [PATCH 369/470] Avoid uncaught exception for onFault handler onFault() is called from the std::thread, and it should catch all exceptions, otherwise you can unrelated fatal errors.
stacktrace 2024.04.08 13:15:29.526847 [ 2067427 ] {} BaseDaemon: (version 24.2.2.71 (official build), build id: 57F857DCFE8BA6838F6463E4665CD700852BFF0E, git hash: 9293d361e72be9f6ccfd444d504e2137b2e837cf) (from thread 2118603) Terminate called for uncaught exception: 2024.04.08 13:15:29.526904 [ 2067427 ] {} BaseDaemon: Code: 210. DB::NetException: I/O error: Broken pipe, while writing to socket (10.61.7.253:9000 -> 10.101.53.134:46036). (NETWORK_ERROR), Stack trace (when copying this message, always include the lines below): 2024.04.08 13:15:29.526983 [ 2067427 ] {} BaseDaemon: 2024.04.08 13:15:29.527042 [ 2067427 ] {} BaseDaemon: 0. ./build_docker/./src/Common/Exception.cpp:96: DB::Exception::Exception(DB::Exception::MessageMasked&&, int, bool) @ 0x000000000cf5af1b 2024.04.08 13:15:29.527061 [ 2067427 ] {} BaseDaemon: 1. ./contrib/llvm-project/libcxx/include/string:1499: DB::NetException::NetException(int, FormatStringHelperImpl::type, std::type_identity::type, std::type_identity::type>, String&&, String&&, String&&) @ 0x000000000d07dfe1 2024.04.08 13:15:29.527082 [ 2067427 ] {} BaseDaemon: 2. ./build_docker/./src/IO/WriteBufferFromPocoSocket.cpp:0: DB::WriteBufferFromPocoSocket::nextImpl() @ 0x000000000d07e97e 2024.04.08 13:15:29.527125 [ 2067427 ] {} BaseDaemon: 3. ./src/IO/WriteBuffer.h:65: DB::TCPHandler::sendLogs() @ 0x0000000012fd31c1 2024.04.08 13:15:29.527144 [ 2067427 ] {} BaseDaemon: 4. ./contrib/llvm-project/libcxx/include/atomic:958: void std::__function::__policy_invoker::__call_impl>(std::__function::__policy_storage const*) @ 0x0000000012fdcc6e 2024.04.08 13:15:29.527163 [ 2067427 ] {} BaseDaemon: 5. ./contrib/llvm-project/libcxx/include/__functional/function.h:0: ? @ 0x000000000d25c65b 2024.04.08 13:15:29.527182 [ 2067427 ] {} BaseDaemon: 6. ./build_docker/./src/Daemon/BaseDaemon.cpp:286: void* std::__thread_proxy[abi:v15000]>, SignalListener::run()::'lambda'()>>(void*) @ 0x000000000d25e775 2024.04.08 13:15:29.527191 [ 2067427 ] {} BaseDaemon: 7. ? @ 0x00007f0fe4906609 2024.04.08 13:15:29.527211 [ 2067427 ] {} BaseDaemon: 8. ? 
@ 0x00007f0fe482b353 2024.04.08 13:15:29.534235 [ 2118604 ] {} BaseDaemon: ########## Short fault info ############ 2024.04.08 13:15:29.534347 [ 2118604 ] {} BaseDaemon: (version 24.2.2.71 (official build), build id: 57F857DCFE8BA6838F6463E4665CD700852BFF0E, git hash: 9293d361e72be9f6ccfd444d504e2137b2e837cf) (from thread 2118603) Received signal 6 2024.04.08 13:15:29.534476 [ 2118604 ] {} BaseDaemon: Signal description: Aborted 2024.04.08 13:15:29.534484 [ 2118604 ] {} BaseDaemon: 2024.04.08 13:15:29.534510 [ 2118604 ] {} BaseDaemon: Stack trace: 0x00007f0fe474f00b 0x00007f0fe472e859 0x000000000d24f72e 0x0000000017d15be3 0x0000000017d15818 0x0000000012fd350d 0x0000000012fdcc6e 0x000000000d25c65b 0x000000000d25e775 0x00007f0fe4906609 0x00007f0fe482b353 2024.04.08 13:15:29.534531 [ 2118604 ] {} BaseDaemon: ######################################## 2024.04.08 13:15:29.534609 [ 2118604 ] {} BaseDaemon: (version 24.2.2.71 (official build), build id: 57F857DCFE8BA6838F6463E4665CD700852BFF0E, git hash: 9293d361e72be9f6ccfd444d504e2137b2e837cf) (from thread 2118603) (no query) Received signal Aborted (6) 2024.04.08 13:15:29.534638 [ 2118604 ] {} BaseDaemon: 2024.04.08 13:15:29.534663 [ 2118604 ] {} BaseDaemon: Stack trace: 0x00007f0fe474f00b 0x00007f0fe472e859 0x000000000d24f72e 0x0000000017d15be3 0x0000000017d15818 0x0000000012fd350d 0x0000000012fdcc6e 0x000000000d25c65b 0x000000000d25e775 0x00007f0fe4906609 0x00007f0fe482b353 2024.04.08 13:15:29.534711 [ 2118604 ] {} BaseDaemon: 2. ? @ 0x00007f0fe474f00b 2024.04.08 13:15:29.534728 [ 2118604 ] {} BaseDaemon: 3. ? @ 0x00007f0fe472e859 2024.04.08 13:15:29.613230 [ 2118604 ] {} BaseDaemon: 4.0. inlined from ./contrib/llvm-project/libcxxabi/src/cxa_exception.cpp:670: __cxa_decrement_exception_refcount 2024.04.08 13:15:29.613283 [ 2118604 ] {} BaseDaemon: 4.1. inlined from ./contrib/llvm-project/libcxx/src/support/runtime/exception_pointer_cxxabi.ipp:17: ~exception_ptr 2024.04.08 13:15:29.613311 [ 2118604 ] {} BaseDaemon: 4. ./build_docker/./src/Daemon/BaseDaemon.cpp:591: terminate_handler() @ 0x000000000d24f72e 2024.04.08 13:15:29.617590 [ 2118604 ] {} BaseDaemon: 5. ./build_docker/./contrib/llvm-project/libcxxabi/src/cxa_handlers.cpp:61: std::__terminate(void (*)()) @ 0x0000000017d15be3 2024.04.08 13:15:29.619575 [ 2118604 ] {} BaseDaemon: 6. ./build_docker/./contrib/llvm-project/libcxxabi/src/cxa_exception.cpp:0: __cxa_rethrow @ 0x0000000017d15818 2024.04.08 13:15:30.104097 [ 2118604 ] {} BaseDaemon: 7.0. inlined from ./src/IO/WriteBuffer.h:0: DB::WriteBuffer::next() 2024.04.08 13:15:30.104331 [ 2118604 ] {} BaseDaemon: 7.1. inlined from ./build_docker/./src/Server/TCPHandler.cpp:2225: DB::TCPHandler::sendLogData(DB::Block const&) 2024.04.08 13:15:30.104408 [ 2118604 ] {} BaseDaemon: 7. ./build_docker/./src/Server/TCPHandler.cpp:2303: DB::TCPHandler::sendLogs() @ 0x0000000012fd350d 2024.04.08 13:15:30.217481 [ 2118604 ] {} BaseDaemon: 8.0. inlined from ./contrib/llvm-project/libcxx/include/atomic:958: double std::__cxx_atomic_load[abi:v15000](std::__cxx_atomic_base_impl const*, std::memory_order) 2024.04.08 13:15:30.217579 [ 2118604 ] {} BaseDaemon: 8.1. inlined from ./contrib/llvm-project/libcxx/include/atomic:1588: std::__atomic_base::load[abi:v15000](std::memory_order) const 2024.04.08 13:15:30.217617 [ 2118604 ] {} BaseDaemon: 8.2. inlined from ./src/Common/ThreadFuzzer.cpp:407: pthread_mutex_unlock 2024.04.08 13:15:30.217644 [ 2118604 ] {} BaseDaemon: 8.3. 
inlined from ./contrib/llvm-project/libcxx/include/__threading_support:314: std::__libcpp_mutex_unlock[abi:v15000](pthread_mutex_t*) 2024.04.08 13:15:30.217676 [ 2118604 ] {} BaseDaemon: 8.4. inlined from ./contrib/llvm-project/libcxx/src/mutex.cpp:52: std::mutex::unlock() 2024.04.08 13:15:30.217699 [ 2118604 ] {} BaseDaemon: 8.5. inlined from ./contrib/llvm-project/libcxx/include/__mutex_base:100: ~lock_guard 2024.04.08 13:15:30.217747 [ 2118604 ] {} BaseDaemon: 8.6. inlined from ./build_docker/./src/Server/TCPHandler.cpp:392: operator() 2024.04.08 13:15:30.217776 [ 2118604 ] {} BaseDaemon: 8.7. inlined from ./contrib/llvm-project/libcxx/include/__functional/invoke.h:394: ? 2024.04.08 13:15:30.217796 [ 2118604 ] {} BaseDaemon: 8.8. inlined from ./contrib/llvm-project/libcxx/include/__functional/invoke.h:479: ? 2024.04.08 13:15:30.217859 [ 2118604 ] {} BaseDaemon: 8.9. inlined from ./contrib/llvm-project/libcxx/include/__functional/function.h:235: ? 2024.04.08 13:15:30.217878 [ 2118604 ] {} BaseDaemon: 8. ./contrib/llvm-project/libcxx/include/__functional/function.h:716: ? @ 0x0000000012fdcc6e 2024.04.08 13:15:30.240809 [ 2118604 ] {} BaseDaemon: 9. ./contrib/llvm-project/libcxx/include/__functional/function.h:0: ? @ 0x000000000d25c65b 2024.04.08 13:15:30.283617 [ 2118604 ] {} BaseDaemon: 10.0. inlined from ./build_docker/./src/Daemon/BaseDaemon.cpp:286: operator() 2024.04.08 13:15:30.283686 [ 2118604 ] {} BaseDaemon: 10.1. inlined from ./contrib/llvm-project/libcxx/include/__functional/invoke.h:394: ? 2024.04.08 13:15:30.283725 [ 2118604 ] {} BaseDaemon: 10.2. inlined from ./contrib/llvm-project/libcxx/include/thread:284: void std::__thread_execute[abi:v15000]>, SignalListener::run()::'lambda'()>(std::tuple>, SignalListener::run()::'lambda'()>&, std::__tuple_indices<>) 2024.04.08 13:15:30.283755 [ 2118604 ] {} BaseDaemon: 10. ./contrib/llvm-project/libcxx/include/thread:295: void* std::__thread_proxy[abi:v15000]>, SignalListener::run()::'lambda'()>>(void*) @ 0x000000000d25e775 2024.04.08 13:15:30.283799 [ 2118604 ] {} BaseDaemon: 11. ? @ 0x00007f0fe4906609 2024.04.08 13:15:30.283821 [ 2118604 ] {} BaseDaemon: 12. ? @ 0x00007f0fe482b353 2024.04.08 13:15:30.574588 [ 2118604 ] {} BaseDaemon: Integrity check of the executable successfully passed (checksum: 3485110FABDB0C94202BD684999A9814) 2024.04.08 13:15:30.574704 [ 2118604 ] {} BaseDaemon: Report this error to https://github.com/ClickHouse/ClickHouse/issues
v2: fatal logging Signed-off-by: Azat Khuzhin --- src/Daemon/BaseDaemon.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index cc22db3969c4..019ad8e716bb 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -332,6 +332,7 @@ class SignalListener : public Poco::Runnable const std::vector & thread_frame_pointers, UInt32 thread_num, ThreadStatus * thread_ptr) const + try { ThreadStatus thread_status; @@ -543,6 +544,11 @@ class SignalListener : public Poco::Runnable fatal_error_printed.test_and_set(); } + catch (...) + { + PreformattedMessage message = getCurrentExceptionMessageAndPattern(true); + LOG_FATAL(getLogger(__PRETTY_FUNCTION__), message); + } }; From 5db0df0bc1ff7be9e39dac189c7bfe2dfb6179ae Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 8 Apr 2024 21:48:36 +0300 Subject: [PATCH 370/470] Update BaseDaemon.cpp --- src/Daemon/BaseDaemon.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Daemon/BaseDaemon.cpp b/src/Daemon/BaseDaemon.cpp index 019ad8e716bb..592ca4e55d52 100644 --- a/src/Daemon/BaseDaemon.cpp +++ b/src/Daemon/BaseDaemon.cpp @@ -520,7 +520,7 @@ class SignalListener : public Poco::Runnable } } - /// ClickHouse Keeper does not link to some part of Settings. + /// ClickHouse Keeper does not link to some parts of Settings. #ifndef CLICKHOUSE_KEEPER_STANDALONE_BUILD /// List changed settings. if (!query_id.empty()) @@ -538,7 +538,7 @@ class SignalListener : public Poco::Runnable } #endif - /// When everything is done, we will try to send these error messages to client. + /// When everything is done, we will try to send these error messages to the client. if (thread_ptr) thread_ptr->onFatalError(); @@ -546,6 +546,7 @@ class SignalListener : public Poco::Runnable } catch (...) { + /// onFault is called from the std::thread, and it should catch all exceptions; otherwise, you can get unrelated fatal errors. PreformattedMessage message = getCurrentExceptionMessageAndPattern(true); LOG_FATAL(getLogger(__PRETTY_FUNCTION__), message); } From 33eba42c13c6894b05972df9588b49a78051d5f0 Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 8 Apr 2024 20:50:41 +0200 Subject: [PATCH 371/470] Document wyHash64 --- .../sql-reference/functions/hash-functions.md | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 90c7d8c2206b..ab97e43c6243 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -1077,9 +1077,7 @@ Result: ## wordShingleSimHashUTF8 -Splits a UTF-8 string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case sensitive. -Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. **Syntax** @@ -1153,6 +1151,42 @@ Result: └────────────┘ ``` +## wyHash64 + +Produces a 64-bit [wyHash64](https://github.com/wangyi-fudan/wyhash) hash value. + +**Syntax** + +```sql +wyHash64(string) +``` + +**Arguments** + +- `string` — String. [String](/docs/en/sql-reference/data-types/string.md). + +**Returned value** + +- Hash value. 
+ +Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +```sql +SELECT wyHash64('ClickHouse') AS Hash; +``` + +Result: + +```response +┌─────────────────Hash─┐ +│ 12336419557878201794 │ +└──────────────────────┘ +``` + ## ngramMinHash Splits a ASCII string into n-grams of `ngramsize` symbols and calculates hash values for each n-gram. Uses `hashnum` minimum hashes to calculate the minimum hash and `hashnum` maximum hashes to calculate the maximum hash. Returns a tuple with these hashes. Is case sensitive. From 9a08f1ddd6f414097a6bf00f1159924d4a14b4e4 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 8 Apr 2024 20:53:19 +0200 Subject: [PATCH 372/470] Update StorageFileLog.cpp --- src/Storages/FileLog/StorageFileLog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 7b0cfdf6a6cc..ba6528b6e4ca 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -152,7 +152,7 @@ StorageFileLog::StorageFileLog( if (!fileOrSymlinkPathStartsWith(path, getContext()->getUserFilesPath())) { - if (LoadingStrictnessLevel::ATTACH <= mode) + if (LoadingStrictnessLevel::SECONDARY_CREATE <= mode) { LOG_ERROR(log, "The absolute data path should be inside `user_files_path`({})", getContext()->getUserFilesPath()); return; From 3f91ece54b140906f050c0d6fc11563b81e48364 Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 8 Apr 2024 20:54:42 +0200 Subject: [PATCH 373/470] Remove blank space from wordShingleSimHashUTF8 --- docs/en/sql-reference/functions/hash-functions.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index ab97e43c6243..9bfaaae54637 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -1077,8 +1077,6 @@ Result: ## wordShingleSimHashUTF8 - - **Syntax** ```sql From 1b9a72e374f0194d3ab856f5cc0cd491a65af9a4 Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 8 Apr 2024 20:56:30 +0200 Subject: [PATCH 374/470] Add back accidently removed description --- docs/en/sql-reference/functions/hash-functions.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 9bfaaae54637..902e5ab9baf2 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -1077,6 +1077,10 @@ Result: ## wordShingleSimHashUTF8 +Splits a UTF-8 string into parts (shingles) of `shinglesize` words and returns the word shingle `simhash`. Is case sensitive. + +Can be used for detection of semi-duplicate strings with [bitHammingDistance](/docs/en/sql-reference/functions/bit-functions.md/#bithammingdistance). The smaller is the [Hamming Distance](https://en.wikipedia.org/wiki/Hamming_distance) of the calculated `simhashes` of two strings, the more likely these strings are the same. 
+ **Syntax** ```sql From 2280fdeec1d41fdeb7a09459577312de8dc70bec Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 8 Apr 2024 19:16:47 +0000 Subject: [PATCH 375/470] Empty commit From b138b1e103d6ccba62620b849931a9a607e9a42b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Mon, 8 Apr 2024 19:18:44 +0000 Subject: [PATCH 376/470] Empty commit From 208722a5133caac4f8d1e54afd0d301a1ff0d2de Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 8 Apr 2024 21:55:27 +0200 Subject: [PATCH 377/470] Fix spelling mistake --- docs/en/sql-reference/functions/string-search-functions.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index 8aff8b7e9303..9b4578809910 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -13,7 +13,7 @@ Case-insensitive search follows the lowercase-uppercase rules of the English lan `I` whereas in the Turkish language it is `İ` - results for languages other than English may be unexpected. ::: -Functions in this section also assume that the searched string (refered to in this section as `haystack`) and the search string (refered to in this section as `needle`) are single-byte encoded text. If this assumption is +Functions in this section also assume that the searched string (referred to in this section as `haystack`) and the search string (referred to in this section as `needle`) are single-byte encoded text. If this assumption is violated, no exception is thrown and results are undefined. Search with UTF-8 encoded strings is usually provided by separate function variants. Likewise, if a UTF-8 function variant is used and the input strings are not UTF-8 encoded text, no exception is thrown and the results are undefined. 
Note that no automatic Unicode normalization is performed, however you can use the From 2c41bcb25aae59f4f7964ef112e0a9c426c65f27 Mon Sep 17 00:00:00 2001 From: Blargian Date: Mon, 8 Apr 2024 21:57:11 +0200 Subject: [PATCH 378/470] Add multiSearchXYZ functions to aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 57a8e0d58402..ebc538450111 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -550,6 +550,17 @@ Mongodb mortonDecode mortonEncode MsgPack +multiSearchAllPositionsCaseInsensitive +multiSearchAllPositionsCaseInsensitiveUTF +multiSearchAnyCaseInsensitive +multiSearchAnyCaseInsensitiveUTF +multiSearchAnyUTF +multiSearchFirstIndexCaseInsensitive +multiSearchFirstIndexCaseInsensitiveUTF +multiSearchFirstIndexUTF +multiSearchFirstPositionCaseInsensitive +multiSearchFirstPositionCaseInsensitiveUTF +multiSearchFirstPositionUTF MultiPolygon Multiline Multiqueries From f20771542633e85b07f49674e861740f44d1fe3e Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Mon, 8 Apr 2024 22:25:50 +0200 Subject: [PATCH 379/470] Update 03068_analyzer_distributed_join.sql --- tests/queries/0_stateless/03068_analyzer_distributed_join.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/03068_analyzer_distributed_join.sql b/tests/queries/0_stateless/03068_analyzer_distributed_join.sql index 61b1199dc448..542380feb7c5 100644 --- a/tests/queries/0_stateless/03068_analyzer_distributed_join.sql +++ b/tests/queries/0_stateless/03068_analyzer_distributed_join.sql @@ -1,5 +1,5 @@ --- https://github.com/ClickHouse/ClickHouse/issues/6571 --- Tag: no-replicated-database +-- Tags: no-replicated-database +-- Closes: https://github.com/ClickHouse/ClickHouse/issues/6571 SET allow_experimental_analyzer=1; CREATE TABLE LINEITEM_shard ON CLUSTER test_shard_localhost From eac8852c75da47991fa85c0611e8ef53c7059474 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 9 Apr 2024 00:14:49 +0200 Subject: [PATCH 380/470] Less flaky tests --- ...8_analyzer_ambiguous_column_multi_call.sql | 12 +++---- ...same_table_name_in_different_databases.sql | 32 +++++++++---------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql b/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql index 4ca5005fa1dc..e6f1ed81f91b 100644 --- a/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql +++ b/tests/queries/0_stateless/03088_analyzer_ambiguous_column_multi_call.sql @@ -1,13 +1,13 @@ -- https://github.com/ClickHouse/ClickHouse/issues/61014 SET allow_experimental_analyzer=1; -DROP DATABASE IF EXISTS test_03088; -create database test_03088; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; +create database {CLICKHOUSE_DATABASE:Identifier}; -create table test_03088.a (i int) engine = Log(); +create table {CLICKHOUSE_DATABASE:Identifier}.a (i int) engine = Log(); select - test_03088.a.i + {CLICKHOUSE_DATABASE:Identifier}.a.i from - test_03088.a, - test_03088.a as x; + {CLICKHOUSE_DATABASE:Identifier}.a, + {CLICKHOUSE_DATABASE:Identifier}.a as x; diff --git a/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.sql 
b/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.sql index 2185b5f450ae..436f9395fc4d 100644 --- a/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.sql +++ b/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.sql @@ -1,28 +1,28 @@ -- https://github.com/ClickHouse/ClickHouse/issues/61947 SET allow_experimental_analyzer=1; -DROP DATABASE IF EXISTS d1; -DROP DATABASE IF EXISTS d2; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE:Identifier}; +DROP DATABASE IF EXISTS {CLICKHOUSE_DATABASE_1:Identifier}; -CREATE DATABASE d1; -CREATE DATABASE d2; -CREATE TABLE d1.`1-1` (field Int8) ENGINE = Memory; -CREATE TABLE d2.`1-1` (field Int8) ENGINE = Memory; -CREATE TABLE d2.`2-1` (field Int8) ENGINE = Memory; +CREATE DATABASE {CLICKHOUSE_DATABASE:Identifier}; +CREATE DATABASE {CLICKHOUSE_DATABASE_1:Identifier}; +CREATE TABLE {CLICKHOUSE_DATABASE:Identifier}.`1-1` (field Int8) ENGINE = Memory; +CREATE TABLE {CLICKHOUSE_DATABASE_1:Identifier}.`1-1` (field Int8) ENGINE = Memory; +CREATE TABLE {CLICKHOUSE_DATABASE_1:Identifier}.`2-1` (field Int8) ENGINE = Memory; -INSERT INTO d1.`1-1` VALUES (1); +INSERT INTO {CLICKHOUSE_DATABASE:Identifier}.`1-1` VALUES (1); SELECT * -FROM d1.`1-1` -LEFT JOIN d2.`1-1` ON d1.`1-1`.field = d2.`1-1`.field; +FROM {CLICKHOUSE_DATABASE:Identifier}.`1-1` +LEFT JOIN {CLICKHOUSE_DATABASE_1:Identifier}.`1-1` ON {CLICKHOUSE_DATABASE:Identifier}.`1-1`.field = {CLICKHOUSE_DATABASE_1:Identifier}.`1-1`.field; SELECT ''; -SELECT 'using asterisk', d1.`1-1`.*, d2.`1-1`.* -FROM d1.`1-1` -LEFT JOIN d2.`1-1` USING field +SELECT 'using asterisk', {CLICKHOUSE_DATABASE:Identifier}.`1-1`.*, {CLICKHOUSE_DATABASE_1:Identifier}.`1-1`.* +FROM {CLICKHOUSE_DATABASE:Identifier}.`1-1` +LEFT JOIN {CLICKHOUSE_DATABASE_1:Identifier}.`1-1` USING field UNION ALL -SELECT 'using field name', d1.`1-1`.field, d2.`1-1`.field -FROM d1.`1-1` -LEFT JOIN d2.`1-1` USING field +SELECT 'using field name', {CLICKHOUSE_DATABASE:Identifier}.`1-1`.field, {CLICKHOUSE_DATABASE_1:Identifier}.`1-1`.field +FROM {CLICKHOUSE_DATABASE:Identifier}.`1-1` +LEFT JOIN {CLICKHOUSE_DATABASE_1:Identifier}.`1-1` USING field ORDER BY *; From f45acef1316dd1f15becd386e880471642765f44 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Tue, 9 Apr 2024 00:33:18 +0000 Subject: [PATCH 381/470] Support for a tiny feature in stateless tests image --- docker/test/stateless/run.sh | 53 ++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index bac9d8df7a9d..24b821f1d7da 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -41,6 +41,8 @@ source /utils.lib if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then echo "Azure is disabled" +elif [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + echo "Azure is disabled" else azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & fi @@ -137,6 +139,32 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? 
MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited) fi +if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \ + | sed "s|/var/lib/clickhouse/filesystem_caches/|/var/lib/clickhouse/filesystem_caches_1/|" \ + > /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp + mv /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server1/config.d/filesystem_caches_path.xml + + sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \ + | sed "s|/var/lib/clickhouse/filesystem_caches/|/var/lib/clickhouse/filesystem_caches_1/|" \ + > /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp + mv /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server1/config.d/filesystem_caches_path.xml + + mkdir -p /var/run/clickhouse-server1 + sudo chown clickhouse:clickhouse /var/run/clickhouse-server1 + sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server1/config.xml --daemon \ + --pid-file /var/run/clickhouse-server1/clickhouse-server.pid \ + -- --path /var/lib/clickhouse1/ --logger.stderr /var/log/clickhouse-server/stderr1.log \ + --logger.log /var/log/clickhouse-server/clickhouse-server1.log --logger.errorlog /var/log/clickhouse-server/clickhouse-server1.err.log \ + --tcp_port 19000 --tcp_port_secure 19440 --http_port 18123 --https_port 18443 --interserver_http_port 19009 --tcp_with_proxy_port 19010 \ + --mysql_port 19004 --postgresql_port 19005 \ + --keeper_server.tcp_port 19181 --keeper_server.server_id 2 \ + --prometheus.port 19988 \ + --macros.replica r2 # It doesn't work :( + + MAX_RUN_TIME=$((MAX_RUN_TIME < 9000 ? MAX_RUN_TIME : 9000)) # min(MAX_RUN_TIME, 2.5 hours) + MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited) +fi # Wait for the server to start, but not for too long. for _ in {1..100} @@ -183,6 +211,10 @@ function run_tests() ADDITIONAL_OPTIONS+=('--s3-storage') fi + if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + ADDITIONAL_OPTIONS+=('--shared-catalog') + fi + if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then ADDITIONAL_OPTIONS+=('--replicated-database') # Too many tests fail for DatabaseReplicated in parallel. @@ -264,6 +296,12 @@ do echo "$err" [[ "0" != "${#err}" ]] && failed_to_save_logs=1 fi + + if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + err=$( { clickhouse-client --port 19000 -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst; } 2>&1 ) + echo "$err" + [[ "0" != "${#err}" ]] && failed_to_save_logs=1 + fi done # Stop server so we can safely read data with clickhouse-local. 
@@ -275,6 +313,10 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] sudo clickhouse stop --pid-path /var/run/clickhouse-server2 ||: fi +if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + sudo clickhouse stop --pid-path /var/run/clickhouse-server1 ||: +fi + rg -Fa "" /var/log/clickhouse-server/clickhouse-server.log ||: rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & @@ -302,6 +344,10 @@ if [ $failed_to_save_logs -ne 0 ]; then clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||: fi + + if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables --stacktrace -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: + fi done fi @@ -341,3 +387,10 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||: tar -chf /test_output/coordination2.tar /var/lib/clickhouse2/coordination ||: fi + +if [[ -n "$USE_SHARED_CATALOG" ]] && [[ "$USE_SHARED_CATALOG" -eq 1 ]]; then + rg -Fa "" /var/log/clickhouse-server/clickhouse-server1.log ||: + zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.zst ||: + mv /var/log/clickhouse-server/stderr1.log /test_output/ ||: + tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||: +fi From 0265ba502716d8536c85ea8914ba791f1d278c66 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 10:55:08 +0200 Subject: [PATCH 382/470] Add wyHash to aspell-ignore --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 8aa2a463c477..84c70d918797 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -2760,6 +2760,7 @@ wordShingleSimHashUTF wordshingleMinHash writability wrt +wyHash xcode xeus xkcd From 6e90a197f8e88a2d0a74490e96455bc203365295 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 9 Apr 2024 12:02:11 +0200 Subject: [PATCH 383/470] Update 03091_analyzer_same_table_name_in_different_databases.sql --- ...03091_analyzer_same_table_name_in_different_databases.sql | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.sql b/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.sql index 436f9395fc4d..599275c66e86 100644 --- a/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.sql +++ b/tests/queries/0_stateless/03091_analyzer_same_table_name_in_different_databases.sql @@ -18,6 +18,8 @@ LEFT JOIN {CLICKHOUSE_DATABASE_1:Identifier}.`1-1` ON {CLICKHOUSE_DATABASE:Ident SELECT ''; +SELECT * FROM +( SELECT 'using asterisk', 
{CLICKHOUSE_DATABASE:Identifier}.`1-1`.*, {CLICKHOUSE_DATABASE_1:Identifier}.`1-1`.* FROM {CLICKHOUSE_DATABASE:Identifier}.`1-1` LEFT JOIN {CLICKHOUSE_DATABASE_1:Identifier}.`1-1` USING field @@ -25,4 +27,5 @@ UNION ALL SELECT 'using field name', {CLICKHOUSE_DATABASE:Identifier}.`1-1`.field, {CLICKHOUSE_DATABASE_1:Identifier}.`1-1`.field FROM {CLICKHOUSE_DATABASE:Identifier}.`1-1` LEFT JOIN {CLICKHOUSE_DATABASE_1:Identifier}.`1-1` USING field -ORDER BY *; +) +ORDER BY ALL; From 0543fc3263dba3cf56e1445725f3589e43d91b21 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 9 Apr 2024 13:17:42 +0300 Subject: [PATCH 384/470] OptimizeGroupByInjectiveFunctionsPass remove unused constant --- .../OptimizeGroupByInjectiveFunctionsPass.cpp | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp b/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp index 618932025253..a30ad2a1590f 100644 --- a/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp +++ b/src/Analyzer/Passes/OptimizeGroupByInjectiveFunctionsPass.cpp @@ -12,24 +12,6 @@ namespace DB namespace { -const std::unordered_set possibly_injective_function_names -{ - "dictGet", - "dictGetString", - "dictGetUInt8", - "dictGetUInt16", - "dictGetUInt32", - "dictGetUInt64", - "dictGetInt8", - "dictGetInt16", - "dictGetInt32", - "dictGetInt64", - "dictGetFloat32", - "dictGetFloat64", - "dictGetDate", - "dictGetDateTime" -}; - class OptimizeGroupByInjectiveFunctionsVisitor : public InDepthQueryTreeVisitorWithContext { using Base = InDepthQueryTreeVisitorWithContext; From 4e344f6a9397d27a15af82023b182469eaeebd35 Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Tue, 9 Apr 2024 10:25:41 +0000 Subject: [PATCH 385/470] remove ci status and reports for MQ case --- tests/ci/commit_status_helper.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/ci/commit_status_helper.py b/tests/ci/commit_status_helper.py index bda2db139919..56728c3d3ba7 100644 --- a/tests/ci/commit_status_helper.py +++ b/tests/ci/commit_status_helper.py @@ -148,6 +148,11 @@ def set_status_comment(commit: Commit, pr_info: PRInfo) -> None: """It adds or updates the comment status to all Pull Requests but for release one, so the method does nothing for simple pushes and pull requests with `release`/`release-lts` labels""" + + if pr_info.is_merge_queue(): + # skip report creation for the MQ + return + # to reduce number of parameters, the Github is constructed on the fly gh = Github() gh.__requester = commit._requester # type:ignore #pylint:disable=protected-access @@ -441,7 +446,9 @@ def update_mergeable_check(commit: Commit, pr_info: PRInfo, check_name: str) -> or pr_info.release_pr or pr_info.number == 0 ) - if not_run: + + # FIXME: For now, always set mergeable check in the Merge Queue. 
It's required to pass MQ + if not_run and not pr_info.is_merge_queue(): # Let's avoid unnecessary work return From fe868ddf86ccaa60bccbe44afbfbe1e24ecf6c5d Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 12:31:35 +0200 Subject: [PATCH 386/470] Document uniqCombined64 and update uniqueCombined --- .../reference/uniqcombined.md | 19 +++-- .../reference/uniqcombined64.md | 76 ++++++++++++++++++- 2 files changed, 86 insertions(+), 9 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md index 2f3efde859d8..99a46c14a30d 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md @@ -15,9 +15,9 @@ The `uniqCombined` function is a good choice for calculating the number of diffe **Arguments** -The function takes a variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. +- `HLL_precision`: The base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optional, you can use the function as `uniqCombined(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each). +- `X`: A variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. -`HLL_precision` is the base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optional, you can use the function as `uniqCombined(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each). **Returned value** @@ -25,26 +25,29 @@ The function takes a variable number of parameters. Parameters can be `Tuple`, ` **Implementation details** -Function: +The `uniqCombined` function: - Calculates a hash (64-bit hash for `String` and 32-bit otherwise) for all parameters in the aggregate, then uses it in calculations. - - Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table. - - For a small number of distinct elements, an array is used. When the set size is larger, a hash table is used. For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory. - + - For a small number of distinct elements, an array is used. + - When the set size is larger, a hash table is used. + - For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory. - Provides the result deterministically (it does not depend on the query processing order). :::note Since it uses 32-bit hash for non-`String` type, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (error will raise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) ::: -Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined`: +Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined` function: - Consumes several times less memory. - Calculates with several times higher accuracy. - Usually has slightly lower performance. 
In some scenarios, `uniqCombined` can perform better than `uniq`, for example, with distributed queries that transmit a large number of aggregation states over the network. +**Example** + + + **See Also** - [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md index 9f010da57f21..b6e09bcaae34 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined64.md @@ -5,4 +5,78 @@ sidebar_position: 193 # uniqCombined64 -Same as [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined), but uses 64-bit hash for all data types. +Calculates the approximate number of different argument values. It is the same as [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined), but uses a 64-bit hash for all data types rather than just for the String data type. + +``` sql +uniqCombined64(HLL_precision)(x[, ...]) +``` + +**Parameters** + +- `HLL_precision`: The base-2 logarithm of the number of cells in [HyperLogLog](https://en.wikipedia.org/wiki/HyperLogLog). Optionally, you can use the function as `uniqCombined64(x[, ...])`. The default value for `HLL_precision` is 17, which is effectively 96 KiB of space (2^17 cells, 6 bits each). +- `X`: A variable number of parameters. Parameters can be `Tuple`, `Array`, `Date`, `DateTime`, `String`, or numeric types. + +**Returned value** + +- A number [UInt64](../../../sql-reference/data-types/int-uint.md)-type number. + +**Implementation details** + +The `uniqCombined64` function: +- Calculates a hash (64-bit hash for all data types) for all parameters in the aggregate, then uses it in calculations. +- Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table. + - For a small number of distinct elements, an array is used. + - When the set size is larger, a hash table is used. + - For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory. +- Provides the result deterministically (it does not depend on the query processing order). + +:::note +Since it uses 64-bit hash for all types, the result does not suffer from very high error for cardinalities significantly larger than `UINT_MAX` like [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md) does, which uses a 32-bit hash for non-`String` types. +::: + +Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined64` function: + +- Consumes several times less memory. +- Calculates with several times higher accuracy. + +**Example** + +In the example below `uniqCombined64` is run on `1e10` different numbers returning a very close approximation of the number of different argument values. + +Query: + +```sql +SELECT uniqCombined64(number) FROM numbers(1e10); +``` + +Result: + +```response +┌─uniqCombined64(number)─┐ +│ 9998568925 │ -- 10.00 billion +└────────────────────────┘ +``` + +By comparison the `uniqCombined` function returns a rather poor approximation for an input this size. 
+ +Query: + +```sql +SELECT uniqCombined(number) FROM numbers(1e10); +``` + +Result: + +```response +┌─uniqCombined(number)─┐ +│ 5545308725 │ -- 5.55 billion +└──────────────────────┘ +``` + +**See Also** + +- [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) +- [uniqCombined](../../../sql-reference/aggregate-functions/reference/uniqcombined.md) +- [uniqHLL12](../../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12) +- [uniqExact](../../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact) +- [uniqTheta](../../../sql-reference/aggregate-functions/reference/uniqthetasketch.md#agg_function-uniqthetasketch) From af505aafbca25335b3f2a5659e681af373189231 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 12:34:53 +0200 Subject: [PATCH 387/470] Small fix --- .../aggregate-functions/reference/uniqcombined.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md index 99a46c14a30d..4d47aa6621ef 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md @@ -29,13 +29,13 @@ The `uniqCombined` function: - Calculates a hash (64-bit hash for `String` and 32-bit otherwise) for all parameters in the aggregate, then uses it in calculations. - Uses a combination of three algorithms: array, hash table, and HyperLogLog with an error correction table. - - For a small number of distinct elements, an array is used. - - When the set size is larger, a hash table is used. - - For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory. + - For a small number of distinct elements, an array is used. + - When the set size is larger, a hash table is used. + - For a larger number of elements, HyperLogLog is used, which will occupy a fixed amount of memory. - Provides the result deterministically (it does not depend on the query processing order). :::note -Since it uses 32-bit hash for non-`String` type, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (error will raise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) +Since it uses a 32-bit hash for non-`String` types, the result will have very high error for cardinalities significantly larger than `UINT_MAX` (error will raise quickly after a few tens of billions of distinct values), hence in this case you should use [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64). 
::: Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq) function, the `uniqCombined` function: From 7b3a973ee07d55ea6f8fe2cf6dfe2475e8cefc35 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 12:55:01 +0200 Subject: [PATCH 388/470] Add missing example to uniqCombined --- .../aggregate-functions/reference/uniqcombined.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md index 4d47aa6621ef..18f44d2fcc44 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md +++ b/docs/en/sql-reference/aggregate-functions/reference/uniqcombined.md @@ -46,7 +46,21 @@ Compared to the [uniq](../../../sql-reference/aggregate-functions/reference/uniq **Example** +Query: +```sql +SELECT uniqCombined(number) FROM numbers(1e6); +``` + +Result: + +```response +┌─uniqCombined(number)─┐ +│ 1001148 │ -- 1.00 million +└──────────────────────┘ +``` + +See the example section of [uniqCombined64](../../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64) for an example of the difference between `uniqCombined` and `uniqCombined64` for much larger inputs. **See Also** From 35e1e5aff7df25401ab3db59399f5bd49ba0deea Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 9 Apr 2024 14:42:29 +0300 Subject: [PATCH 389/470] Perf script update path in documentation --- tests/performance/scripts/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/performance/scripts/README.md b/tests/performance/scripts/README.md index 0a0580c62a00..1a15189fe861 100644 --- a/tests/performance/scripts/README.md +++ b/tests/performance/scripts/README.md @@ -130,7 +130,7 @@ More stages are available, e.g. restart servers or run the tests. See the code. #### Run a single test on the already configured servers ``` -docker/test/performance-comparison/perf.py --host=localhost --port=9000 --runs=1 tests/performance/logical_functions_small.xml +tests/performance/scripts/perf.py --host=localhost --port=9000 --runs=1 tests/performance/logical_functions_small.xml ``` #### Run all tests on some custom configuration From 55798dbdcf4222031afbf945f099d5f3bc111659 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 13:53:31 +0200 Subject: [PATCH 390/470] Add tupleIntDiv function --- .../functions/tuple-functions.md | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index b089de67e98f..873065d226bd 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -584,6 +584,59 @@ SELECT tupleConcat((1, 2), (3, 4), (true, false)) AS res └──────────────────────┘ ``` +## tupleIntDiv + +Does integer division of two tuples. + +**Syntax** + +```sql +tupleIntDiv(tuple1, tuple2) +``` + +**Parameters** + +- `tuple1`: tuple of numerator values. [Tuple](../data-types/tuple) of numeric type. +- `tuple2`: tuple of divisor values. [Tuple](../data-types/tuple) of numeric type. + +**Returned value** + +- Tuple of the quotients of `tuple1` and `tuple2`. [Tuple](../data-types/tuple) of integer values. 
+ +**Implementation details** + +- If either `tuple1` or `tuple2` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor. + +**Examples** + +Query: + +``` sql +SELECT tupleIntDiv((15, 10, 5),(5, 5, 5)); +``` + +Result: + +``` text +┌─tupleIntDiv((15, 10, 5), (5, 5, 5))─┐ +│ (3,2,1) │ +└─────────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT tupleIntDiv((15, 10, 5),(5.5, 5.5, 5.5)); +``` + +Result: + +``` text +┌─tupleIntDiv((15, 10, 5), (5.5, 5.5, 5.5))─┐ +│ (2,1,0) │ +└───────────────────────────────────────────┘ +``` + ## Distance functions All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md). From eed1a33e775599f7fa14dfdcdcb49f41939a9c1d Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 13:55:10 +0200 Subject: [PATCH 391/470] update wording on tupleIntDiv --- docs/en/sql-reference/functions/tuple-functions.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 873065d226bd..7b626f8ab626 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -586,7 +586,7 @@ SELECT tupleConcat((1, 2), (3, 4), (true, false)) AS res ## tupleIntDiv -Does integer division of two tuples. +Does integer division of two tuples, returns a tuple of the quotients. **Syntax** @@ -637,6 +637,9 @@ Result: └───────────────────────────────────────────┘ ``` +## tupleIntDivByNumber + + ## Distance functions All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md). From 5857ecf8481b60e101c1550a073a2176c59b8f61 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 14:01:50 +0200 Subject: [PATCH 392/470] Add tupleIntDivByNumber --- .../functions/tuple-functions.md | 62 +++++++++++++++++-- 1 file changed, 56 insertions(+), 6 deletions(-) diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 7b626f8ab626..58613c6194ba 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -586,26 +586,26 @@ SELECT tupleConcat((1, 2), (3, 4), (true, false)) AS res ## tupleIntDiv -Does integer division of two tuples, returns a tuple of the quotients. +Does integer division of a tuple of numerators and a tuple of denominators, and returns a tuple of the quotients. **Syntax** ```sql -tupleIntDiv(tuple1, tuple2) +tupleIntDiv(tuple_num, tuple_div) ``` **Parameters** -- `tuple1`: tuple of numerator values. [Tuple](../data-types/tuple) of numeric type. -- `tuple2`: tuple of divisor values. [Tuple](../data-types/tuple) of numeric type. +- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type. +- `tuple_div`: Tuple of divisor values. [Tuple](../data-types/tuple) of numeric type. **Returned value** -- Tuple of the quotients of `tuple1` and `tuple2`. [Tuple](../data-types/tuple) of integer values. +- Tuple of the quotients of `tuple_num` and `tuple_div`. [Tuple](../data-types/tuple) of integer values. **Implementation details** -- If either `tuple1` or `tuple2` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor. 
+- If either `tuple_num` or `tuple_div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor. **Examples** @@ -639,6 +639,56 @@ Result: ## tupleIntDivByNumber +Does integer division of a tuple of numerators by a given denominator, and returns a tuple of the quotients. + +**Syntax** + +```sql +tupleIntDivByNumber(tuple_num, div) +``` + +**Parameters** + +- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type. +- `div`: The divisor value. [Tuple](../data-types/tuple) of numeric type. + +**Returned value** + +- Tuple of the quotients of `tuple_num` and `div`. [Tuple](../data-types/tuple) of integer values. + +**Implementation details** + +- If either `tuple_num` or `div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor. + +**Examples** + +Query: + +``` sql +SELECT tupleIntDivByNumber((15, 10, 5),5); +``` + +Result: + +``` text +┌─tupleIntDivByNumber((15, 10, 5), 5)─┐ +│ (3,2,1) │ +└─────────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT tupleIntDivByNumber((15.2, 10.7, 5.5),5.8); +``` + +Result: + +``` text +┌─tupleIntDivByNumber((15.2, 10.7, 5.5), 5.8)─┐ +│ (2,1,0) │ +└─────────────────────────────────────────────┘ +``` ## Distance functions From c8fb88b261b4bf1bbab57fb82bf895d471308126 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 14:15:32 +0200 Subject: [PATCH 393/470] Add tupleIntDivOrZero --- .../functions/tuple-functions.md | 44 ++++++++++++++++++- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 58613c6194ba..853514c97431 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -606,6 +606,7 @@ tupleIntDiv(tuple_num, tuple_div) **Implementation details** - If either `tuple_num` or `tuple_div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor. +- An error will be thrown for division by 0. **Examples** @@ -659,13 +660,14 @@ tupleIntDivByNumber(tuple_num, div) **Implementation details** - If either `tuple_num` or `div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor. +- An error will be thrown for division by 0. **Examples** Query: ``` sql -SELECT tupleIntDivByNumber((15, 10, 5),5); +SELECT tupleIntDivByNumber((15, 10, 5), 5); ``` Result: @@ -679,7 +681,7 @@ Result: Query: ``` sql -SELECT tupleIntDivByNumber((15.2, 10.7, 5.5),5.8); +SELECT tupleIntDivByNumber((15.2, 10.7, 5.5), 5.8); ``` Result: @@ -690,6 +692,44 @@ Result: └─────────────────────────────────────────────┘ ``` +## tupleIntDivOrZero + +Like [tupleIntDiv](#tupleintdiv) it does integer division of a tuple of numerators and a tuple of denominators, and returns a tuple of the quotients. Does not throw an error for 0 divisors, but rather returns the quotient as 0. + +**Syntax** + +```sql +tupleIntDivOrZero(tuple_num, tuple_div) +``` + +- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type. +- `tuple_div`: Tuple of divisor values. [Tuple](../data-types/tuple) of numeric type. + +**Returned value** + +- Tuple of the quotients of `tuple_num` and `tuple_div`. [Tuple](../data-types/tuple) of integer values. 
+- Returns 0 for quotients where the divisor is 0. + +**Implementation details** + +- If either `tuple_num` or `tuple_div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor as in [tupleIntDiv](#tupleintdiv). + +**Examples** + +Query: + +``` sql +SELECT tupleIntDivOrZero((5, 10, 15),(0, 0, 0)); +``` + +Result: + +``` text +┌─tupleIntDivOrZero((5, 10, 15), (0, 0, 0))─┐ +│ (0,0,0) │ +└───────────────────────────────────────────┘ +``` + ## Distance functions All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md). From bba66eb05a95541915fd6e4eccd0beac26b16a3b Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 9 Apr 2024 12:30:07 +0000 Subject: [PATCH 394/470] Review fixes. --- src/Analyzer/Utils.cpp | 22 ++++++++++++++++ src/Analyzer/Utils.h | 3 +++ src/Planner/CollectSets.cpp | 34 +++---------------------- src/Planner/CollectSets.h | 4 --- src/Storages/buildQueryTreeForShard.cpp | 5 ++-- 5 files changed, 32 insertions(+), 36 deletions(-) diff --git a/src/Analyzer/Utils.cpp b/src/Analyzer/Utils.cpp index 8ccf95deadc7..2882c4e0c020 100644 --- a/src/Analyzer/Utils.cpp +++ b/src/Analyzer/Utils.cpp @@ -760,4 +760,26 @@ QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_ty return function_node; } +QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(QueryTreeNodePtr table_node, const ContextPtr & context) +{ + const auto & storage_snapshot = table_node->as()->getStorageSnapshot(); + auto columns_to_select = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::Ordinary)); + size_t columns_to_select_size = columns_to_select.size(); + auto column_nodes_to_select = std::make_shared(); + column_nodes_to_select->getNodes().reserve(columns_to_select_size); + NamesAndTypes projection_columns; + projection_columns.reserve(columns_to_select_size); + for (auto & column : columns_to_select) + { + column_nodes_to_select->getNodes().emplace_back(std::make_shared(column, table_node)); + projection_columns.emplace_back(column.name, column.type); + } + auto subquery_for_table = std::make_shared(Context::createCopy(context)); + subquery_for_table->setIsSubquery(true); + subquery_for_table->getProjectionNode() = std::move(column_nodes_to_select); + subquery_for_table->getJoinTree() = std::move(table_node); + subquery_for_table->resolveProjectionColumns(std::move(projection_columns)); + return subquery_for_table; +} + } diff --git a/src/Analyzer/Utils.h b/src/Analyzer/Utils.h index 8e32ef0464c1..1b4a7d5ef3c9 100644 --- a/src/Analyzer/Utils.h +++ b/src/Analyzer/Utils.h @@ -105,4 +105,7 @@ NameSet collectIdentifiersFullNames(const QueryTreeNodePtr & node); /// Wrap node into `_CAST` function QueryTreeNodePtr createCastFunction(QueryTreeNodePtr node, DataTypePtr result_type, ContextPtr context); +/// Build subquery which we execute for `IN table` function. 
+QueryTreeNodePtr buildSubqueryToReadColumnsFromTableExpression(QueryTreeNodePtr table_node, const ContextPtr & context); + } diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp index 37502828f638..b1f2875210d0 100644 --- a/src/Planner/CollectSets.cpp +++ b/src/Planner/CollectSets.cpp @@ -23,35 +23,6 @@ namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; } - -QueryTreeNodePtr makeExecutableSubqueryForIn(const QueryTreeNodePtr & in_second_argument, const ContextPtr & context) -{ - auto subquery_to_execute = in_second_argument; - if (auto * table_node = in_second_argument->as()) - { - auto storage_snapshot = table_node->getStorageSnapshot(); - auto columns_to_select = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::Ordinary)); - size_t columns_to_select_size = columns_to_select.size(); - auto column_nodes_to_select = std::make_shared(); - column_nodes_to_select->getNodes().reserve(columns_to_select_size); - NamesAndTypes projection_columns; - projection_columns.reserve(columns_to_select_size); - for (auto & column : columns_to_select) - { - column_nodes_to_select->getNodes().emplace_back(std::make_shared(column, subquery_to_execute)); - projection_columns.emplace_back(column.name, column.type); - } - auto subquery_for_table = std::make_shared(Context::createCopy(context)); - subquery_for_table->setIsSubquery(true); - subquery_for_table->getProjectionNode() = std::move(column_nodes_to_select); - subquery_for_table->getJoinTree() = std::move(subquery_to_execute); - subquery_for_table->resolveProjectionColumns(std::move(projection_columns)); - subquery_to_execute = std::move(subquery_for_table); - } - - return subquery_to_execute; -} - namespace { @@ -116,7 +87,10 @@ class CollectSetsVisitor : public ConstInDepthQueryTreeVisitoras()) + subquery_to_execute = buildSubqueryToReadColumnsFromTableExpression(std::move(subquery_to_execute), planner_context.getQueryContext()); + sets.addFromSubquery(set_key, std::move(subquery_to_execute), settings); } else diff --git a/src/Planner/CollectSets.h b/src/Planner/CollectSets.h index 0ee006f3320d..5f9f7a5a4665 100644 --- a/src/Planner/CollectSets.h +++ b/src/Planner/CollectSets.h @@ -14,8 +14,4 @@ struct SelectQueryOptions; */ void collectSets(const QueryTreeNodePtr & node, PlannerContext & planner_context); -/// Build subquery which we execute for IN function. -/// It is needed to support `IN table` case. 
-QueryTreeNodePtr makeExecutableSubqueryForIn(const QueryTreeNodePtr & in_second_argument, const ContextPtr & context); - } diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index 09e48a93df4b..5bbdbe487b00 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -365,7 +364,9 @@ QueryTreeNodePtr buildQueryTreeForShard(const PlannerContextPtr & planner_contex if (in_function_node_type != QueryTreeNodeType::QUERY && in_function_node_type != QueryTreeNodeType::UNION && in_function_node_type != QueryTreeNodeType::TABLE) continue; - auto subquery_to_execute = makeExecutableSubqueryForIn(in_function_subquery_node, planner_context->getQueryContext()); + auto subquery_to_execute = in_function_subquery_node; + if (subquery_to_execute->as()) + subquery_to_execute = buildSubqueryToReadColumnsFromTableExpression(std::move(subquery_to_execute), planner_context->getQueryContext()); auto temporary_table_expression_node = executeSubqueryNode(subquery_to_execute, planner_context->getMutableQueryContext(), From 92a027a2d0b5c92c7308925e314728588d8e52fa Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 15:01:27 +0200 Subject: [PATCH 395/470] Add OrZero variants --- .../functions/tuple-functions.md | 70 ++++++++++++++++--- 1 file changed, 62 insertions(+), 8 deletions(-) diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 853514c97431..2351cfd47d42 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -638,6 +638,44 @@ Result: └───────────────────────────────────────────┘ ``` +## tupleIntDivOrZero + +Like [tupleIntDiv](#tupleintdiv) it does integer division of a tuple of numerators and a tuple of denominators, and returns a tuple of the quotients. It does not throw an error for 0 divisors, but rather returns the quotient as 0. + +**Syntax** + +```sql +tupleIntDivOrZero(tuple_num, tuple_div) +``` + +- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type. +- `tuple_div`: Tuple of divisor values. [Tuple](../data-types/tuple) of numeric type. + +**Returned value** + +- Tuple of the quotients of `tuple_num` and `tuple_div`. [Tuple](../data-types/tuple) of integer values. +- Returns 0 for quotients where the divisor is 0. + +**Implementation details** + +- If either `tuple_num` or `tuple_div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor as in [tupleIntDiv](#tupleintdiv). + +**Examples** + +Query: + +``` sql +SELECT tupleIntDivOrZero((5, 10, 15),(0, 0, 0)); +``` + +Result: + +``` text +┌─tupleIntDivOrZero((5, 10, 15), (0, 0, 0))─┐ +│ (0,0,0) │ +└───────────────────────────────────────────┘ +``` + ## tupleIntDivByNumber Does integer division of a tuple of numerators by a given denominator, and returns a tuple of the quotients. @@ -692,40 +730,56 @@ Result: └─────────────────────────────────────────────┘ ``` -## tupleIntDivOrZero +## tupleIntDivOrZeroByNumber -Like [tupleIntDiv](#tupleintdiv) it does integer division of a tuple of numerators and a tuple of denominators, and returns a tuple of the quotients. Does not throw an error for 0 divisors, but rather returns the quotient as 0. 
+Like [tupleIntDivByNumber](#tupleintdivbynumber) it does integer division of a tuple of numerators by a given denominator, and returns a tuple of the quotients. It does not throw an error for 0 divisors, but rather returns the quotient as 0. **Syntax** ```sql -tupleIntDivOrZero(tuple_num, tuple_div) +tupleIntDivOrZeroByNumber(tuple_num, div) ``` +**Parameters** + - `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type. -- `tuple_div`: Tuple of divisor values. [Tuple](../data-types/tuple) of numeric type. +- `div`: The divisor value. [Tuple](../data-types/tuple) of numeric type. **Returned value** -- Tuple of the quotients of `tuple_num` and `tuple_div`. [Tuple](../data-types/tuple) of integer values. +- Tuple of the quotients of `tuple_num` and `div`. [Tuple](../data-types/tuple) of integer values. - Returns 0 for quotients where the divisor is 0. **Implementation details** -- If either `tuple_num` or `tuple_div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor as in [tupleIntDiv](#tupleintdiv). +- If either `tuple_num` or `div` contain non-integer values then the result is calculated by rounding to the nearest integer for each non-integer numerator or divisor as in [tupleIntDivByNumber](#tupleintdivbynumber). **Examples** Query: ``` sql -SELECT tupleIntDivOrZero((5, 10, 15),(0, 0, 0)); +SELECT tupleIntDivOrZeroByNumber((15, 10, 5),(5)); ``` Result: ``` text -┌─tupleIntDivOrZero((5, 10, 15), (0, 0, 0))─┐ +┌─tupleIntDivOrZeroByNumber((15, 10, 5), 5)─┐ +│ (3,2,1) │ +└───────────────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT tupleIntDivOrZeroByNumber((15, 10, 5),(0)) +``` + +Result: + +``` text +┌─tupleIntDivOrZeroByNumber((15, 10, 5), 0)─┐ │ (0,0,0) │ └───────────────────────────────────────────┘ ``` From 3900b95fafee1b812522f45e25d13b259488e14c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 9 Apr 2024 13:01:38 +0000 Subject: [PATCH 396/470] Another one case with aliases. --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 21 ++++++++----------- ...23_group_by_use_nulls_analyzer_crashes.sql | 2 ++ 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 793cec912932..487b40a5ccfd 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -6694,11 +6694,8 @@ void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierR { for (const auto & grouping_set : query_node_typed.getGroupBy().getNodes()) { - for (auto & group_by_elem : grouping_set->as()->getNodes()) - { - group_by_elem = group_by_elem->clone(); + for (const auto & group_by_elem : grouping_set->as()->getNodes()) scope.nullable_group_by_keys.insert(group_by_elem); - } } } } @@ -6716,15 +6713,8 @@ void QueryAnalyzer::resolveGroupByNode(QueryNode & query_node_typed, IdentifierR if (scope.group_by_use_nulls) { - for (auto & group_by_elem : query_node_typed.getGroupBy().getNodes()) - { - /// Clone is needed cause aliases share subtrees. - /// If not clone, a part of GROUP BY key could be replaced to nullable - /// by replacing a part of alias from another subtree to nullable. 
- /// See 03023_group_by_use_nulls_analyzer_crashes - group_by_elem = group_by_elem->clone(); + for (const auto & group_by_elem : query_node_typed.getGroupBy().getNodes()) scope.nullable_group_by_keys.insert(group_by_elem); - } } } } @@ -8037,7 +8027,14 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier resolveGroupByNode(query_node_typed, scope); if (scope.group_by_use_nulls) + { resolved_expressions.clear(); + /// Clone is needed cause aliases share subtrees. + /// If not clone, the same (shared) subtree could be resolved again with different (Nullable) type + /// See 03023_group_by_use_nulls_analyzer_crashes + for (auto & [_, node] : scope.alias_name_to_expression_node) + node = node->clone(); + } if (query_node_typed.hasHaving()) resolveExpressionNode(query_node_typed.getHaving(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/); diff --git a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql index b6ca454c1cfe..53882f115ba9 100644 --- a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql +++ b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql @@ -17,3 +17,5 @@ GROUP BY (number, (toString(x), number)) SETTINGS group_by_use_nulls = 1 FORMAT Null; SELECT tuple(number + 1) AS x FROM numbers(10) GROUP BY number + 1, toString(x) WITH CUBE settings group_by_use_nulls=1 FORMAT Null; + +SELECT tuple(tuple(number)) AS x FROM numbers(10) WHERE toString(toUUID(tuple(number), NULL), x) GROUP BY number, (toString(x), number) WITH CUBE SETTINGS group_by_use_nulls = 1 FORMAT Null; From 7d969dd9faad1226eb60876bff5aa0dcfad5bc93 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 15:19:31 +0200 Subject: [PATCH 397/470] Add tupleModulo tupleModuloByNumber --- .../functions/tuple-functions.md | 86 +++++++++++++++++-- 1 file changed, 79 insertions(+), 7 deletions(-) diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index 2351cfd47d42..ba177ca3349a 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -613,7 +613,7 @@ tupleIntDiv(tuple_num, tuple_div) Query: ``` sql -SELECT tupleIntDiv((15, 10, 5),(5, 5, 5)); +SELECT tupleIntDiv((15, 10, 5), (5, 5, 5)); ``` Result: @@ -627,7 +627,7 @@ Result: Query: ``` sql -SELECT tupleIntDiv((15, 10, 5),(5.5, 5.5, 5.5)); +SELECT tupleIntDiv((15, 10, 5), (5.5, 5.5, 5.5)); ``` Result: @@ -665,7 +665,7 @@ tupleIntDivOrZero(tuple_num, tuple_div) Query: ``` sql -SELECT tupleIntDivOrZero((5, 10, 15),(0, 0, 0)); +SELECT tupleIntDivOrZero((5, 10, 15), (0, 0, 0)); ``` Result: @@ -689,7 +689,7 @@ tupleIntDivByNumber(tuple_num, div) **Parameters** - `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type. -- `div`: The divisor value. [Tuple](../data-types/tuple) of numeric type. +- `div`: The divisor value. [Numeric](../data-types/int-uint.md) type. **Returned value** @@ -743,7 +743,7 @@ tupleIntDivOrZeroByNumber(tuple_num, div) **Parameters** - `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type. -- `div`: The divisor value. [Tuple](../data-types/tuple) of numeric type. +- `div`: The divisor value. [Numeric](../data-types/int-uint.md) type. 
**Returned value** @@ -759,7 +759,7 @@ tupleIntDivOrZeroByNumber(tuple_num, div) Query: ``` sql -SELECT tupleIntDivOrZeroByNumber((15, 10, 5),(5)); +SELECT tupleIntDivOrZeroByNumber((15, 10, 5), (5)); ``` Result: @@ -773,7 +773,7 @@ Result: Query: ``` sql -SELECT tupleIntDivOrZeroByNumber((15, 10, 5),(0)) +SELECT tupleIntDivOrZeroByNumber((15, 10, 5), (0)) ``` Result: @@ -784,6 +784,78 @@ Result: └───────────────────────────────────────────┘ ``` +## tupleModulo + +Returns a tuple of the moduli (remainders) of division operations of two tuples. + +**Syntax** + +```sql +tupleModulo(tuple_num, tuple_mod) +``` + +**Parameters** + +- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type. +- `tuple_div`: Tuple of modulus values. [Tuple](../data-types/tuple) of numeric type. + +**Returned value** + +- Tuple of the remainders of division of `tuple_num` and `tuple_div`. [Tuple](../data-types/tuple) of non-zero integer values. +- An error is thrown for division by zero. + +**Examples** + +Query: + +``` sql +SELECT tupleModulo((15, 10, 5), (5, 3, 2)); +``` + +Result: + +``` text +┌─tupleModulo((15, 10, 5), (5, 3, 2))─┐ +│ (0,1,1) │ +└─────────────────────────────────────┘ +``` + +## tupleModuloByNumber + +Returns a tuple of the moduli (remainders) of division operations of a tuple and a given divisor. + +**Syntax** + +```sql +tupleModuloByNumber(tuple_num, div) +``` + +**Parameters** + +- `tuple_num`: Tuple of numerator values. [Tuple](../data-types/tuple) of numeric type. +- `div`: The divisor value. [Numeric](../data-types/int-uint.md) type. + +**Returned value** + +- Tuple of the remainders of division of `tuple_num` and `div`. [Tuple](../data-types/tuple) of non-zero integer values. +- An error is thrown for division by zero. + +**Examples** + +Query: + +``` sql +SELECT tupleModuloByNumber((15, 10, 5), 2); +``` + +Result: + +``` text +┌─tupleModuloByNumber((15, 10, 5), 2)─┐ +│ (1,0,1) │ +└─────────────────────────────────────┘ +``` + ## Distance functions All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md). From 5caa89a2b33efa86fab1bc6a6813e143c8f37f67 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 9 Apr 2024 15:36:34 +0200 Subject: [PATCH 398/470] Fix completion of available ClickHouse tools Now clickhouse --help/ch --help will print --help for clickhouse-local, let's use just "clickhouse help" to get help with list of available tools in clickhouse binary itself. 
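
A quick sketch of the intended difference, assuming a build where the bare binary dispatches to clickhouse-local (exact output is build-dependent):

```sh
clickhouse --help   # now behaves like `clickhouse-local --help`
clickhouse help     # still lists the bundled tools, which the completion script parses
```
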
Signed-off-by: Azat Khuzhin --- programs/bash-completion/completions/clickhouse | 2 +- programs/main.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/programs/bash-completion/completions/clickhouse b/programs/bash-completion/completions/clickhouse index ff0a60c60be8..3c895a660754 100644 --- a/programs/bash-completion/completions/clickhouse +++ b/programs/bash-completion/completions/clickhouse @@ -3,7 +3,7 @@ function _clickhouse_get_utils() { local cmd=$1 && shift - "$cmd" --help |& awk '/^clickhouse.*args/ { print $2 }' + "$cmd" help |& awk '/^clickhouse.*args/ { print $2 }' } function _complete_for_clickhouse_entrypoint_bin() diff --git a/programs/main.cpp b/programs/main.cpp index 7162a18d7641..9ad8b016c824 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -487,7 +487,7 @@ int main(int argc_, char ** argv_) /// Interpret binary without argument or with arguments starts with dash /// ('-') as clickhouse-local for better usability: /// - /// clickhouse # dumps help + /// clickhouse help # dumps help /// clickhouse -q 'select 1' # use local /// clickhouse # spawn local /// clickhouse local # spawn local From e07a614006cd756e3e86b01d22e954ca83ef4143 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 15:52:59 +0200 Subject: [PATCH 399/470] Add missing tests for tupleIntXYZ and tupleModulo, tupleModuloByNumber --- .../03033_tupleIntXYZ_and_tupleModulo.reference | 9 +++++++++ .../03033_tupleIntXYZ_and_tupleModulo.sql | 13 +++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 tests/queries/0_stateless/03033_tupleIntXYZ_and_tupleModulo.reference create mode 100644 tests/queries/0_stateless/03033_tupleIntXYZ_and_tupleModulo.sql diff --git a/tests/queries/0_stateless/03033_tupleIntXYZ_and_tupleModulo.reference b/tests/queries/0_stateless/03033_tupleIntXYZ_and_tupleModulo.reference new file mode 100644 index 000000000000..f0ce16499393 --- /dev/null +++ b/tests/queries/0_stateless/03033_tupleIntXYZ_and_tupleModulo.reference @@ -0,0 +1,9 @@ +(3,2,1) +(2,1,0) +(0,0,0) +(3,2,1) +(2,1,0) +(3,2,1) +(0,0,0) +(0,1,1) +(1,0,1) diff --git a/tests/queries/0_stateless/03033_tupleIntXYZ_and_tupleModulo.sql b/tests/queries/0_stateless/03033_tupleIntXYZ_and_tupleModulo.sql new file mode 100644 index 000000000000..2cb7e726a4bf --- /dev/null +++ b/tests/queries/0_stateless/03033_tupleIntXYZ_and_tupleModulo.sql @@ -0,0 +1,13 @@ +SELECT tupleIntDiv((15, 10, 5), (0, 0, 0)); -- { serverError ILLEGAL_DIVISION } +SELECT tupleIntDiv((15, 10, 5), (5, 5, 5)); +SELECT tupleIntDiv((15, 10, 5), (5.5, 5.5, 5.5)); +SELECT tupleIntDivOrZero((5, 10, 15), (0, 0, 0)); -- no error thrown for zero divisors +SELECT tupleIntDivByNumber((15, 10, 5), 0); -- { serverError ILLEGAL_DIVISION } +SELECT tupleIntDivByNumber((15, 10, 5), 5); +SELECT tupleIntDivByNumber((15.2, 10.7, 5.5), 5.8); +SELECT tupleIntDivOrZeroByNumber((15, 10, 5), 5); +SELECT tupleIntDivOrZeroByNumber((15, 10, 5), 0); -- no error thrown for zero divisors +SELECT tupleModulo((15, 10, 5), (0, 3, 2)); -- { serverError ILLEGAL_DIVISION } +SELECT tupleModulo((15, 10, 5), (5, 3, 2)); +SELECT tupleModuloByNumber((15, 10, 5), 0); -- { serverError ILLEGAL_DIVISION } +SELECT tupleModuloByNumber((15, 10, 5), 2); \ No newline at end of file From 42a906dca9be03d3380fe165bc3e0fddad90c7e0 Mon Sep 17 00:00:00 2001 From: Peter Date: Mon, 8 Apr 2024 21:41:06 +0800 Subject: [PATCH 400/470] Remove useless param, fix typo and query result --- docs/en/getting-started/example-datasets/opensky.md | 6 +++--- 1 file changed, 3 insertions(+), 
3 deletions(-) diff --git a/docs/en/getting-started/example-datasets/opensky.md b/docs/en/getting-started/example-datasets/opensky.md index df28809495cf..9f8ad1348992 100644 --- a/docs/en/getting-started/example-datasets/opensky.md +++ b/docs/en/getting-started/example-datasets/opensky.md @@ -7,7 +7,7 @@ title: "Crowdsourced air traffic data from The OpenSky Network 2020" The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. It spans all flights seen by the network's more than 2500 members since 1 January 2019. More data will be periodically included in the dataset until the end of the COVID-19 pandemic. -Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd +Source: https://zenodo.org/records/5092942 Martin Strohmeier, Xavier Olive, Jannis Luebbe, Matthias Schaefer, and Vincent Lenders "Crowdsourced air traffic data from the OpenSky Network 2019–2020" @@ -19,7 +19,7 @@ https://doi.org/10.5194/essd-13-357-2021 Run the command: ```bash -wget -O- https://zenodo.org/record/5092942 | grep -oP 'https://zenodo.org/record/5092942/files/flightlist_\d+_\d+\.csv\.gz' | xargs wget +wget -O- https://zenodo.org/records/5092942 | grep -oE 'https://zenodo.org/records/5092942/files/flightlist_[0-9]+_[0-9]+\.csv\.gz' | xargs wget ``` Download will take about 2 minutes with good internet connection. There are 30 files with total size of 4.3 GB. @@ -134,7 +134,7 @@ Result: ```text ┌─avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))─┐ -│ 1041090.6465708319 │ +│ 1041090.6360469435 │ └────────────────────────────────────────────────────────────────────┘ ``` From de8d31685db079da0c4e734330a3d75a67a30c5a Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 16:26:35 +0200 Subject: [PATCH 401/470] Minor edit --- docs/en/sql-reference/functions/tuple-functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index ba177ca3349a..b3cec1206b8e 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -759,7 +759,7 @@ tupleIntDivOrZeroByNumber(tuple_num, div) Query: ``` sql -SELECT tupleIntDivOrZeroByNumber((15, 10, 5), (5)); +SELECT tupleIntDivOrZeroByNumber((15, 10, 5), 5); ``` Result: @@ -773,7 +773,7 @@ Result: Query: ``` sql -SELECT tupleIntDivOrZeroByNumber((15, 10, 5), (0)) +SELECT tupleIntDivOrZeroByNumber((15, 10, 5), 0) ``` Result: From d5014b2d0e3f19aba82cca1480146cec1772e7a0 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 16:44:07 +0200 Subject: [PATCH 402/470] Add missing L2SquaredNorm function --- .../functions/distance-functions.md | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/docs/en/sql-reference/functions/distance-functions.md b/docs/en/sql-reference/functions/distance-functions.md index e20c35c6b6f1..eb991acc94b4 100644 --- a/docs/en/sql-reference/functions/distance-functions.md +++ b/docs/en/sql-reference/functions/distance-functions.md @@ -82,6 +82,44 @@ Result: └──────────────────┘ ``` +## L2SquaredNorm + +Calculates the square root of the sum of the squares of the vector values (the [L2Norm](#l2norm)) squared. + +**Syntax** + +```sql +L2SquaredNorm(vector) +``` + +Alias: `normL2Squared`. + +***Arguments** + +- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). 
+ +**Returned value** + +- L2-norm squared. + +Type: [Float](../../sql-reference/data-types/float.md). + +**Example** + +Query: + +```sql +SELECT L2SquaredNorm((1, 2)); +``` + +Result: + +```text +┌─L2SquaredNorm((1, 2))─┐ +│ 5 │ +└───────────────────────┘ +``` + ## LinfNorm Calculates the maximum of absolute values of a vector. From 728ed2eee3ee08520fa0e36c81ad5e8c0c5472fc Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 9 Apr 2024 16:44:38 +0200 Subject: [PATCH 403/470] Simplify the change --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 92 +++++++++++------------ 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 48f326521989..5aaf5bc52b68 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2276,61 +2276,61 @@ void QueryAnalyzer::mergeWindowWithParentWindow(const QueryTreeNodePtr & window_ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_list, const QueryTreeNodes & projection_nodes, IdentifierResolveScope & scope) { const auto & settings = scope.context->getSettingsRef(); - if (settings.enable_positional_arguments && scope.context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) - { - auto & node_list_typed = node_list->as(); + if (!settings.enable_positional_arguments || !scope.context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) + return; - for (auto & node : node_list_typed.getNodes()) - { - auto * node_to_replace = &node; + auto & node_list_typed = node_list->as(); - if (auto * sort_node = node->as()) - node_to_replace = &sort_node->getExpression(); + for (auto & node : node_list_typed.getNodes()) + { + auto * node_to_replace = &node; - auto * constant_node = (*node_to_replace)->as(); + if (auto * sort_node = node->as()) + node_to_replace = &sort_node->getExpression(); - if (!constant_node - || (constant_node->getValue().getType() != Field::Types::UInt64 - && constant_node->getValue().getType() != Field::Types::Int64)) - continue; + auto * constant_node = (*node_to_replace)->as(); - UInt64 pos; - if (constant_node->getValue().getType() == Field::Types::UInt64) - { - pos = constant_node->getValue().get(); - } - else // Int64 + if (!constant_node + || (constant_node->getValue().getType() != Field::Types::UInt64 + && constant_node->getValue().getType() != Field::Types::Int64)) + continue; + + UInt64 pos; + if (constant_node->getValue().getType() == Field::Types::UInt64) + { + pos = constant_node->getValue().get(); + } + else // Int64 + { + auto value = constant_node->getValue().get(); + if (value > 0) + pos = value; + else { - auto value = constant_node->getValue().get(); - if (value > 0) - pos = value; - else - { - if (static_cast(std::abs(value)) > projection_nodes.size()) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Negative positional argument number {} is out of bounds. Expected in range [-{}, -1]. In scope {}", - value, - projection_nodes.size(), - scope.scope_node->formatASTForErrorMessage()); - pos = projection_nodes.size() + value + 1; - } + if (static_cast(std::abs(value)) > projection_nodes.size()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Negative positional argument number {} is out of bounds. Expected in range [-{}, -1]. 
In scope {}", + value, + projection_nodes.size(), + scope.scope_node->formatASTForErrorMessage()); + pos = projection_nodes.size() + value + 1; } + } - if (!pos || pos > projection_nodes.size()) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Positional argument number {} is out of bounds. Expected in range [1, {}]. In scope {}", - pos, - projection_nodes.size(), - scope.scope_node->formatASTForErrorMessage()); + if (!pos || pos > projection_nodes.size()) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Positional argument number {} is out of bounds. Expected in range [1, {}]. In scope {}", + pos, + projection_nodes.size(), + scope.scope_node->formatASTForErrorMessage()); - --pos; - *node_to_replace = projection_nodes[pos]->clone(); - if (auto it = resolved_expressions.find(projection_nodes[pos]); it != resolved_expressions.end()) - { - resolved_expressions[*node_to_replace] = it->second; - } + --pos; + *node_to_replace = projection_nodes[pos]->clone(); + if (auto it = resolved_expressions.find(projection_nodes[pos]); it != resolved_expressions.end()) + { + resolved_expressions[*node_to_replace] = it->second; } } } From 8369f8d8c18aea16a815a229f1bfd4af27a9f102 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 16:54:09 +0200 Subject: [PATCH 404/470] Add missing l2SquaredNorm function --- .../functions/distance-functions.md | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/docs/en/sql-reference/functions/distance-functions.md b/docs/en/sql-reference/functions/distance-functions.md index e20c35c6b6f1..5f3514049c77 100644 --- a/docs/en/sql-reference/functions/distance-functions.md +++ b/docs/en/sql-reference/functions/distance-functions.md @@ -81,6 +81,43 @@ Result: │ 2.23606797749979 │ └──────────────────┘ ``` +## L2SquaredNorm + +Calculates the square root of the sum of the squares of the vector values (the [L2Norm](#l2norm)) squared. + +**Syntax** + +```sql +L2SquaredNorm(vector) +``` + +Alias: `normL2Squared`. + +***Arguments** + +- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). + +**Returned value** + +- L2-norm squared. + +Type: [Float](../../sql-reference/data-types/float.md). + +**Example** + +Query: + +```sql +SELECT L2SquaredNorm((1, 2)); +``` + +Result: + +```text +┌─L2SquaredNorm((1, 2))─┐ +│ 5 │ +└───────────────────────┘ +``` ## LinfNorm From 814de46e136f14a1760f1045f4d08a44e082a42c Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 9 Apr 2024 15:08:03 +0000 Subject: [PATCH 405/470] Another one case. 
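
A minimal sketch of the query shape this patch addresses, adapted from the
regression test added below (03023_group_by_use_nulls_analyzer_crashes): the
same constant is used both as a plain projection and as a GROUP BY key under
`group_by_use_nulls` — the pattern covered by the crash-regression test this
patch extends.

```sql
-- Adapted from the regression test added in this patch.
SELECT materialize('a'), 'a' AS key
GROUP BY key WITH CUBE WITH TOTALS
SETTINGS group_by_use_nulls = 1;
```
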
--- src/Analyzer/HashUtils.h | 2 +- tests/queries/0_stateless/02203_shebang.bak | 3 + ...up_by_use_nulls_analyzer_crashes.reference | 4 + ...23_group_by_use_nulls_analyzer_crashes.sql | 2 + tests/queries/0_stateless/users.xml | 110 ++++++++++++++++++ 5 files changed, 120 insertions(+), 1 deletion(-) create mode 100755 tests/queries/0_stateless/02203_shebang.bak create mode 100644 tests/queries/0_stateless/users.xml diff --git a/src/Analyzer/HashUtils.h b/src/Analyzer/HashUtils.h index 80f59c1eaaab..77ade7a4705b 100644 --- a/src/Analyzer/HashUtils.h +++ b/src/Analyzer/HashUtils.h @@ -36,7 +36,7 @@ inline bool operator!=(const QueryTreeNodeWithHash; -using QueryTreeNodePtrWithHashIgnoreTypes = QueryTreeNodeWithHash; +using QueryTreeNodePtrWithHashIgnoreTypes = QueryTreeNodeWithHash; using QueryTreeNodeRawPtrWithHash = QueryTreeNodeWithHash; using QueryTreeNodeConstRawPtrWithHash = QueryTreeNodeWithHash; diff --git a/tests/queries/0_stateless/02203_shebang.bak b/tests/queries/0_stateless/02203_shebang.bak new file mode 100755 index 000000000000..07686d1aab48 --- /dev/null +++ b/tests/queries/0_stateless/02203_shebang.bak @@ -0,0 +1,3 @@ +#!/usr/bin/clickhouse-local --queries-file + +SELECT 1; diff --git a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference index 4243abb1a1e9..17a17484a0cb 100644 --- a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference +++ b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.reference @@ -62,3 +62,7 @@ (9) a b a b +a a +a a + +a a diff --git a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql index 53882f115ba9..687101375429 100644 --- a/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql +++ b/tests/queries/0_stateless/03023_group_by_use_nulls_analyzer_crashes.sql @@ -19,3 +19,5 @@ SETTINGS group_by_use_nulls = 1 FORMAT Null; SELECT tuple(number + 1) AS x FROM numbers(10) GROUP BY number + 1, toString(x) WITH CUBE settings group_by_use_nulls=1 FORMAT Null; SELECT tuple(tuple(number)) AS x FROM numbers(10) WHERE toString(toUUID(tuple(number), NULL), x) GROUP BY number, (toString(x), number) WITH CUBE SETTINGS group_by_use_nulls = 1 FORMAT Null; + +SELECT materialize('a'), 'a' AS key GROUP BY key WITH CUBE WITH TOTALS SETTINGS group_by_use_nulls = 1; diff --git a/tests/queries/0_stateless/users.xml b/tests/queries/0_stateless/users.xml new file mode 100644 index 000000000000..a199435b42f7 --- /dev/null +++ b/tests/queries/0_stateless/users.xml @@ -0,0 +1,110 @@ + + + + + + + + + + + + 1 + + + + + + + + + c64c5e4e53ea1a9f1427d2713b3a22bbebe8940bc807adaf654744b1568c70ab + + + + ::/0 + + + + default + + + default + + + 1 + + + + + + + + + + + 3600 + + + 0 + 0 + 0 + 0 + 0 + + + + From 283fc115ba3c57a12fbacb8afd0af7cc332722f9 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 9 Apr 2024 15:17:26 +0000 Subject: [PATCH 406/470] Updating the test. 
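
The test is made idempotent: it now drops and recreates its fixture table on
every run, and switches the engine from Log to Memory (presumably to avoid
leftover on-disk state between runs). A sketch of the SQL the test issues
after this change (statements taken from the diff below; `u` is the test's
own table name):

```sql
DROP TABLE IF EXISTS u;
CREATE TABLE u (uid Int16) ENGINE = Memory AS SELECT 0;
```
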
--- tests/integration/test_cluster_all_replicas/test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_cluster_all_replicas/test.py b/tests/integration/test_cluster_all_replicas/test.py index 59b41ca87afb..d8bad180e1b4 100644 --- a/tests/integration/test_cluster_all_replicas/test.py +++ b/tests/integration/test_cluster_all_replicas/test.py @@ -43,7 +43,8 @@ def test_cluster(start_cluster): def test_global_in(start_cluster): - node1.query("CREATE TABLE u(uid Int16) ENGINE=Log as select 0") + node1.query("DROP TABLE IF EXISTS u;") + node1.query("CREATE TABLE u(uid Int16) ENGINE=Memory as select 0") assert set( node1.query( From 6feb2744672f417ffd5d4e0fff394a40af73ff61 Mon Sep 17 00:00:00 2001 From: flynn Date: Tue, 9 Apr 2024 15:18:57 +0000 Subject: [PATCH 407/470] Fix --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index 5aaf5bc52b68..56ccd5c6c22b 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2276,7 +2276,7 @@ void QueryAnalyzer::mergeWindowWithParentWindow(const QueryTreeNodePtr & window_ void QueryAnalyzer::replaceNodesWithPositionalArguments(QueryTreeNodePtr & node_list, const QueryTreeNodes & projection_nodes, IdentifierResolveScope & scope) { const auto & settings = scope.context->getSettingsRef(); - if (!settings.enable_positional_arguments || !scope.context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) + if (!settings.enable_positional_arguments || scope.context->getClientInfo().query_kind != ClientInfo::QueryKind::INITIAL_QUERY) return; auto & node_list_typed = node_list->as(); From 0bb54101b718720b907229f28758355ec52670a3 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 9 Apr 2024 15:35:36 +0000 Subject: [PATCH 408/470] Use shared mutex for stacktrace cache access --- src/Common/StackTrace.cpp | 67 ++++++++++++++------------------------- 1 file changed, 23 insertions(+), 44 deletions(-) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 78ab43e89919..4200161f8e80 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -480,10 +481,8 @@ void StackTrace::toStringEveryLine(void ** frame_pointers_raw, size_t offset, si struct CacheEntry { + std::mutex mutex; std::optional stacktrace_string; - bool to_string_in_progress = false; - - std::condition_variable cv; }; using CacheEntryPtr = std::shared_ptr; @@ -496,67 +495,47 @@ static StackTraceCache & cacheInstance() return cache; } -static std::mutex stacktrace_cache_mutex; +static DB::SharedMutex stacktrace_cache_mutex; String toStringCached(const StackTrace::FramePointers & pointers, size_t offset, size_t size) { const StackTraceRefTriple key{pointers, offset, size}; /// Calculation of stack trace text is extremely slow. - /// We use simple cache because otherwise the server could be overloaded by trash queries. + /// We use cache because otherwise the server could be overloaded by trash queries. /// Note that this cache can grow unconditionally, but practically it should be small. 
-    std::unique_lock lock{stacktrace_cache_mutex};
-    CacheEntryPtr cache_entry;
     StackTraceCache & cache = cacheInstance();
-    if (auto it = cache.find(key); it != cache.end())
-    {
-        cache_entry = it->second;
-    }
-    else
+    CacheEntryPtr cache_entry;
+
+    // Optimistic try for cache hit to avoid any contention whatsoever, should be the main hot code route
     {
-        auto [new_it, inserted] = cache.emplace(StackTraceTriple{pointers, offset, size}, std::make_shared<CacheEntry>());
-        chassert(inserted);
-        cache_entry = new_it->second;
+        std::shared_lock read_lock{stacktrace_cache_mutex};
+        if (auto it = cache.find(key); it != cache.end())
+            cache_entry = it->second;
     }
 
-    if (!cache_entry->to_string_in_progress && cache_entry->stacktrace_string.has_value())
-        return *cache_entry->stacktrace_string;
-
-    if (cache_entry->to_string_in_progress)
+    // Create a new entry in case of a cache miss
+    if (!cache_entry)
     {
-        cache_entry->cv.wait(lock, [&]{ return !cache_entry->to_string_in_progress; });
+        std::unique_lock write_lock{stacktrace_cache_mutex};
 
-        if (cache_entry->stacktrace_string.has_value())
-            return *cache_entry->stacktrace_string;
+        // We should recheck because `shared_lock` was released before we acquired `write_lock`
+        if (auto it = cache.find(key); it != cache.end())
+            cache_entry = it->second; // Another thread managed to create this entry before us
+        else
+            cache_entry = cache.emplace(StackTraceTriple{pointers, offset, size}, std::make_shared<CacheEntry>()).first->second;
     }
 
-    cache_entry->to_string_in_progress = true;
-
-    lock.unlock();
-
-    String stacktrace_string;
-    try
+    // Do not hold `stacktrace_cache_mutex` while running possibly slow calculation of stack trace text
+    std::scoped_lock lock(cache_entry->mutex);
+    if (!cache_entry->stacktrace_string.has_value())
     {
         DB::WriteBufferFromOwnString out;
         toStringEveryLineImpl(false, key, [&](std::string_view str) { out << str << '\n'; });
-        stacktrace_string = out.str();
+        cache_entry->stacktrace_string = out.str();
     }
-    catch (...)
- { - lock.lock(); - cache_entry->to_string_in_progress = false; - lock.unlock(); - cache_entry->cv.notify_one(); - throw; - } - - lock.lock(); - cache_entry->to_string_in_progress = false; - cache_entry->stacktrace_string = stacktrace_string; - lock.unlock(); - cache_entry->cv.notify_all(); - return stacktrace_string; + return *cache_entry->stacktrace_string; } std::string StackTrace::toString() const From c7cb33d035559db910f2a1fa6ea969941b09de7a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 9 Apr 2024 15:43:08 +0000 Subject: [PATCH 409/470] Better parsing --- src/Core/SettingsChangesHistory.h | 2 +- src/Interpreters/Cache/QueryCache.cpp | 23 +++++++++++-------- .../02494_query_cache_system_tables.sql | 13 +++++++++++ 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 7fa12780c8c0..8b5cdf03a33d 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -87,7 +87,7 @@ static std::map sett { {"24.4", {{"input_format_json_throw_on_bad_escape_sequence", true, true, "Allow to save JSON strings with bad escape sequences"}, {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, - {"query_cache_system_table_handling", QueryCacheSystemTableHandling::Save, QueryCacheSystemTableHandling::Throw, "The query cache no longer caches results of queries against system tables"}, + {"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"}, }}, {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp index 67fcdb8159c1..7b1f24e93fca 100644 --- a/src/Interpreters/Cache/QueryCache.cpp +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -9,13 +9,15 @@ #include #include #include +#include +#include #include +#include #include #include #include #include #include -#include #include #include /// chassert @@ -86,16 +88,19 @@ struct HasSystemTablesMatcher /// Handle SELECT [...] FROM clusterAllReplicas(, '
') else if (const auto * literal = node->as()) { - const auto & value = literal->value; /// (*) - database_table = applyVisitor(FieldVisitorDump(), value); + const auto & value = literal->value; + database_table = toString(value); } - /// (*) returns table in quotes, so we can't use .starts_with() for matching - static const re2::RE2 is_system_table(String(DatabaseCatalog::TEMPORARY_DATABASE) - + "|" + DatabaseCatalog::SYSTEM_DATABASE - + "|" + DatabaseCatalog::INFORMATION_SCHEMA - + "|" + DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE); - data.has_system_tables = re2::RE2::PartialMatch(database_table, is_system_table); + Tokens tokens(database_table.c_str(), database_table.c_str() + database_table.size(), /*max_query_size*/ 2048, /*skip_insignificant*/ true); + IParser::Pos pos(tokens, /*max_depth*/ 42, /*max_backtracks*/ 42); + Expected expected; + String database; + String table; + bool successfully_parsed = parseDatabaseAndTableName(pos, expected, database, table); + if (successfully_parsed) + if (DatabaseCatalog::isPredefinedDatabase(database)) + data.has_system_tables = true; } }; diff --git a/tests/queries/0_stateless/02494_query_cache_system_tables.sql b/tests/queries/0_stateless/02494_query_cache_system_tables.sql index 935011a6bb0c..c67a5c49ddab 100644 --- a/tests/queries/0_stateless/02494_query_cache_system_tables.sql +++ b/tests/queries/0_stateless/02494_query_cache_system_tables.sql @@ -44,8 +44,21 @@ SELECT * SETTINGS use_query_cache = 1; SELECT * FROM information_schema.tables SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } SELECT * FROM INFORMATION_SCHEMA.TABLES SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } +-- System tables can be "hidden" inside e.g. table functions SELECT * FROM clusterAllReplicas('test_shard_localhost', system.one) SETTINGS use_query_cache = 1; -- {serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } SELECT * FROM clusterAllReplicas('test_shard_localhost', 'system.one') SETTINGS use_query_cache = 1; -- {serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } +-- Criminal edge case that a user creates a table named "system". The query cache must not reject queries against it. +DROP TABLE IF EXISTS system; +CREATE TABLE system (c UInt64) ENGINE = Memory; +SElECT * FROM system SETTINGS use_query_cache = 1; +DROP TABLE system; + +-- Similar queries against system.system are rejected. +DROP TABLE IF EXISTS system.system; +CREATE TABLE system.system (c UInt64) ENGINE = Memory; +SElECT * FROM system.system SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } +DROP TABLE system; + -- Cleanup SYSTEM DROP QUERY CACHE; From 7aac552fabfa9a2fe19ca6930000eeee395e8752 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 17:44:41 +0200 Subject: [PATCH 410/470] Add missing kostikConsistentHash --- .../sql-reference/functions/hash-functions.md | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index 90c7d8c2206b..5bfacd34e197 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -594,6 +594,45 @@ Calculates JumpConsistentHash form a UInt64. Accepts two arguments: a UInt64-type key and the number of buckets. Returns Int32. 
For more information, see the link: [JumpConsistentHash](https://arxiv.org/pdf/1406.2294.pdf) +## kostikConsistentHash + +An O(1) time and space consistent hash algorithm by Konstantin 'kostik' Oblakov. Previously `yandexConsistentHash`. + +**Syntax** + +```sql +kostikConsistentHash(input, n) +``` + +Alias: `yandexConsistentHash` (left for backwards compatibility sake). + +**Parameters** + +- `input`: A UInt64-type key [UInt64](/docs/en/sql-reference/data-types/int-uint.md). +- `n`: Number of buckets. [UInt16](/docs/en/sql-reference/data-types/int-uint.md). + +**Returned value** + +- A [UInt16](/docs/en/sql-reference/data-types/int-uint.md) data type hash value. + +**Implementation details** + +It is efficient only if n <= 32768. + +**Example** + +Query: + +```sql +SELECT kostikConsistentHash(16045690984833335023, 2); +``` + +```response +┌─kostikConsistentHash(16045690984833335023, 2)─┐ +│ 1 │ +└───────────────────────────────────────────────┘ +``` + ## murmurHash2_32, murmurHash2_64 Produces a [MurmurHash2](https://github.com/aappleby/smhasher) hash value. From 76ebaedfd29311770aef47a59acaf5212fef868a Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 9 Apr 2024 15:45:35 +0000 Subject: [PATCH 411/470] Cosmetics --- tests/queries/0_stateless/02494_query_cache_system_tables.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02494_query_cache_system_tables.sql b/tests/queries/0_stateless/02494_query_cache_system_tables.sql index c67a5c49ddab..7c9f01c4e91f 100644 --- a/tests/queries/0_stateless/02494_query_cache_system_tables.sql +++ b/tests/queries/0_stateless/02494_query_cache_system_tables.sql @@ -54,11 +54,11 @@ CREATE TABLE system (c UInt64) ENGINE = Memory; SElECT * FROM system SETTINGS use_query_cache = 1; DROP TABLE system; --- Similar queries against system.system are rejected. +-- But queries against system.system are rejected. DROP TABLE IF EXISTS system.system; CREATE TABLE system.system (c UInt64) ENGINE = Memory; SElECT * FROM system.system SETTINGS use_query_cache = 1; -- { serverError QUERY_CACHE_USED_WITH_SYSTEM_TABLE } -DROP TABLE system; +DROP TABLE system.system; -- Cleanup SYSTEM DROP QUERY CACHE; From 9419c0f7882f8a99cd15115fdbc4946d9fe0e91c Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 17:53:00 +0200 Subject: [PATCH 412/470] remove l2squared --- .../functions/distance-functions.md | 38 ------------------- 1 file changed, 38 deletions(-) diff --git a/docs/en/sql-reference/functions/distance-functions.md b/docs/en/sql-reference/functions/distance-functions.md index eb991acc94b4..e20c35c6b6f1 100644 --- a/docs/en/sql-reference/functions/distance-functions.md +++ b/docs/en/sql-reference/functions/distance-functions.md @@ -82,44 +82,6 @@ Result: └──────────────────┘ ``` -## L2SquaredNorm - -Calculates the square root of the sum of the squares of the vector values (the [L2Norm](#l2norm)) squared. - -**Syntax** - -```sql -L2SquaredNorm(vector) -``` - -Alias: `normL2Squared`. - -***Arguments** - -- `vector` — [Tuple](../../sql-reference/data-types/tuple.md) or [Array](../../sql-reference/data-types/array.md). - -**Returned value** - -- L2-norm squared. - -Type: [Float](../../sql-reference/data-types/float.md). - -**Example** - -Query: - -```sql -SELECT L2SquaredNorm((1, 2)); -``` - -Result: - -```text -┌─L2SquaredNorm((1, 2))─┐ -│ 5 │ -└───────────────────────┘ -``` - ## LinfNorm Calculates the maximum of absolute values of a vector. 
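
For reference on the `L2SquaredNorm` documentation added above: the function
returns the sum of the squares of the vector's values, i.e. the square of
`L2Norm`. A quick sanity check (a sketch; both columns should agree, up to
floating-point rounding in the second one):

```sql
SELECT
    L2SquaredNorm((1, 2))           AS sum_of_squares, -- 1*1 + 2*2 = 5
    L2Norm((1, 2)) * L2Norm((1, 2)) AS l2norm_squared; -- sqrt(5) * sqrt(5) ≈ 5
```
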
From 17d3d57f9f7ab4d915090c98b87c6e161f7ae81d Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Tue, 9 Apr 2024 18:01:12 +0200 Subject: [PATCH 413/470] fix flaky result --- docs/en/getting-started/example-datasets/opensky.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/getting-started/example-datasets/opensky.md b/docs/en/getting-started/example-datasets/opensky.md index 9f8ad1348992..c0b4d96725da 100644 --- a/docs/en/getting-started/example-datasets/opensky.md +++ b/docs/en/getting-started/example-datasets/opensky.md @@ -127,15 +127,15 @@ Average flight distance is around 1000 km. Query: ```sql -SELECT avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) FROM opensky; +SELECT round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 2) FROM opensky; ``` Result: ```text -┌─avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))─┐ -│ 1041090.6360469435 │ -└────────────────────────────────────────────────────────────────────┘ + ┌─round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 2)─┐ +1. │ 1041090.67 │ -- 1.04 million + └──────────────────────────────────────────────────────────────────────────────┘ ``` ### Most busy origin airports and the average distance seen {#busy-airports-average-distance} From 4895a7de777aa14e12640e293b9a1cd163edea1b Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Tue, 9 Apr 2024 18:17:08 +0200 Subject: [PATCH 414/470] Fix format strings --- src/Common/ZooKeeper/ZooKeeperCommon.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeperCommon.cpp b/src/Common/ZooKeeper/ZooKeeperCommon.cpp index 4634eae77593..48bb510e5892 100644 --- a/src/Common/ZooKeeper/ZooKeeperCommon.cpp +++ b/src/Common/ZooKeeper/ZooKeeperCommon.cpp @@ -403,7 +403,7 @@ void ZooKeeperSetACLRequest::readImpl(ReadBuffer & in) std::string ZooKeeperSetACLRequest::toStringImpl() const { - return fmt::format("path = {}\n", "version = {}", path, version); + return fmt::format("path = {}\nversion = {}", path, version); } void ZooKeeperSetACLResponse::writeImpl(WriteBuffer & out) const @@ -457,7 +457,7 @@ void ZooKeeperCheckRequest::readImpl(ReadBuffer & in) std::string ZooKeeperCheckRequest::toStringImpl() const { - return fmt::format("path = {}\n", "version = {}", path, version); + return fmt::format("path = {}\nversion = {}", path, version); } void ZooKeeperErrorResponse::readImpl(ReadBuffer & in) From fdfee8e9051a572273e362a370699b20b8731d3b Mon Sep 17 00:00:00 2001 From: Alexander Gololobov Date: Tue, 9 Apr 2024 18:18:27 +0200 Subject: [PATCH 415/470] Fix printing OpNum that are not in default magic_enum range --- src/Common/ZooKeeper/ZooKeeperConstants.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/Common/ZooKeeper/ZooKeeperConstants.h b/src/Common/ZooKeeper/ZooKeeperConstants.h index a5c1d21eda6a..6349cc058d4d 100644 --- a/src/Common/ZooKeeper/ZooKeeperConstants.h +++ b/src/Common/ZooKeeper/ZooKeeperConstants.h @@ -2,6 +2,7 @@ #include #include +#include namespace Coordination @@ -64,3 +65,11 @@ static constexpr int32_t DEFAULT_OPERATION_TIMEOUT_MS = 10000; static constexpr int32_t DEFAULT_CONNECTION_TIMEOUT_MS = 1000; } + +/// This is used by fmt::format to print OpNum as strings. +/// All OpNum values shoud be in range [min, max] to be printed. 
+template <> +struct magic_enum::customize::enum_range { + static constexpr int min = -100; + static constexpr int max = 1000; +}; From fcfaf82181f97d888e4ddccd0754c80e31e7f567 Mon Sep 17 00:00:00 2001 From: Jayme Bird Date: Tue, 9 Apr 2024 17:49:46 +0100 Subject: [PATCH 416/470] fix: add missing hostname column to blob_storage_log system table --- docs/en/operations/system-tables/blob_storage_log.md | 2 ++ src/Interpreters/BlobStorageLog.cpp | 3 +++ 2 files changed, 5 insertions(+) diff --git a/docs/en/operations/system-tables/blob_storage_log.md b/docs/en/operations/system-tables/blob_storage_log.md index 2328f7f0346d..8c0c33a504ad 100644 --- a/docs/en/operations/system-tables/blob_storage_log.md +++ b/docs/en/operations/system-tables/blob_storage_log.md @@ -7,6 +7,7 @@ Contains logging entries with information about various blob storage operations Columns: +- `hostname` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) — Hostname of the server executing the query. - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of the event. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the event. - `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Time of the event with microseconds precision. @@ -38,6 +39,7 @@ SELECT * FROM system.blob_storage_log WHERE query_id = '7afe0450-504d-4e4b-9a80- ```text Row 1: ────── +hostname: clickhouse.eu-central1.internal event_date: 2023-10-31 event_time: 2023-10-31 16:03:40 event_time_microseconds: 2023-10-31 16:03:40.481437 diff --git a/src/Interpreters/BlobStorageLog.cpp b/src/Interpreters/BlobStorageLog.cpp index f9d5b0d6790b..0324ef8713cf 100644 --- a/src/Interpreters/BlobStorageLog.cpp +++ b/src/Interpreters/BlobStorageLog.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -26,6 +27,7 @@ ColumnsDescription BlobStorageLogElement::getColumnsDescription() return ColumnsDescription { + {"hostname", std::make_shared(std::make_shared()), "Hostname of the server executing the query."}, {"event_date", std::make_shared(), "Date of the event."}, {"event_time", std::make_shared(), "Time of the event."}, {"event_time_microseconds", std::make_shared(6), "Time of the event with microseconds precision."}, @@ -51,6 +53,7 @@ void BlobStorageLogElement::appendToBlock(MutableColumns & columns) const size_t i = 0; auto event_time_seconds = timeInSeconds(event_time); + columns[i++]->insert(getFQDNOrHostName()); columns[i++]->insert(DateLUT::instance().toDayNum(event_time_seconds).toUnderType()); columns[i++]->insert(event_time_seconds); columns[i++]->insert(Decimal64(timeInMicroseconds(event_time))); From 98c1cc7747ba2a8afb18982666f550167a6557ee Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Tue, 9 Apr 2024 17:09:04 +0000 Subject: [PATCH 417/470] new gh runner version 2.315.0 --- tests/ci/worker/prepare-ci-ami.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/worker/prepare-ci-ami.sh b/tests/ci/worker/prepare-ci-ami.sh index 281dff5b1c27..effc224c2d5d 100644 --- a/tests/ci/worker/prepare-ci-ami.sh +++ b/tests/ci/worker/prepare-ci-ami.sh @@ -9,7 +9,7 @@ set -xeuo pipefail echo "Running prepare script" export DEBIAN_FRONTEND=noninteractive -export RUNNER_VERSION=2.313.0 +export RUNNER_VERSION=2.315.0 export RUNNER_HOME=/home/ubuntu/actions-runner deb_arch() { From 077c57a4c9e56c387afffa1da2aaabe970fde305 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 20:14:50 +0200 Subject: [PATCH 418/470] Add functions and word moduli 
to aspell-dict --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 8aa2a463c477..146b9d486074 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1935,6 +1935,7 @@ mmap mmapped modularization moduloOrZero +moduli mongodb monthName moscow @@ -2646,6 +2647,12 @@ tupleMultiplyByNumber tupleNegate tuplePlus tupleToNameValuePairs +tupleIntDiv +tupleIntDivByNumber +tupleIntDivOrZero +tupleIntDivOrZeroByNumber +tupleModulo +tupleModuloByNumber turbostat txt typename From 9a6b987b0088b6173412da57a12ec4f3d86e0234 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 20:16:59 +0200 Subject: [PATCH 419/470] Add kostikConstantHash related words to aspell-dict --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 8aa2a463c477..005a5a7c69bf 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -452,6 +452,9 @@ Khanna KittenHouse Klickhouse Kolmogorov +Konstantin +kostik +kostikConsistentHash Korzeniewski Kubernetes LDAP @@ -655,6 +658,7 @@ OTLP OUTFILE ObjectId Observability +Oblakov Octonica Ok OnTime From 9b35c637c173bc64f15ae6978e66966bc2b17ab8 Mon Sep 17 00:00:00 2001 From: Blargian Date: Tue, 9 Apr 2024 20:19:47 +0200 Subject: [PATCH 420/470] Add SquaredNorm to aspell-ignore --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 8aa2a463c477..1da1373070ca 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -860,6 +860,7 @@ Soundex SpanKind Spearman's SquaredDistance +SquaredNorm StartTLS StartTime StartupSystemTables From de2a5f018f4891619fd74eb929b998652b615f83 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 9 Apr 2024 20:40:40 +0200 Subject: [PATCH 421/470] Update CollectSets.cpp --- src/Planner/CollectSets.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Planner/CollectSets.cpp b/src/Planner/CollectSets.cpp index b1f2875210d0..f00b1e6ab164 100644 --- a/src/Planner/CollectSets.cpp +++ b/src/Planner/CollectSets.cpp @@ -23,6 +23,7 @@ namespace ErrorCodes { extern const int UNSUPPORTED_METHOD; } + namespace { From 8a172003ce280c6bd262302408e139325ccdb8bc Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 9 Apr 2024 18:51:49 +0000 Subject: [PATCH 422/470] Fix capture of nested lambda. 
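
A sketch of the query shape this fixes, adapted from the regression test added
below (02389_analyzer_nested_lambda): the outer lambda's body uses an alias
(`hello`) that is itself computed by another lambda over the same aggregated
column, so captured columns have to be resolved at the correct lambda nesting
level.

```sql
-- Adapted from the test added in this patch; FORMAT Null discards the output.
SELECT
    groupArray(number) AS counts,
    arraySum(arrayMap(x -> (x + 1), counts)) AS hello,
    arrayMap(x -> (x / hello), counts) AS res
FROM numbers(1000000)
FORMAT Null;
```
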
--- src/Planner/PlannerActionsVisitor.cpp | 74 ++++++++++++++----- .../02389_analyzer_nested_lambda.sql | 6 ++ 2 files changed, 60 insertions(+), 20 deletions(-) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 656b6cdaa6e6..555eb8dbdc3d 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -39,6 +39,7 @@ namespace ErrorCodes extern const int UNSUPPORTED_METHOD; extern const int LOGICAL_ERROR; extern const int BAD_ARGUMENTS; + extern const int INCORRECT_QUERY; } namespace @@ -500,7 +501,39 @@ class PlannerActionsVisitorImpl ActionsDAG::NodeRawConstPtrs visit(QueryTreeNodePtr expression_node); private: - using NodeNameAndNodeMinLevel = std::pair; + + class Levels + { + public: + explicit Levels(size_t level) { set(level); } + + void set(size_t level) + { + check(level); + mask |= uint64_t(1) << level; + } + + void reset(size_t level) + { + check(level); + mask &= ~(uint64_t(1) << level); + } + + void add(Levels levels) { mask |= levels.mask; } + + size_t max() const { return 63 - __builtin_clzll(mask); } + + private: + uint64_t mask = 0; + + void check(size_t level) + { + if (level >= 63) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Maximum lambda depth exceeded. Maximum 63."); + } + }; + + using NodeNameAndNodeMinLevel = std::pair; NodeNameAndNodeMinLevel visitImpl(QueryTreeNodePtr node); @@ -586,11 +619,11 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi column_source->getNodeType() == QueryTreeNodeType::LAMBDA && actions_stack[i].getScopeNode().get() == column_source.get()) { - return {column_node_name, i}; + return {column_node_name, Levels(i)}; } } - return {column_node_name, 0}; + return {column_node_name, Levels(0)}; } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitConstant(const QueryTreeNodePtr & node) @@ -660,7 +693,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi actions_stack_node.addInputConstantColumnIfNecessary(constant_node_name, column); } - return {constant_node_name, 0}; + return {constant_node_name, Levels(0)}; } @@ -688,7 +721,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi auto lambda_actions_dag = std::make_shared(); actions_stack.emplace_back(lambda_actions_dag, node); - auto [lambda_expression_node_name, level] = visitImpl(lambda_node.getExpression()); + auto [lambda_expression_node_name, levels] = visitImpl(lambda_node.getExpression()); lambda_actions_dag->getOutputs().push_back(actions_stack.back().getNodeOrThrow(lambda_expression_node_name)); lambda_actions_dag->removeUnusedActions(Names(1, lambda_expression_node_name)); @@ -699,8 +732,9 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi ActionsDAG::NodeRawConstPtrs lambda_children; Names required_column_names = lambda_actions->getRequiredColumns(); - if (level == actions_stack.size() - 1) - --level; + actions_stack.pop_back(); + levels.reset(actions_stack.size()); + size_t level = levels.max(); const auto & lambda_argument_names = lambda_node.getArgumentNames(); @@ -718,7 +752,6 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi auto lambda_node_name = calculateActionNodeName(node, *planner_context); auto function_capture = std::make_shared( lambda_actions, captured_column_names, lambda_arguments_names_and_types, lambda_node.getExpression()->getResultType(), lambda_expression_node_name); - 
actions_stack.pop_back(); // TODO: Pass IFunctionBase here not FunctionCaptureOverloadResolver. const auto * actions_node = actions_stack[level].addFunctionIfNecessary(lambda_node_name, std::move(lambda_children), function_capture); @@ -735,7 +768,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi actions_stack_node.addInputColumnIfNecessary(lambda_node_name, result_type); } - return {lambda_node_name, level}; + return {lambda_node_name, levels}; } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::makeSetForInFunction(const QueryTreeNodePtr & node) @@ -799,7 +832,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::ma actions_stack_node.addInputConstantColumnIfNecessary(column.name, column); } - return {column.name, 0}; + return {column.name, Levels(0)}; } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitIndexHintFunction(const QueryTreeNodePtr & node) @@ -833,7 +866,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi size_t index_hint_function_level = actions_stack.size() - 1; actions_stack[index_hint_function_level].addFunctionIfNecessary(function_node_name, {}, index_hint_function_overload_resolver); - return {function_node_name, index_hint_function_level}; + return {function_node_name, Levels(index_hint_function_level)}; } PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::visitFunction(const QueryTreeNodePtr & node) @@ -868,7 +901,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi actions_stack_node.addInputColumnIfNecessary(function_node_name, function_node.getResultType()); } - return {function_node_name, 0}; + return {function_node_name, Levels(0)}; } const auto & function_arguments = function_node.getArguments().getNodes(); @@ -877,14 +910,14 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi Names function_arguments_node_names; function_arguments_node_names.reserve(function_arguments_size); - size_t level = 0; + Levels levels(0); for (size_t function_argument_index = 0; function_argument_index < function_arguments_size; ++function_argument_index) { if (in_function_second_argument_node_name_with_level && function_argument_index == 1) { - auto & [node_name, node_min_level] = *in_function_second_argument_node_name_with_level; + auto & [node_name, node_levels] = *in_function_second_argument_node_name_with_level; function_arguments_node_names.push_back(std::move(node_name)); - level = std::max(level, node_min_level); + levels.add(node_levels); continue; } @@ -892,20 +925,21 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi if (argument->getNodeType() == QueryTreeNodeType::LAMBDA) { - auto [node_name, node_min_level] = visitLambda(argument); + auto [node_name, node_levels] = visitLambda(argument); function_arguments_node_names.push_back(std::move(node_name)); - level = std::max(level, node_min_level); + levels.add(node_levels); continue; } - auto [node_name, node_min_level] = visitImpl(argument); + auto [node_name, node_levels] = visitImpl(argument); function_arguments_node_names.push_back(std::move(node_name)); - level = std::max(level, node_min_level); + levels.add(node_levels); } ActionsDAG::NodeRawConstPtrs children; children.reserve(function_arguments_size); + size_t level = levels.max(); for (auto & function_argument_node_name : function_arguments_node_names) 
children.push_back(actions_stack[level].getNodeOrThrow(function_argument_node_name)); @@ -930,7 +964,7 @@ PlannerActionsVisitorImpl::NodeNameAndNodeMinLevel PlannerActionsVisitorImpl::vi actions_stack_node.addInputColumnIfNecessary(function_node_name, function_node.getResultType()); } - return {function_node_name, level}; + return {function_node_name, levels}; } } diff --git a/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql b/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql index 48e84246d1c9..0ec1b6586948 100644 --- a/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql +++ b/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql @@ -127,3 +127,9 @@ SELECT arrayMap(x -> splitByChar(toString(id), arrayMap(x -> toString(1), [NULL] DROP TABLE test_table; -- { echoOff } + +SELECT + groupArray(number) AS counts, + arraySum(arrayMap(x -> (x + 1), counts)) as hello, + arrayMap(x -> (x / hello), counts) AS res +FROM numbers(1000000) FORMAT Null; From 0e7d05e0912ddbab866589b65ab5966589860f62 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 10 Apr 2024 01:58:30 +0200 Subject: [PATCH 423/470] Fix UBSan --- src/Client/ClientBaseHelpers.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index 3f3e3b1492fa..b1d29b34ffc6 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -6,6 +6,8 @@ #include #include +#include + namespace DB { @@ -173,7 +175,8 @@ void highlight(const String & query, std::vector & colors /// Highlight the last error in red. If the parser failed or the lexer found an invalid token, /// or if it didn't parse all the data (except, the data for INSERT query, which is legitimately unparsed) if ((!parse_res || last_token.isError() || (!token_iterator->isEnd() && token_iterator->type != TokenType::Semicolon)) - && !(insert_data && expected.max_parsed_pos >= insert_data)) + && !(insert_data && expected.max_parsed_pos >= insert_data) + && expected.max_parsed_pos >= prev) { pos += UTF8::countCodePoints(reinterpret_cast(prev), expected.max_parsed_pos - prev); From 22a3a60c7775ece45e65eefa16f17f7d613413f8 Mon Sep 17 00:00:00 2001 From: flynn Date: Wed, 10 Apr 2024 02:40:14 +0000 Subject: [PATCH 424/470] Fix special build --- src/Common/examples/parallel_aggregation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/examples/parallel_aggregation.cpp b/src/Common/examples/parallel_aggregation.cpp index 20f5f1c5224b..7094690a3a88 100644 --- a/src/Common/examples/parallel_aggregation.cpp +++ b/src/Common/examples/parallel_aggregation.cpp @@ -205,7 +205,7 @@ static void aggregate4(Map & local_map, MapTwoLevel & global_map, Mutex * mutexe else { size_t hash_value = global_map.hash(*it); - size_t bucket = global_map.getBucketFromHash(hash_value); + size_t bucket = MapTwoLevel::getBucketFromHash(hash_value); if (mutexes[bucket].try_lock()) { From fc9d5ec589b978001fe43b71921fc6940c7951b9 Mon Sep 17 00:00:00 2001 From: peter279k Date: Wed, 10 Apr 2024 10:40:14 +0800 Subject: [PATCH 425/470] Add uptime() function usage --- .../functions/other-functions.md | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md index e9f8bc6e547c..187f248e92df 100644 --- a/docs/en/sql-reference/functions/other-functions.md +++ b/docs/en/sql-reference/functions/other-functions.md @@ -916,6 +916,34 @@ Returns the 
larger value of a and b. Returns the server’s uptime in seconds. If executed in the context of a distributed table, this function generates a normal column with values relevant to each shard. Otherwise it produces a constant value. +**Syntax** + +``` sql +uptime() +``` + +**Returned value** + +- Time value of seconds. + +Type: [UInt32](/docs/en/sql-reference/data-types/int-uint.md). + +**Example** + +Query: + +``` sql +SELECT uptime() as Uptime; +``` + +Result: + +``` response +┌─Uptime─┐ +│ 55867 │ +└────────┘ +``` + ## version() Returns the current version of ClickHouse as a string in the form of: From b96543e57e39f021cd56d3936d93b9382a5c824d Mon Sep 17 00:00:00 2001 From: peter279k Date: Wed, 10 Apr 2024 14:25:49 +0800 Subject: [PATCH 426/470] Add translateUTF8 function usage --- .../functions/string-replace-functions.md | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index c7bd16cad4aa..ab39b064a8bf 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -193,3 +193,33 @@ Result: ## translateUTF8 Like [translate](#translate) but assumes `s`, `from` and `to` are UTF-8 encoded strings. + +**Syntax** + +``` sql +translateUTF8(s, from, to) +``` + +**Parameters** + +- `s`: A string type [String](/docs/en/sql-reference/data-types/string.md). +- `from`: A string type [String](/docs/en/sql-reference/data-types/string.md). +- `to`: A string type [String](/docs/en/sql-reference/data-types/string.md). + +**Returned value** + +- `s`: A string type [String](/docs/en/sql-reference/data-types/string.md). + +**Examples** + +Query: + +``` sql +SELECT translateUTF8('Hello, World!', 'delor', 'DELOR') AS res; +``` + +``` response +┌─res───────────┐ +│ HELLO, WORLD! │ +└───────────────┘ +``` From 01f3d57e4be00682ed68557cec2d111ad77ef860 Mon Sep 17 00:00:00 2001 From: peter279k Date: Wed, 10 Apr 2024 12:24:07 +0800 Subject: [PATCH 427/470] Add upper and upperUTF8 function usage --- .../functions/string-functions.md | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 573790f7ff7c..9ca77eff0e0f 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -260,8 +260,36 @@ Alias: `lcase` Converts the ASCII Latin symbols in a string to uppercase. +**Syntax** + +``` sql +upper(input) +``` + Alias: `ucase` +**Parameters** + +- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md). + +**Returned value** + +- A [String](/docs/en/sql-reference/data-types/string.md) data type value. + +**Examples** + +Query: + +``` sql +SELECT upper('value') as Upper; +``` + +``` response +┌─Upper─┐ +│ VALUE │ +└───────┘ +``` + ## lowerUTF8 Converts a string to lowercase, assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined. @@ -278,6 +306,34 @@ Does not detect the language, e.g. for Turkish the result might not be exactly c If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point. 
+**Syntax** + +``` sql +upperUTF8(input) +``` + +**Parameters** + +- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md). + +**Returned value** + +- A [String](/docs/en/sql-reference/data-types/string.md) data type value. + +**Example** + +Query: + +``` sql +SELECT upperUTF8('value') as Upperutf8; +``` + +``` response +┌─Upperutf8─┐ +│ VALUE │ +└───────────┘ +``` + ## isValidUTF8 Returns 1, if the set of bytes constitutes valid UTF-8-encoded text, otherwise 0. From 7774a2a313585312527db34e168e56f5405cb157 Mon Sep 17 00:00:00 2001 From: loselarry Date: Wed, 10 Apr 2024 17:21:25 +0800 Subject: [PATCH 428/470] chore: fix some comments Signed-off-by: loselarry --- base/poco/Foundation/src/pcre_compile.c | 2 +- docs/en/operations/settings/merge-tree-settings.md | 2 +- docs/en/operations/settings/settings.md | 2 +- src/Functions/FunctionsDecimalArithmetics.h | 2 +- src/Functions/serverConstants.cpp | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/base/poco/Foundation/src/pcre_compile.c b/base/poco/Foundation/src/pcre_compile.c index 3a6fafe8d569..b5f5f9a82864 100644 --- a/base/poco/Foundation/src/pcre_compile.c +++ b/base/poco/Foundation/src/pcre_compile.c @@ -4835,7 +4835,7 @@ for (;; ptr++) If the class contains characters outside the 0-255 range, a different opcode is compiled. It may optionally have a bit map for characters < 256, - but those above are are explicitly listed afterwards. A flag byte tells + but those above are explicitly listed afterwards. A flag byte tells whether the bitmap is present, and whether this is a negated class or not. In JavaScript compatibility mode, an isolated ']' causes an error. In diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 3e411a51ff46..9327d52227f9 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -287,7 +287,7 @@ Default value: 0 (seconds) ## remote_fs_execute_merges_on_single_replica_time_threshold -When this setting has a value greater than than zero only a single replica starts the merge immediately if merged part on shared storage and `allow_remote_fs_zero_copy_replication` is enabled. +When this setting has a value greater than zero only a single replica starts the merge immediately if merged part on shared storage and `allow_remote_fs_zero_copy_replication` is enabled. :::note Zero-copy replication is not ready for production Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use. diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index e4e7be83f7da..3e38c22dd8fe 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -5302,7 +5302,7 @@ SETTINGS(dictionary_use_async_executor=1, max_threads=8); ## storage_metadata_write_full_object_key {#storage_metadata_write_full_object_key} When set to `true` the metadata files are written with `VERSION_FULL_OBJECT_KEY` format version. With that format full object storage key names are written to the metadata files. -When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are are written to the metadata files. The prefix for all of object storage key names is set in configurations files at `storage_configuration.disks` section. 
+When set to `false` the metadata files are written with the previous format version, `VERSION_INLINE_DATA`. With that format only suffixes of object storage key names are written to the metadata files. The prefix for all of object storage key names is set in configurations files at `storage_configuration.disks` section. Default value: `false`. diff --git a/src/Functions/FunctionsDecimalArithmetics.h b/src/Functions/FunctionsDecimalArithmetics.h index 79e10d215a97..e26ad7362b37 100644 --- a/src/Functions/FunctionsDecimalArithmetics.h +++ b/src/Functions/FunctionsDecimalArithmetics.h @@ -280,7 +280,7 @@ class FunctionsDecimalArithmetics : public IFunction /** At compile time, result is unknown. We only know the Scale (number of fractional digits) at runtime. Also nothing is known about size of whole part. - As in simple division/multiplication for decimals, we scale the result up, but is is explicit here and no downscale is performed. + As in simple division/multiplication for decimals, we scale the result up, but it is explicit here and no downscale is performed. It guarantees that result will have given scale and it can also be MANUALLY converted to other decimal types later. **/ if (scale > DecimalUtils::max_precision) diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index fd8fb22455bf..e7e423058f11 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -32,7 +32,7 @@ namespace #endif - /// Get the host name. Is is constant on single server, but is not constant in distributed queries. + /// Get the host name. It is constant on single server, but is not constant in distributed queries. class FunctionHostName : public FunctionConstantBase { public: From 1f48b97d23ad8dd03cbad66280db1def4c489b51 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 10 Apr 2024 09:56:54 +0000 Subject: [PATCH 429/470] Fix clang-tidy build --- src/Common/examples/parallel_aggregation.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/examples/parallel_aggregation.cpp b/src/Common/examples/parallel_aggregation.cpp index 20f5f1c5224b..7094690a3a88 100644 --- a/src/Common/examples/parallel_aggregation.cpp +++ b/src/Common/examples/parallel_aggregation.cpp @@ -205,7 +205,7 @@ static void aggregate4(Map & local_map, MapTwoLevel & global_map, Mutex * mutexe else { size_t hash_value = global_map.hash(*it); - size_t bucket = global_map.getBucketFromHash(hash_value); + size_t bucket = MapTwoLevel::getBucketFromHash(hash_value); if (mutexes[bucket].try_lock()) { From 927c5ca1fcefd3091c9a9967da2f1e1bb1affc74 Mon Sep 17 00:00:00 2001 From: peter279k Date: Wed, 10 Apr 2024 18:09:02 +0800 Subject: [PATCH 430/470] Update example --- .../sql-reference/functions/string-replace-functions.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md index ab39b064a8bf..60fe286de258 100644 --- a/docs/en/sql-reference/functions/string-replace-functions.md +++ b/docs/en/sql-reference/functions/string-replace-functions.md @@ -215,11 +215,11 @@ translateUTF8(s, from, to) Query: ``` sql -SELECT translateUTF8('Hello, World!', 'delor', 'DELOR') AS res; +SELECT translateUTF8('Münchener Straße', 'üß', 'us') AS res; ``` ``` response -┌─res───────────┐ -│ HELLO, WORLD! 
│ -└───────────────┘ +┌─res──────────────┐ +│ Munchener Strase │ +└──────────────────┘ ``` From 06bbf97cc057fc872d0b0749b0d349d21bbb0f72 Mon Sep 17 00:00:00 2001 From: peter279k Date: Wed, 10 Apr 2024 18:12:29 +0800 Subject: [PATCH 431/470] Update example --- docs/en/sql-reference/functions/string-functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index 9ca77eff0e0f..d4df3e0479a8 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -325,12 +325,12 @@ upperUTF8(input) Query: ``` sql -SELECT upperUTF8('value') as Upperutf8; +SELECT upperUTF8('München') as Upperutf8; ``` ``` response ┌─Upperutf8─┐ -│ VALUE │ +│ MÜNCHEN │ └───────────┘ ``` From 444ad3bf62f2099de1978bcddc0413255fe8ac93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 10 Apr 2024 12:12:53 +0200 Subject: [PATCH 432/470] Fix random clang tidy warning --- src/Common/examples/encrypt_decrypt.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/examples/encrypt_decrypt.cpp b/src/Common/examples/encrypt_decrypt.cpp index c7f949195c87..18d4e50be8ac 100644 --- a/src/Common/examples/encrypt_decrypt.cpp +++ b/src/Common/examples/encrypt_decrypt.cpp @@ -35,9 +35,9 @@ int main(int argc, char ** argv) DB::CompressionCodecEncrypted::Configuration::instance().load(*loaded_config.configuration, "encryption_codecs"); if (action == "-e") - std::cout << processor.encryptValue(codec_name, value) << std::endl; + std::cout << DB::ConfigProcessor::encryptValue(codec_name, value) << std::endl; else if (action == "-d") - std::cout << processor.decryptValue(codec_name, value) << std::endl; + std::cout << DB::ConfigProcessor::decryptValue(codec_name, value) << std::endl; else std::cerr << "Unknown action: " << action << std::endl; } From e76aefac9c574ab11fc715109728cc8612df61e0 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 10 Apr 2024 11:55:38 +0000 Subject: [PATCH 433/470] Removing users.xml --- tests/queries/0_stateless/users.xml | 110 ---------------------------- 1 file changed, 110 deletions(-) delete mode 100644 tests/queries/0_stateless/users.xml diff --git a/tests/queries/0_stateless/users.xml b/tests/queries/0_stateless/users.xml deleted file mode 100644 index a199435b42f7..000000000000 --- a/tests/queries/0_stateless/users.xml +++ /dev/null @@ -1,110 +0,0 @@ - - - - - - - - - - - - 1 - - - - - - - - - c64c5e4e53ea1a9f1427d2713b3a22bbebe8940bc807adaf654744b1568c70ab - - - - ::/0 - - - - default - - - default - - - 1 - - - - - - - - - - - 3600 - - - 0 - 0 - 0 - 0 - 0 - - - - From 715e6d90d7c1bbcb9d4f179a7faa6da716bff776 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 10 Apr 2024 11:56:32 +0000 Subject: [PATCH 434/470] Removing 02203_shebang.bak --- tests/queries/0_stateless/02203_shebang.bak | 3 --- 1 file changed, 3 deletions(-) delete mode 100755 tests/queries/0_stateless/02203_shebang.bak diff --git a/tests/queries/0_stateless/02203_shebang.bak b/tests/queries/0_stateless/02203_shebang.bak deleted file mode 100755 index 07686d1aab48..000000000000 --- a/tests/queries/0_stateless/02203_shebang.bak +++ /dev/null @@ -1,3 +0,0 @@ -#!/usr/bin/clickhouse-local --queries-file - -SELECT 1; From 7bf4976198b38f0a472d56ba7138d877b68934eb Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Wed, 10 Apr 2024 12:57:48 +0000 Subject: [PATCH 435/470] impl --- 
tests/queries/0_stateless/01592_long_window_functions1.sql | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/queries/0_stateless/01592_long_window_functions1.sql b/tests/queries/0_stateless/01592_long_window_functions1.sql index c63c651fb0b8..d2d32e24eaab 100644 --- a/tests/queries/0_stateless/01592_long_window_functions1.sql +++ b/tests/queries/0_stateless/01592_long_window_functions1.sql @@ -1,5 +1,8 @@ -- Tags: long +-- test became more than an order of magnitude slower with max_bytes_before_external_sort=1 +set max_bytes_before_external_sort = 0; + drop table if exists stack; set max_insert_threads = 4; From f0d7f3b44f56fc813f4e3463033b4567502c6179 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Tue, 9 Apr 2024 21:23:31 +0200 Subject: [PATCH 436/470] Style fixes --- src/Common/ZooKeeper/ZooKeeperConstants.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Common/ZooKeeper/ZooKeeperConstants.h b/src/Common/ZooKeeper/ZooKeeperConstants.h index 6349cc058d4d..1d9830505f8e 100644 --- a/src/Common/ZooKeeper/ZooKeeperConstants.h +++ b/src/Common/ZooKeeper/ZooKeeperConstants.h @@ -67,9 +67,10 @@ static constexpr int32_t DEFAULT_CONNECTION_TIMEOUT_MS = 1000; } /// This is used by fmt::format to print OpNum as strings. -/// All OpNum values shoud be in range [min, max] to be printed. +/// All OpNum values should be in range [min, max] to be printed. template <> -struct magic_enum::customize::enum_range { - static constexpr int min = -100; - static constexpr int max = 1000; +struct magic_enum::customize::enum_range +{ + static constexpr int min = -100; + static constexpr int max = 1000; }; From e4cb27eda3b73977e87208ad6a7ec34caea30b2e Mon Sep 17 00:00:00 2001 From: yokofly Date: Wed, 10 Apr 2024 21:22:13 +0800 Subject: [PATCH 437/470] fix macOS binary install the usr/bin is not suitable for macOS https://superuser.com/questions/933019/sudo-cant-create-file-in-usr-bin-in-el-capitan after apply this patch, we can `sudo ./clickhouse --install` on MacOS --- programs/install/Install.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 20c1a0ad4a86..0f7305cafbd1 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -46,7 +46,7 @@ INCBIN(resource_users_xml, SOURCE_DIR "/programs/server/users.xml"); * * The following steps are performed: * - * - copying the binary to binary directory (/usr/bin). + * - copying the binary to binary directory (/usr/local/bin (Apple macOS) or /usr/bin (Others)). * - creation of symlinks for tools. * - creation of clickhouse user and group. * - creation of config directory (/etc/clickhouse-server). 
@@ -226,7 +226,11 @@ int mainEntryClickHouseInstall(int argc, char ** argv) desc.add_options() ("help,h", "produce help message") ("prefix", po::value()->default_value("/"), "prefix for all paths") +#if defined (OS_DARWIN) + ("binary-path", po::value()->default_value("usr/local/bin"), "where to install binaries") +#else ("binary-path", po::value()->default_value("usr/bin"), "where to install binaries") +#endif ("config-path", po::value()->default_value("etc/clickhouse-server"), "where to install configs") ("log-path", po::value()->default_value("var/log/clickhouse-server"), "where to create log directory") ("data-path", po::value()->default_value("var/lib/clickhouse"), "directory for data") @@ -1216,7 +1220,11 @@ int mainEntryClickHouseStart(int argc, char ** argv) desc.add_options() ("help,h", "produce help message") ("prefix", po::value()->default_value("/"), "prefix for all paths") +#if defined (OS_DARWIN) + ("binary-path", po::value()->default_value("usr/local/bin"), "directory with binary") +#else ("binary-path", po::value()->default_value("usr/bin"), "directory with binary") +#endif ("config-path", po::value()->default_value("etc/clickhouse-server"), "directory with configs") ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") ("user", po::value()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user") @@ -1332,7 +1340,11 @@ int mainEntryClickHouseRestart(int argc, char ** argv) desc.add_options() ("help,h", "produce help message") ("prefix", po::value()->default_value("/"), "prefix for all paths") +#if defined (OS_DARWIN) + ("binary-path", po::value()->default_value("usr/local/bin"), "directory with binary") +#else ("binary-path", po::value()->default_value("usr/bin"), "directory with binary") +#endif ("config-path", po::value()->default_value("etc/clickhouse-server"), "directory with configs") ("pid-path", po::value()->default_value("var/run/clickhouse-server"), "directory for pid file") ("user", po::value()->default_value(DEFAULT_CLICKHOUSE_SERVER_USER), "clickhouse user") From 2e4e3187d5d342fec7565b9db9652e90084f5e3d Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Wed, 10 Apr 2024 13:17:39 +0000 Subject: [PATCH 438/470] merge sync pr on push to master --- .github/workflows/master.yml | 4 ++++ tests/ci/pr_info.py | 5 +++- tests/ci/sync_pr.py | 45 ++++++++++++++++++++++++++++++++++++ 3 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 tests/ci/sync_pr.py diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 816bdfd4f31a..64372a90613e 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -23,6 +23,10 @@ jobs: clear-repository: true # to ensure correct digests fetch-depth: 0 # to get version filter: tree:0 + - name: Check sync PR + run: | + cd "$GITHUB_WORKSPACE/tests/ci" + python3 sync_pr.py || : - name: Python unit tests run: | cd "$GITHUB_WORKSPACE/tests/ci" diff --git a/tests/ci/pr_info.py b/tests/ci/pr_info.py index 204284785c9a..293004fc4f3a 100644 --- a/tests/ci/pr_info.py +++ b/tests/ci/pr_info.py @@ -199,7 +199,6 @@ def __init__( EventType.MERGE_QUEUE in github_event ): # pull request and other similar events self.event_type = EventType.MERGE_QUEUE - # FIXME: need pr? 
we can parse it from ["head_ref": "refs/heads/gh-readonly-queue/test-merge-queue/pr-6751-4690229995a155e771c52e95fbd446d219c069bf"] self.number = 0 self.sha = github_event[EventType.MERGE_QUEUE]["head_sha"] self.base_ref = github_event[EventType.MERGE_QUEUE]["base_ref"] @@ -208,6 +207,8 @@ def __init__( self.base_name = github_event["repository"]["full_name"] # any_branch-name - the name of working branch name self.head_ref = github_event[EventType.MERGE_QUEUE]["head_ref"] + # parse underlying pr from ["head_ref": "refs/heads/gh-readonly-queue/test-merge-queue/pr-6751-4690229995a155e771c52e95fbd446d219c069bf"] + self.merged_pr = int(self.head_ref.split("/pr-")[-1].split("-")[0]) # UserName/ClickHouse or ClickHouse/ClickHouse self.head_name = self.base_name self.user_login = github_event["sender"]["login"] @@ -235,6 +236,8 @@ def __init__( if pull_request is None or pull_request["state"] == "closed": # it's merged PR to master self.number = 0 + if pull_request: + self.merged_pr = pull_request["number"] self.labels = set() self.pr_html_url = f"{repo_prefix}/commits/{ref}" self.base_ref = ref diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py new file mode 100644 index 000000000000..dfa7379ae6f3 --- /dev/null +++ b/tests/ci/sync_pr.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python + +"""Script for automatic sync PRs handling in private repos""" + +import sys + +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from github_helper import GitHub + + +def main(): + gh = GitHub(get_best_robot_token()) + + pr_info = PRInfo() + assert pr_info.merged_pr, "BUG. merged PR number could not been determined" + + prs = gh.get_pulls_from_search( + query=f"type:pr [Sync] ClickHouse/ClickHouse#{pr_info.merged_pr}", + repo="ClickHouse/clickhouse-private", + ) + if len(prs) > 1: + print(f"WARNING: More than one PR found [{prs}]") + elif len(prs) == 0: + print("WARNING: No Sync PR found") + sys.exit(0) + + pr = prs[0] + + if pr.state == "closed": + print(f"Sync PR [{pr.number}] already closed - exiting") + sys.exit(0) + + if pr.state != "open": + print(f"WARNING: Unknown Sync PR [{pr.number}] state [{pr.state}] - exiting") + sys.exit(0) + + print(f"NOTE: Trying to merge Sync PR [{pr.number}]") + if pr.draft: + gh.toggle_pr_draft(pr) + pr.merge() + + +if __name__ == "__main__": + main() From 5c6a3f1b769f6542bb08132f23627ba950c1672a Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 10 Apr 2024 14:21:10 +0000 Subject: [PATCH 439/470] Review fixes. --- src/Planner/PlannerActionsVisitor.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 555eb8dbdc3d..63d7f44239e0 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -510,26 +510,31 @@ class PlannerActionsVisitorImpl void set(size_t level) { check(level); - mask |= uint64_t(1) << level; + if (level) + mask |= (uint64_t(1) << (level - 1)); } void reset(size_t level) { check(level); - mask &= ~(uint64_t(1) << level); + if (level) + mask &= ~(uint64_t(1) << (level - 1)); } void add(Levels levels) { mask |= levels.mask; } - size_t max() const { return 63 - __builtin_clzll(mask); } + size_t max() const + { + return mask ? (64 - __builtin_clzll(mask)) : 0; + } private: uint64_t mask = 0; void check(size_t level) { - if (level >= 63) - throw Exception(ErrorCodes::INCORRECT_QUERY, "Maximum lambda depth exceeded. 
Maximum 63."); + if (level > 64) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Maximum lambda depth exceeded. Maximum 64."); } }; From 4ed518d6ad83c1e69326e527882de57f127e6a8b Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 10 Apr 2024 14:18:47 +0000 Subject: [PATCH 440/470] Docs: Update date_add --- docs/en/sql-reference/functions/date-time-functions.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 4c4190043446..3bb9d4e7dbe6 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -1906,7 +1906,7 @@ Aliases: `dateAdd`, `DATE_ADD`. **Arguments** -- `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). +- `unit` — The type of interval to add. Note: This is not a [String](../../sql-reference/data-types/string.md) and must therefore not be quoted. Possible values: - `second` @@ -1961,7 +1961,7 @@ Aliases: `dateSub`, `DATE_SUB`. **Arguments** -- `unit` — The type of interval to subtract. Note: The unit should be unquoted. +- `unit` — The type of interval to subtract. Note: This is not a [String](../../sql-reference/data-types/string.md) and must therefore not be quoted. Possible values: From c6f2b9bbdc7f176e517392c43697124fbe4b9529 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 10 Apr 2024 14:24:04 +0000 Subject: [PATCH 441/470] Update test. --- tests/queries/0_stateless/02389_analyzer_nested_lambda.sql | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql b/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql index 0ec1b6586948..be4b64888ca2 100644 --- a/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql +++ b/tests/queries/0_stateless/02389_analyzer_nested_lambda.sql @@ -133,3 +133,8 @@ SELECT arraySum(arrayMap(x -> (x + 1), counts)) as hello, arrayMap(x -> (x / hello), counts) AS res FROM numbers(1000000) FORMAT Null; + +SELECT + arrayWithConstant(pow(10,6), 1) AS nums, + arrayMap(x -> x, nums) AS m, + arrayMap(x -> x + arraySum(m), m) AS res FORMAT Null; From 4408714c554be721f3e8331f4ae1217eeed249bf Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 10 Apr 2024 17:09:01 +0200 Subject: [PATCH 442/470] Remove subset dotProduct from tuple-functions and add dotProduct as an alias to the superset --- .../functions/array-functions.md | 2 +- .../functions/tuple-functions.md | 39 ------------------- 2 files changed, 1 insertion(+), 40 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index f3a031f9da74..b54ddd6d5aeb 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -784,7 +784,7 @@ Returns the dot product of two arrays. arrayDotProduct(vector1, vector2) ``` -Alias: `scalarProduct` +Alias: `scalarProduct`, `dotProduct` **Parameters** diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md index b089de67e98f..859e9fb4fd4c 100644 --- a/docs/en/sql-reference/functions/tuple-functions.md +++ b/docs/en/sql-reference/functions/tuple-functions.md @@ -521,45 +521,6 @@ Result: └──────────────────────────────────┘ ``` -## dotProduct - -Calculates the scalar product of two tuples of the same size. 
- -**Syntax** - -```sql -dotProduct(tuple1, tuple2) -``` - -Alias: `scalarProduct`. - -**Arguments** - -- `tuple1` — First tuple. [Tuple](../../sql-reference/data-types/tuple.md). -- `tuple2` — Second tuple. [Tuple](../../sql-reference/data-types/tuple.md). - -**Returned value** - -- Scalar product. - -Type: [Int/UInt](../../sql-reference/data-types/int-uint.md) or [Float](../../sql-reference/data-types/float.md). - -**Example** - -Query: - -```sql -SELECT dotProduct((1, 2), (2, 3)); -``` - -Result: - -```text -┌─dotProduct((1, 2), (2, 3))─┐ -│ 8 │ -└────────────────────────────┘ -``` - ## tupleConcat Combines tuples passed as arguments. From e8f3279f377ba62e2b64b79da92fdc49f671dfc8 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 10 Apr 2024 15:30:20 +0000 Subject: [PATCH 443/470] Review fix --- src/Planner/PlannerActionsVisitor.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Planner/PlannerActionsVisitor.cpp b/src/Planner/PlannerActionsVisitor.cpp index 63d7f44239e0..02c1c56fae2e 100644 --- a/src/Planner/PlannerActionsVisitor.cpp +++ b/src/Planner/PlannerActionsVisitor.cpp @@ -523,10 +523,7 @@ class PlannerActionsVisitorImpl void add(Levels levels) { mask |= levels.mask; } - size_t max() const - { - return mask ? (64 - __builtin_clzll(mask)) : 0; - } + size_t max() const { return 64 - getLeadingZeroBits(mask); } private: uint64_t mask = 0; From 6fcaeca2e6a3af00725005afe39801937219a9f6 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 10 Apr 2024 17:44:11 +0200 Subject: [PATCH 444/470] Clarify arrayPartialShuffle --- .../functions/array-functions.md | 27 ++++++++++--------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index b54ddd6d5aeb..2f7f9807fca7 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1424,7 +1424,7 @@ Elements are reordered in such a way that each possible permutation of those ele **Syntax** ```sql -arrayShuffle(arr, seed) +arrayShuffle(arr[, seed]) ``` **Parameters** @@ -1476,12 +1476,12 @@ Result: ## arrayPartialShuffle -Returns an array of the same size as the original array where elements in range `[1..limit]` are a random subset of the original array. Remaining `(limit..N]` shall contain the elements not in `[1..limit]` range in an undefined order. +Given an input array of cardinality `N`, returns an array of size N where elements in the range `[1...limit]`are shuffled and the remaining elements in the range `(limit...n]` are unshuffled. **Syntax** ```sql -arrayPartialShuffle(arr, limit, seed) +arrayPartialShuffle(arr[, limit[, seed]]) ``` **Parameters** @@ -1504,34 +1504,35 @@ The value of `limit` should be in the range `[1..N]`. Values outside of that ran **Examples** -In this example, `arrayPartialShuffle` is used without the `limit` and `seed` parameters. +Note: when using [ClickHouse Fiddle](https://fiddle.clickhouse.com/), the exact response may differ due to random nature of the function. Query: ```sql -SELECT arrayPartialShuffle([1, 2, 3, 4], 0); -SELECT arrayPartialShuffle([1, 2, 3, 4]); +SELECT arrayPartialShuffle([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 1) ``` -Note: when using [ClickHouse Fiddle](https://fiddle.clickhouse.com/), the exact response may differ due to random nature of the function. +Result: + +The order of elements is preserved (`[2,3,4,5], [7,8,9,10]`) except for the 2 shuffled elements `[1, 6]`. 
No `seed` is provided so the function selects its own randomly. -Result: ```response -[3,1,2,4] -[4,1,3,2] +[6,2,3,4,5,1,7,8,9,10] ``` -In this example, the `arrayPartialShuffle` function is provided a `limit` and a `seed`. +In this example, the `limit` is increased to `2` and a `seed` value is provided. The order Query: ```sql -SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 5, 0xbad_cafe); +SELECT arrayPartialShuffle([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 2); ``` +The order of elements is preserved (`[4, 5, 6, 7, 8], [10]`) except for the 4 shuffled elements `[1, 2, 3, 9]`. + Result: ```response -[10,9,4,2,5,6,7,8,3,1] +[3,9,1,4,5,6,7,8,2,10] ``` ## arrayUniq(arr, …) From c179885265a2a4b8db503aff5490b863d401ef4a Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 10 Apr 2024 17:53:31 +0200 Subject: [PATCH 445/470] Add example for arrayFirstOrNull with multiple arrays --- docs/en/sql-reference/functions/array-functions.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 2f7f9807fca7..03b0e642dfbd 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -2280,6 +2280,18 @@ Result: \N ``` +Query: + +```sql +SELECT arrayLastOrNull((x,f) -> f, [1,2,3,NULL], [0,1,0,1]); +``` + +Result: + +```response +\N +``` + ## arrayLast(func, arr1, …) Returns the last element in the `arr1` array for which `func(arr1[i], …, arrN[i])` returns something other than 0. From 5657e6aa35f5ebc3b355d4036b2c9802b2ddcf03 Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 10 Apr 2024 17:55:52 +0200 Subject: [PATCH 446/470] Fix style --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index c3bf44666d2c..ace632824f94 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -996,6 +996,7 @@ UncompressedCacheBytes UncompressedCacheCells UnidirectionalEdgeIsValid UniqThetaSketch +unshuffled Updatable Uppercased Uptime From 19916de7310089ec1a676c1cb06f9498b7835e46 Mon Sep 17 00:00:00 2001 From: Shaun Struwig <41984034+Blargian@users.noreply.github.com> Date: Wed, 10 Apr 2024 18:03:20 +0200 Subject: [PATCH 447/470] Update array-functions.md small fixes to wording --- docs/en/sql-reference/functions/array-functions.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index 03b0e642dfbd..87e733a4b0cc 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -1476,7 +1476,7 @@ Result: ## arrayPartialShuffle -Given an input array of cardinality `N`, returns an array of size N where elements in the range `[1...limit]`are shuffled and the remaining elements in the range `(limit...n]` are unshuffled. +Given an input array of cardinality `N`, returns an array of size N where elements in the range `[1...limit]` are shuffled and the remaining elements in the range `(limit...n]` are unshuffled. **Syntax** @@ -1514,7 +1514,7 @@ SELECT arrayPartialShuffle([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 1) Result: -The order of elements is preserved (`[2,3,4,5], [7,8,9,10]`) except for the 2 shuffled elements `[1, 6]`. 
No `seed` is provided so the function selects its own randomly. +The order of elements is preserved (`[2,3,4,5], [7,8,9,10]`) except for the two shuffled elements `[1, 6]`. No `seed` is provided so the function selects its own randomly. ```response [6,2,3,4,5,1,7,8,9,10] @@ -1528,7 +1528,7 @@ Query: SELECT arrayPartialShuffle([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 2); ``` -The order of elements is preserved (`[4, 5, 6, 7, 8], [10]`) except for the 4 shuffled elements `[1, 2, 3, 9]`. +The order of elements is preserved (`[4, 5, 6, 7, 8], [10]`) except for the four shuffled elements `[1, 2, 3, 9]`. Result: ```response From 12569cc5fe880f9a25728158884db1ac2af00472 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 10 Apr 2024 18:18:47 +0200 Subject: [PATCH 448/470] Don't allow the fuzzer to change allow_experimental_analyzer --- docker/test/fuzzer/query-fuzzer-tweaks-users.xml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml index 023f257253a4..c31d2fd7f397 100644 --- a/docker/test/fuzzer/query-fuzzer-tweaks-users.xml +++ b/docker/test/fuzzer/query-fuzzer-tweaks-users.xml @@ -26,6 +26,11 @@ 200 + + + + + From f32d3d3096ed1a3367a0ad39a35f1ec4dd91417f Mon Sep 17 00:00:00 2001 From: Blargian Date: Wed, 10 Apr 2024 18:27:01 +0200 Subject: [PATCH 449/470] Update lower function --- .../functions/string-functions.md | 36 ++++++++++++++++--- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index d4df3e0479a8..f4d667300111 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -254,8 +254,36 @@ Result: Converts the ASCII Latin symbols in a string to lowercase. +*Syntax** + +``` sql +lower(input) +``` + Alias: `lcase` +**Parameters** + +- `input`: A string type [String](/docs/en/sql-reference/data-types/string.md). + +**Returned value** + +- A [String](/docs/en/sql-reference/data-types/string.md) data type value. + +**Example** + +Query: + +```sql +SELECT lower('CLICKHOUSE'); +``` + +```response +┌─lower('CLICKHOUSE')─┐ +│ clickhouse │ +└─────────────────────┘ +``` + ## upper Converts the ASCII Latin symbols in a string to uppercase. 
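(Aside, illustrative only and not part of the patch above: the `lower` section added in this hunk covers ASCII Latin letters only, which is the main difference from `lowerUTF8`. A minimal sketch of that difference, assuming the documented semantics:)

``` sql
-- Illustrative sketch, not from any of these patches. Per the documented
-- behaviour, lower() leaves the non-ASCII 'Ü' untouched ('mÜnchen'),
-- while lowerUTF8() lowercases it as well ('münchen').
SELECT lower('MÜNCHEN') AS ascii_only, lowerUTF8('MÜNCHEN') AS utf8_aware;
```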
@@ -281,13 +309,13 @@ Alias: `ucase` Query: ``` sql -SELECT upper('value') as Upper; +SELECT upper('clickhouse'); ``` ``` response -┌─Upper─┐ -│ VALUE │ -└───────┘ +┌─upper('clickhouse')─┐ +│ CLICKHOUSE │ +└─────────────────────┘ ``` ## lowerUTF8 From 25c0f0360eb73b3996fbcd4bfea78a0731c2b8f2 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 10 Apr 2024 17:13:11 +0000 Subject: [PATCH 450/470] Update 02911_support_alias_column_in_indices.sql --- .../0_stateless/02911_support_alias_column_in_indices.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02911_support_alias_column_in_indices.sql b/tests/queries/0_stateless/02911_support_alias_column_in_indices.sql index 46d915343398..4d68786d7db7 100644 --- a/tests/queries/0_stateless/02911_support_alias_column_in_indices.sql +++ b/tests/queries/0_stateless/02911_support_alias_column_in_indices.sql @@ -35,6 +35,6 @@ insert into test2 select * from numbers(10); insert into test2 select * from numbers(11, 20); explain indexes = 1 select * from test2 where a2 > 15 settings allow_experimental_analyzer = 0; -explain indexes = 1 select * from test2 where a2 > 15 settings allow_experimental_analyzer = 1; -- buggy, analyzer does not pick up index i +explain indexes = 1 select * from test2 where a2 > 15 settings allow_experimental_analyzer = 1; drop database 02911_support_alias_column_in_indices; From 752a69b6ddc8195e34bc0c637129713d210c16f8 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 10 Apr 2024 17:37:24 +0000 Subject: [PATCH 451/470] Minor fixups --- programs/install/Install.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 0f7305cafbd1..f2ef3857d63c 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -46,12 +46,12 @@ INCBIN(resource_users_xml, SOURCE_DIR "/programs/server/users.xml"); * * The following steps are performed: * - * - copying the binary to binary directory (/usr/local/bin (Apple macOS) or /usr/bin (Others)). + * - copying the binary to binary directory (/usr/bin/) * - creation of symlinks for tools. * - creation of clickhouse user and group. - * - creation of config directory (/etc/clickhouse-server). + * - creation of config directory (/etc/clickhouse-server/). * - creation of default configuration files. - * - creation of a directory for logs (/var/log/clickhouse-server). + * - creation of a directory for logs (/var/log/clickhouse-server/). * - creation of a data directory if not exists. * - setting a password for default user. * - choose an option to listen connections. 
@@ -227,6 +227,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) ("help,h", "produce help message") ("prefix", po::value()->default_value("/"), "prefix for all paths") #if defined (OS_DARWIN) + /// https://stackoverflow.com/a/36734569/22422288 ("binary-path", po::value()->default_value("usr/local/bin"), "where to install binaries") #else ("binary-path", po::value()->default_value("usr/bin"), "where to install binaries") @@ -1221,6 +1222,7 @@ int mainEntryClickHouseStart(int argc, char ** argv) ("help,h", "produce help message") ("prefix", po::value()->default_value("/"), "prefix for all paths") #if defined (OS_DARWIN) + /// https://stackoverflow.com/a/36734569/22422288 ("binary-path", po::value()->default_value("usr/local/bin"), "directory with binary") #else ("binary-path", po::value()->default_value("usr/bin"), "directory with binary") @@ -1341,6 +1343,7 @@ int mainEntryClickHouseRestart(int argc, char ** argv) ("help,h", "produce help message") ("prefix", po::value()->default_value("/"), "prefix for all paths") #if defined (OS_DARWIN) + /// https://stackoverflow.com/a/36734569/22422288 ("binary-path", po::value()->default_value("usr/local/bin"), "directory with binary") #else ("binary-path", po::value()->default_value("usr/bin"), "directory with binary") From 407f15465042382d1a38806c1e2947b25a4478bd Mon Sep 17 00:00:00 2001 From: Max Kainov Date: Wed, 10 Apr 2024 18:07:05 +0000 Subject: [PATCH 452/470] comments --- tests/ci/sync_pr.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/ci/sync_pr.py b/tests/ci/sync_pr.py index dfa7379ae6f3..f33f6122f309 100644 --- a/tests/ci/sync_pr.py +++ b/tests/ci/sync_pr.py @@ -16,12 +16,13 @@ def main(): assert pr_info.merged_pr, "BUG. merged PR number could not been determined" prs = gh.get_pulls_from_search( - query=f"type:pr [Sync] ClickHouse/ClickHouse#{pr_info.merged_pr}", + query=f"head:sync-upstream/pr/{pr_info.merged_pr} org:ClickHouse type:pr", repo="ClickHouse/clickhouse-private", ) if len(prs) > 1: - print(f"WARNING: More than one PR found [{prs}]") - elif len(prs) == 0: + print(f"WARNING: More than one PR found [{prs}] - exiting") + sys.exit(0) + if len(prs) == 0: print("WARNING: No Sync PR found") sys.exit(0) @@ -35,7 +36,7 @@ def main(): print(f"WARNING: Unknown Sync PR [{pr.number}] state [{pr.state}] - exiting") sys.exit(0) - print(f"NOTE: Trying to merge Sync PR [{pr.number}]") + print(f"Trying to merge Sync PR [{pr.number}]") if pr.draft: gh.toggle_pr_draft(pr) pr.merge() From 033efd720487d92cb61b0dc1ed3e8ef23eaaa342 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 10 Apr 2024 20:25:20 +0200 Subject: [PATCH 453/470] Revert "Rich syntax highlighting in the client" --- src/Client/ClientBase.cpp | 8 +- src/Client/ClientBaseHelpers.cpp | 162 ++++++++---------- src/Parsers/ASTOrderByElement.cpp | 1 + src/Parsers/CommonParsers.h | 2 - src/Parsers/ExpressionElementParsers.cpp | 4 +- src/Parsers/ExpressionElementParsers.h | 13 +- src/Parsers/ExpressionListParsers.cpp | 83 ++++----- src/Parsers/IParser.cpp | 23 --- src/Parsers/IParser.h | 39 ----- src/Parsers/IParserBase.cpp | 19 +- src/Parsers/ParserInsertQuery.cpp | 4 +- src/Parsers/parseDatabaseAndTableName.cpp | 15 ++ src/Parsers/parseQuery.cpp | 58 +++---- src/Parsers/parseQuery.h | 5 - ..._autocomplete_word_break_characters.expect | 2 +- ...01565_query_loop_after_client_error.expect | 19 +- .../01676_clickhouse_client_autocomplete.sh | 2 +- .../01702_system_query_log.reference | 20 +-- 
...160_client_autocomplete_parse_query.expect | 2 +- 19 files changed, 180 insertions(+), 301 deletions(-) diff --git a/src/Client/ClientBase.cpp b/src/Client/ClientBase.cpp index f37b391eb664..4948402bb7fb 100644 --- a/src/Client/ClientBase.cpp +++ b/src/Client/ClientBase.cpp @@ -2061,7 +2061,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( return MultiQueryProcessingStage::QUERIES_END; // Remove leading empty newlines and other whitespace, because they - // are annoying to filter in the query log. This is mostly relevant for + // are annoying to filter in query log. This is mostly relevant for // the tests. while (this_query_begin < all_queries_end && isWhitespaceASCII(*this_query_begin)) ++this_query_begin; @@ -2091,7 +2091,7 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( { parsed_query = parseQuery(this_query_end, all_queries_end, true); } - catch (const Exception & e) + catch (Exception & e) { current_exception.reset(e.clone()); return MultiQueryProcessingStage::PARSING_EXCEPTION; @@ -2116,9 +2116,9 @@ MultiQueryProcessingStage ClientBase::analyzeMultiQueryText( // INSERT queries may have the inserted data in the query text // that follow the query itself, e.g. "insert into t format CSV 1;2". // They need special handling. First of all, here we find where the - // inserted data ends. In multi-query mode, it is delimited by a + // inserted data ends. In multy-query mode, it is delimited by a // newline. - // The VALUES format needs even more handling - we also allow the + // The VALUES format needs even more handling -- we also allow the // data to be delimited by semicolon. This case is handled later by // the format parser itself. // We can't do multiline INSERTs with inline data, because most diff --git a/src/Client/ClientBaseHelpers.cpp b/src/Client/ClientBaseHelpers.cpp index b1d29b34ffc6..b08626962957 100644 --- a/src/Client/ClientBaseHelpers.cpp +++ b/src/Client/ClientBaseHelpers.cpp @@ -1,14 +1,11 @@ #include "ClientBaseHelpers.h" + #include #include -#include -#include +#include #include -#include - - namespace DB { @@ -99,102 +96,77 @@ void highlight(const String & query, std::vector & colors { using namespace replxx; - /// The `colors` array maps to a Unicode code point position in a string into a color. - /// A color is set for every position individually (not for a range). - - /// Empty input. - if (colors.empty()) - return; - - /// The colors should be legible (and look gorgeous) in both dark and light themes. - /// When modifying this, check it in both themes. 
+ static const std::unordered_map token_to_color + = {{TokenType::Whitespace, Replxx::Color::DEFAULT}, + {TokenType::Comment, Replxx::Color::GRAY}, + {TokenType::BareWord, Replxx::Color::DEFAULT}, + {TokenType::Number, Replxx::Color::GREEN}, + {TokenType::StringLiteral, Replxx::Color::CYAN}, + {TokenType::QuotedIdentifier, Replxx::Color::MAGENTA}, + {TokenType::OpeningRoundBracket, Replxx::Color::BROWN}, + {TokenType::ClosingRoundBracket, Replxx::Color::BROWN}, + {TokenType::OpeningSquareBracket, Replxx::Color::BROWN}, + {TokenType::ClosingSquareBracket, Replxx::Color::BROWN}, + {TokenType::DoubleColon, Replxx::Color::BROWN}, + {TokenType::OpeningCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::ClosingCurlyBrace, replxx::color::bold(Replxx::Color::DEFAULT)}, + + {TokenType::Comma, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Semicolon, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::VerticalDelimiter, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Dot, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Asterisk, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::HereDoc, Replxx::Color::CYAN}, + {TokenType::Plus, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Minus, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Slash, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Percent, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Arrow, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::QuestionMark, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Colon, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Equals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::NotEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Less, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Greater, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::LessOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::GreaterOrEquals, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Spaceship, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::Concatenation, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::At, replxx::color::bold(Replxx::Color::DEFAULT)}, + {TokenType::DoubleAt, Replxx::Color::MAGENTA}, + + {TokenType::EndOfStream, Replxx::Color::DEFAULT}, + + {TokenType::Error, Replxx::Color::RED}, + {TokenType::ErrorMultilineCommentIsNotClosed, Replxx::Color::RED}, + {TokenType::ErrorSingleQuoteIsNotClosed, Replxx::Color::RED}, + {TokenType::ErrorDoubleQuoteIsNotClosed, Replxx::Color::RED}, + {TokenType::ErrorSinglePipeMark, Replxx::Color::RED}, + {TokenType::ErrorWrongNumber, Replxx::Color::RED}, + {TokenType::ErrorMaxQuerySizeExceeded, Replxx::Color::RED}}; + + const Replxx::Color unknown_token_color = Replxx::Color::RED; + + Lexer lexer(query.data(), query.data() + query.size()); + size_t pos = 0; - static const std::unordered_map type_to_color = + for (Token token = lexer.nextToken(); !token.isEnd(); token = lexer.nextToken()) { - {Highlight::keyword, replxx::color::bold(Replxx::Color::DEFAULT)}, - {Highlight::identifier, Replxx::Color::CYAN}, - {Highlight::function, Replxx::Color::BROWN}, - {Highlight::alias, replxx::color::rgb666(0, 4, 4)}, - {Highlight::substitution, Replxx::Color::MAGENTA}, - {Highlight::number, replxx::color::rgb666(0, 4, 0)}, - {Highlight::string, Replxx::Color::GREEN}, - }; - - /// We set reasonably small limits for size/depth, because we don't want the 
CLI to be slow. - /// While syntax highlighting is unneeded for long queries, which the user couldn't read anyway. + if (token.type == TokenType::Semicolon || token.type == TokenType::VerticalDelimiter) + ReplxxLineReader::setLastIsDelimiter(true); + else if (token.type != TokenType::Whitespace) + ReplxxLineReader::setLastIsDelimiter(false); - const char * begin = query.data(); - const char * end = begin + query.size(); - Tokens tokens(begin, end, 1000, true); - IParser::Pos token_iterator(tokens, static_cast(1000), static_cast(10000)); - Expected expected; - - /// We don't do highlighting for foreign dialects, such as PRQL and Kusto. - /// Only normal ClickHouse SQL queries are highlighted. - - /// Currently we highlight only the first query in the multi-query mode. - - ParserQuery parser(end); - ASTPtr ast; - bool parse_res = false; - - try - { - parse_res = parser.parse(token_iterator, ast, expected); - } - catch (...) - { - /// Skip highlighting in the case of exceptions during parsing. - /// It is ok to ignore unknown exceptions here. - return; - } - - size_t pos = 0; - const char * prev = begin; - for (const auto & range : expected.highlights) - { - auto it = type_to_color.find(range.highlight); - if (it != type_to_color.end()) + size_t utf8_len = UTF8::countCodePoints(reinterpret_cast(token.begin), token.size()); + for (size_t code_point_index = 0; code_point_index < utf8_len; ++code_point_index) { - /// We have to map from byte positions to Unicode positions. - pos += UTF8::countCodePoints(reinterpret_cast(prev), range.begin - prev); - size_t utf8_len = UTF8::countCodePoints(reinterpret_cast(range.begin), range.end - range.begin); - - for (size_t code_point_index = 0; code_point_index < utf8_len; ++code_point_index) - colors[pos + code_point_index] = it->second; - - pos += utf8_len; - prev = range.end; + if (token_to_color.find(token.type) != token_to_color.end()) + colors[pos + code_point_index] = token_to_color.at(token.type); + else + colors[pos + code_point_index] = unknown_token_color; } - } - Token last_token = token_iterator.max(); - /// Raw data in INSERT queries, which is not necessarily tokenized. - const char * insert_data = ast ? getInsertData(ast) : nullptr; - - /// Highlight the last error in red. If the parser failed or the lexer found an invalid token, - /// or if it didn't parse all the data (except, the data for INSERT query, which is legitimately unparsed) - if ((!parse_res || last_token.isError() || (!token_iterator->isEnd() && token_iterator->type != TokenType::Semicolon)) - && !(insert_data && expected.max_parsed_pos >= insert_data) - && expected.max_parsed_pos >= prev) - { - pos += UTF8::countCodePoints(reinterpret_cast(prev), expected.max_parsed_pos - prev); - - if (pos >= colors.size()) - pos = colors.size() - 1; - - colors[pos] = Replxx::Color::BRIGHTRED; - } - - /// This is a callback for the client/local app to better find query end. Note: this is a kludge, remove it. - if (last_token.type == TokenType::Semicolon || last_token.type == TokenType::VerticalDelimiter - || query.ends_with(';') || query.ends_with("\\G")) /// This is for raw data in INSERT queries, which is not necessarily tokenized. 
- { - ReplxxLineReader::setLastIsDelimiter(true); - } - else if (last_token.type != TokenType::Whitespace) - { - ReplxxLineReader::setLastIsDelimiter(false); + pos += utf8_len; } } #endif diff --git a/src/Parsers/ASTOrderByElement.cpp b/src/Parsers/ASTOrderByElement.cpp index 09193a8b5e16..be0416359a18 100644 --- a/src/Parsers/ASTOrderByElement.cpp +++ b/src/Parsers/ASTOrderByElement.cpp @@ -1,3 +1,4 @@ +#include #include #include #include diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index 2277e348b0f2..49964b5c7281 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -601,8 +601,6 @@ class ParserKeyword : public IParserBase constexpr const char * getName() const override { return s.data(); } - Highlight highlight() const override { return Highlight::keyword; } - protected: bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index dce0bc62d5b5..2c8ab65d1fc6 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -278,7 +278,7 @@ bool ParserTableAsStringLiteralIdentifier::parseImpl(Pos & pos, ASTPtr & node, E bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr id_list; - if (!ParserList(std::make_unique(allow_query_parameter, highlight_type), std::make_unique(TokenType::Dot), false) + if (!ParserList(std::make_unique(allow_query_parameter), std::make_unique(TokenType::Dot), false) .parse(pos, id_list, expected)) return false; @@ -1491,7 +1491,7 @@ const char * ParserAlias::restricted_keywords[] = bool ParserAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserKeyword s_as(Keyword::AS); - ParserIdentifier id_p(false, Highlight::alias); + ParserIdentifier id_p; bool has_as_word = s_as.ignore(pos, expected); if (!allow_alias_without_as_keyword && !has_as_word) diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 6dbb75450edd..b29f5cc42510 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -25,15 +25,12 @@ class ParserSubquery : public IParserBase class ParserIdentifier : public IParserBase { public: - explicit ParserIdentifier(bool allow_query_parameter_ = false, Highlight highlight_type_ = Highlight::identifier) - : allow_query_parameter(allow_query_parameter_), highlight_type(highlight_type_) {} - Highlight highlight() const override { return highlight_type; } + explicit ParserIdentifier(bool allow_query_parameter_ = false) : allow_query_parameter(allow_query_parameter_) {} protected: const char * getName() const override { return "identifier"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool allow_query_parameter; - Highlight highlight_type; }; @@ -56,8 +53,8 @@ class ParserTableAsStringLiteralIdentifier : public IParserBase class ParserCompoundIdentifier : public IParserBase { public: - explicit ParserCompoundIdentifier(bool table_name_with_optional_uuid_ = false, bool allow_query_parameter_ = false, Highlight highlight_type_ = Highlight::identifier) - : table_name_with_optional_uuid(table_name_with_optional_uuid_), allow_query_parameter(allow_query_parameter_), highlight_type(highlight_type_) + explicit ParserCompoundIdentifier(bool table_name_with_optional_uuid_ = false, bool allow_query_parameter_ = false) + : table_name_with_optional_uuid(table_name_with_optional_uuid_), 
allow_query_parameter(allow_query_parameter_) { } @@ -66,7 +63,6 @@ class ParserCompoundIdentifier : public IParserBase bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; bool table_name_with_optional_uuid; bool allow_query_parameter; - Highlight highlight_type; }; /** *, t.*, db.table.*, COLUMNS('') APPLY(...) or EXCEPT(...) or REPLACE(...) @@ -257,7 +253,6 @@ class ParserNumber : public IParserBase protected: const char * getName() const override { return "number"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - Highlight highlight() const override { return Highlight::number; } }; /** Unsigned integer, used in right hand side of tuple access operator (x.1). @@ -278,7 +273,6 @@ class ParserStringLiteral : public IParserBase protected: const char * getName() const override { return "string literal"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - Highlight highlight() const override { return Highlight::string; } }; @@ -391,7 +385,6 @@ class ParserSubstitution : public IParserBase protected: const char * getName() const override { return "substitution"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; - Highlight highlight() const override { return Highlight::substitution; } }; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 276b4e820742..05691529f430 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -441,21 +441,6 @@ bool ParserKeyValuePairsList::parseImpl(Pos & pos, ASTPtr & node, Expected & exp return parser.parse(pos, node, expected); } -namespace -{ - /// This wrapper is needed to highlight function names differently. - class ParserFunctionName : public IParserBase - { - protected: - const char * getName() const override { return "function name"; } - bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override - { - ParserCompoundIdentifier parser(false, true, Highlight::function); - return parser.parse(pos, node, expected); - } - }; -} - enum class Action { @@ -824,7 +809,6 @@ struct ParserExpressionImpl static const Operator finish_between_operator; - ParserFunctionName function_name_parser; ParserCompoundIdentifier identifier_parser{false, true}; ParserNumber number_parser; ParserAsterisk asterisk_parser; @@ -2375,7 +2359,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr identifier; - if (ParserFunctionName().parse(pos, identifier, expected) + if (ParserCompoundIdentifier(false,true).parse(pos, identifier, expected) && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) { auto start = getFunctionLayer(identifier, is_table_function, allow_function_parameters); @@ -2513,7 +2497,7 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos { if (typeid_cast(layers.back().get()) || typeid_cast(layers.back().get())) { - if (function_name_parser.parse(pos, tmp, expected) + if (identifier_parser.parse(pos, tmp, expected) && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) { layers.push_back(getFunctionLayer(tmp, layers.front()->is_table_function)); @@ -2645,52 +2629,49 @@ Action ParserExpressionImpl::tryParseOperand(Layers & layers, IParser::Pos & pos { layers.back()->pushOperand(std::move(tmp)); } - else + else if (identifier_parser.parse(pos, tmp, expected)) { - old_pos = pos; - if (function_name_parser.parse(pos, tmp, expected) && pos->type == TokenType::OpeningRoundBracket) + if 
(pos->type == TokenType::OpeningRoundBracket) { ++pos; layers.push_back(getFunctionLayer(tmp, layers.front()->is_table_function)); return Action::OPERAND; } - pos = old_pos; - - if (identifier_parser.parse(pos, tmp, expected)) - { - layers.back()->pushOperand(std::move(tmp)); - } - else if (substitution_parser.parse(pos, tmp, expected)) + else { layers.back()->pushOperand(std::move(tmp)); } - else if (pos->type == TokenType::OpeningRoundBracket) - { - - if (subquery_parser.parse(pos, tmp, expected)) - { - layers.back()->pushOperand(std::move(tmp)); - return Action::OPERATOR; - } + } + else if (substitution_parser.parse(pos, tmp, expected)) + { + layers.back()->pushOperand(std::move(tmp)); + } + else if (pos->type == TokenType::OpeningRoundBracket) + { - ++pos; - layers.push_back(std::make_unique()); - return Action::OPERAND; - } - else if (pos->type == TokenType::OpeningSquareBracket) - { - ++pos; - layers.push_back(std::make_unique()); - return Action::OPERAND; - } - else if (mysql_global_variable_parser.parse(pos, tmp, expected)) + if (subquery_parser.parse(pos, tmp, expected)) { layers.back()->pushOperand(std::move(tmp)); + return Action::OPERATOR; } - else - { - return Action::NONE; - } + + ++pos; + layers.push_back(std::make_unique()); + return Action::OPERAND; + } + else if (pos->type == TokenType::OpeningSquareBracket) + { + ++pos; + layers.push_back(std::make_unique()); + return Action::OPERAND; + } + else if (mysql_global_variable_parser.parse(pos, tmp, expected)) + { + layers.back()->pushOperand(std::move(tmp)); + } + else + { + return Action::NONE; } return Action::OPERATOR; diff --git a/src/Parsers/IParser.cpp b/src/Parsers/IParser.cpp index eb4ddfa01d24..41981a4bb8aa 100644 --- a/src/Parsers/IParser.cpp +++ b/src/Parsers/IParser.cpp @@ -9,7 +9,6 @@ namespace ErrorCodes extern const int TOO_SLOW_PARSING; } - IParser::Pos & IParser::Pos::operator=(const IParser::Pos & rhs) { depth = rhs.depth; @@ -33,26 +32,4 @@ IParser::Pos & IParser::Pos::operator=(const IParser::Pos & rhs) return *this; } - -template -static bool intersects(T a_begin, T a_end, T b_begin, T b_end) -{ - return (a_begin <= b_begin && b_begin < a_end) - || (b_begin <= a_begin && a_begin < b_end); -} - - -void Expected::highlight(HighlightedRange range) -{ - auto it = highlights.lower_bound(range); - while (it != highlights.end() && range.begin < it->end) - { - if (intersects(range.begin, range.end, it->begin, it->end)) - it = highlights.erase(it); - else - ++it; - } - highlights.insert(range); -} - } diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index f8146c0a4f6d..291f8ee7d44a 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include @@ -22,42 +21,14 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -enum class Highlight -{ - none = 0, - keyword, - identifier, - function, - alias, - substitution, - number, - string, -}; - -struct HighlightedRange -{ - const char * begin; - const char * end; - Highlight highlight; - - auto operator<=>(const HighlightedRange & other) const - { - return begin <=> other.begin; - } -}; - /** Collects variants, how parser could proceed further at rightmost position. - * Also collects a mapping of parsed ranges for highlighting, - * which is accumulated through the parsing. */ struct Expected { absl::InlinedVector variants; const char * max_parsed_pos = nullptr; - std::set highlights; - /// 'description' should be statically allocated string. 
ALWAYS_INLINE void add(const char * current_pos, const char * description) { @@ -77,8 +48,6 @@ struct Expected { add(it->begin, description); } - - void highlight(HighlightedRange range); }; @@ -189,14 +158,6 @@ class IParser return parse(pos, node, expected); } - /** If the parsed fragment should be highlighted in the query editor, - * which type of highlighting to use? - */ - virtual Highlight highlight() const - { - return Highlight::none; - } - virtual ~IParser() = default; }; diff --git a/src/Parsers/IParserBase.cpp b/src/Parsers/IParserBase.cpp index 9d39056a8f16..0241250926dc 100644 --- a/src/Parsers/IParserBase.cpp +++ b/src/Parsers/IParserBase.cpp @@ -10,25 +10,8 @@ bool IParserBase::parse(Pos & pos, ASTPtr & node, Expected & expected) return wrapParseImpl(pos, IncreaseDepthTag{}, [&] { - const char * begin = pos->begin; bool res = parseImpl(pos, node, expected); - if (res) - { - Highlight type = highlight(); - if (pos->begin > begin && type != Highlight::none) - { - Pos prev_token = pos; - --prev_token; - - HighlightedRange range; - range.begin = begin; - range.end = prev_token->end; - range.highlight = type; - - expected.highlight(range); - } - } - else + if (!res) node = nullptr; return res; }); diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp index 0bbb181b39c6..9373e6a1c936 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -40,6 +40,7 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_with(Keyword::WITH); ParserToken s_lparen(TokenType::OpeningRoundBracket); ParserToken s_rparen(TokenType::ClosingRoundBracket); + ParserToken s_semicolon(TokenType::Semicolon); ParserIdentifier name_p(true); ParserList columns_p(std::make_unique(), std::make_unique(TokenType::Comma), false); ParserFunction table_function_p{false}; @@ -146,9 +147,8 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { /// If VALUES is defined in query, everything except setting will be parsed as data, /// and if values followed by semicolon, the data should be null. - if (pos->type != TokenType::Semicolon) + if (!s_semicolon.checkWithoutMoving(pos, expected)) data = pos->begin; - format_str = "Values"; } else if (s_format.ignore(pos, expected)) diff --git a/src/Parsers/parseDatabaseAndTableName.cpp b/src/Parsers/parseDatabaseAndTableName.cpp index eaf020e445bf..81660bc46008 100644 --- a/src/Parsers/parseDatabaseAndTableName.cpp +++ b/src/Parsers/parseDatabaseAndTableName.cpp @@ -60,6 +60,21 @@ bool parseDatabaseAndTableAsAST(IParser::Pos & pos, Expected & expected, ASTPtr } +bool parseDatabase(IParser::Pos & pos, Expected & expected, String & database_str) +{ + ParserToken s_dot(TokenType::Dot); + ParserIdentifier identifier_parser; + + ASTPtr database; + database_str = ""; + + if (!identifier_parser.parse(pos, database, expected)) + return false; + + tryGetIdentifierNameInto(database, database_str); + return true; +} + bool parseDatabaseAsAST(IParser::Pos & pos, Expected & expected, ASTPtr & database) { ParserIdentifier identifier_parser(/* allow_query_parameter */true); diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index 2a6abc234065..51878efa7067 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -226,32 +226,6 @@ std::string getUnmatchedParenthesesErrorMessage( } -static ASTInsertQuery * getInsertAST(const ASTPtr & ast) -{ - /// Either it is INSERT or EXPLAIN INSERT. 
- if (auto * explain = ast->as()) - { - if (auto explained_query = explain->getExplainedQuery()) - { - return explained_query->as(); - } - } - else - { - return ast->as(); - } - - return nullptr; -} - -const char * getInsertData(const ASTPtr & ast) -{ - if (const ASTInsertQuery * insert = getInsertAST(ast)) - return insert->data; - return nullptr; -} - - ASTPtr tryParseQuery( IParser & parser, const char * & _out_query_end, /* also query begin as input parameter */ @@ -296,11 +270,29 @@ ASTPtr tryParseQuery( if (res && max_parser_depth) res->checkDepth(max_parser_depth); - /// If parsed query ends at data for insertion. Data for insertion could be - /// in any format and not necessary be lexical correct, so we can't perform - /// most of the checks. - if (res && getInsertData(res)) + ASTInsertQuery * insert = nullptr; + if (parse_res) + { + if (auto * explain = res->as()) + { + if (auto explained_query = explain->getExplainedQuery()) + { + insert = explained_query->as(); + } + } + else + { + insert = res->as(); + } + } + + // If parsed query ends at data for insertion. Data for insertion could be + // in any format and not necessary be lexical correct, so we can't perform + // most of the checks. + if (insert && insert->data) + { return res; + } // More granular checks for queries other than INSERT w/inline data. /// Lexical error @@ -442,9 +434,11 @@ std::pair splitMultipartQuery( ast = parseQueryAndMovePosition(parser, pos, end, "", true, max_query_size, max_parser_depth, max_parser_backtracks); - if (ASTInsertQuery * insert = getInsertAST(ast)) + auto * insert = ast->as(); + + if (insert && insert->data) { - /// Data for INSERT is broken on the new line + /// Data for INSERT is broken on new line pos = insert->data; while (*pos && *pos != '\n') ++pos; diff --git a/src/Parsers/parseQuery.h b/src/Parsers/parseQuery.h index 564415d0b85c..93c1a4652671 100644 --- a/src/Parsers/parseQuery.h +++ b/src/Parsers/parseQuery.h @@ -71,9 +71,4 @@ std::pair splitMultipartQuery( size_t max_parser_backtracks, bool allow_settings_after_format_in_insert); -/** If the query contains raw data part, such as INSERT ... FORMAT ..., return a pointer to it. - * The SQL parser stops at the raw data part, which is parsed by a separate parser. 
- */ -const char * getInsertData(const ASTPtr & ast); - } diff --git a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect index ffd3e742cec8..44f3ba9681a9 100755 --- a/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect +++ b/tests/queries/0_stateless/01370_client_autocomplete_word_break_characters.expect @@ -20,7 +20,7 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file --highlight=0" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file" expect ":) " # Make a query diff --git a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect index 6253840c63cf..ac69c18ce392 100755 --- a/tests/queries/0_stateless/01565_query_loop_after_client_error.expect +++ b/tests/queries/0_stateless/01565_query_loop_after_client_error.expect @@ -24,21 +24,30 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -mn --history_file=$history_file --highlight 0" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --disable_suggestion -mn --history_file=$history_file" expect "\n:) " -send -- "DROP TABLE IF EXISTS t01565;\r" +send -- "DROP TABLE IF EXISTS t01565;\n" +# NOTE: this is important for -mn mode, you should send "\r" only after reading echoed command +expect "\r\n" +send -- "\r" expect "\nOk." expect "\n:)" -send -- "CREATE TABLE t01565 (c0 String, c1 Int32) ENGINE = Memory() ;\r" +send -- "CREATE TABLE t01565 (c0 String, c1 Int32) ENGINE = Memory() ;\n" +expect "\r\n" +send -- "\r" expect "\nOk." expect "\n:) " -send -- "INSERT INTO t01565(c0, c1) VALUES (\"1\",1) ;\r" +send -- "INSERT INTO t01565(c0, c1) VALUES (\"1\",1) ;\n" +expect "\r\n" +send -- "\r" expect "\n:) " -send -- "INSERT INTO t01565(c0, c1) VALUES ('1', 1) ;\r" +send -- "INSERT INTO t01565(c0, c1) VALUES ('1', 1) ;\n" +expect "\r\n" +send -- "\r" expect "\nOk." 
expect "\n:) " diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh index f04ffdae229f..ebd6490077e4 100755 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh +++ b/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh @@ -43,7 +43,7 @@ expect_after { -i \$any_spawn_id timeout { exit 1 } } -spawn bash -c "$* --highlight 0" +spawn bash -c "$*" expect ":) " # Make a query diff --git a/tests/queries/0_stateless/01702_system_query_log.reference b/tests/queries/0_stateless/01702_system_query_log.reference index 5498b5377ba5..c653021aa5ae 100644 --- a/tests/queries/0_stateless/01702_system_query_log.reference +++ b/tests/queries/0_stateless/01702_system_query_log.reference @@ -43,16 +43,16 @@ Alter ALTER TABLE sqllt.table UPDATE i = i + 1 WHERE 1; Alter ALTER TABLE sqllt.table DELETE WHERE i > 65535; Select -- not done, seems to hard, so I\'ve skipped queries of ALTER-X, where X is:\n-- PARTITION\n-- ORDER BY\n-- SAMPLE BY\n-- INDEX\n-- CONSTRAINT\n-- TTL\n-- USER\n-- QUOTA\n-- ROLE\n-- ROW POLICY\n-- SETTINGS PROFILE\n\nSELECT \'SYSTEM queries\'; System SYSTEM FLUSH LOGS; -System SYSTEM STOP MERGES sqllt.table; -System SYSTEM START MERGES sqllt.table; -System SYSTEM STOP TTL MERGES sqllt.table; -System SYSTEM START TTL MERGES sqllt.table; -System SYSTEM STOP MOVES sqllt.table; -System SYSTEM START MOVES sqllt.table; -System SYSTEM STOP FETCHES sqllt.table; -System SYSTEM START FETCHES sqllt.table; -System SYSTEM STOP REPLICATED SENDS sqllt.table; -System SYSTEM START REPLICATED SENDS sqllt.table; +System SYSTEM STOP MERGES sqllt.table +System SYSTEM START MERGES sqllt.table +System SYSTEM STOP TTL MERGES sqllt.table +System SYSTEM START TTL MERGES sqllt.table +System SYSTEM STOP MOVES sqllt.table +System SYSTEM START MOVES sqllt.table +System SYSTEM STOP FETCHES sqllt.table +System SYSTEM START FETCHES sqllt.table +System SYSTEM STOP REPLICATED SENDS sqllt.table +System SYSTEM START REPLICATED SENDS sqllt.table Select -- SYSTEM RELOAD DICTIONARY sqllt.dictionary; -- temporary out of order: Code: 210, Connection refused (localhost:9001) (version 21.3.1.1)\n-- DROP REPLICA\n-- haha, no\n-- SYSTEM KILL;\n-- SYSTEM SHUTDOWN;\n\n-- Since we don\'t really care about the actual output, suppress it with `FORMAT Null`.\nSELECT \'SHOW queries\'; Show SHOW CREATE TABLE sqllt.table FORMAT Null; Show SHOW CREATE DICTIONARY sqllt.dictionary FORMAT Null; diff --git a/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect b/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect index 30d725e6a2a2..2d404b005c71 100755 --- a/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect +++ b/tests/queries/0_stateless/02160_client_autocomplete_parse_query.expect @@ -21,7 +21,7 @@ expect_after { -i $any_spawn_id timeout { exit 1 } } -spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file --highlight=0" +spawn bash -c "source $basedir/../shell_config.sh ; \$CLICKHOUSE_CLIENT_BINARY \$CLICKHOUSE_CLIENT_OPT --history_file=$history_file" expect ":) " # Make a query From 0bcceaebbc7dc23b3dc62a2b84be1d73edc017b7 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Wed, 10 Apr 2024 18:43:35 +0000 Subject: [PATCH 454/470] Add test for Bug 37909 --- ...3_bug37909_query_does_not_finish.reference | 0 .../03093_bug37909_query_does_not_finish.sql | 77 +++++++++++++++++++ 2 files changed, 
77 insertions(+) create mode 100644 tests/queries/0_stateless/03093_bug37909_query_does_not_finish.reference create mode 100644 tests/queries/0_stateless/03093_bug37909_query_does_not_finish.sql diff --git a/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.reference b/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.reference new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.sql b/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.sql new file mode 100644 index 000000000000..463922c4e29a --- /dev/null +++ b/tests/queries/0_stateless/03093_bug37909_query_does_not_finish.sql @@ -0,0 +1,77 @@ +-- Bug 37909 + +SELECT + v_date AS vDate, + round(sum(v_share)) AS v_sum +FROM +( + WITH + ( + SELECT rand() % 10000 + ) AS dummy_1, + ( + SELECT rand() % 10000 + ) AS dummy_2, + ( + SELECT rand() % 10000 + ) AS dummy_3, + _v AS + ( + SELECT + xxHash64(rand()) % 100000 AS d_id, + toDate(parseDateTimeBestEffort('2022-01-01') + (rand() % 2600000)) AS v_date + FROM numbers(1000000) + ORDER BY d_id ASC + ), + _i AS + ( + SELECT xxHash64(rand()) % 40000 AS d_id + FROM numbers(1000000) + ), + not_i AS + ( + SELECT + NULL AS v_date, + d_id, + 0 AS v_share + FROM _i + LIMIT 100 + ) + SELECT * + FROM + ( + SELECT + d_id, + v_date, + v_share + FROM not_i + UNION ALL + SELECT + d_id, + v_date, + 1 AS v_share + FROM + ( + SELECT + d_id, + arrayJoin(groupArray(v_date)) AS v_date + FROM + ( + SELECT + v_date, + d_id + FROM _v + UNION ALL + SELECT + NULL AS v_date, + d_id + FROM _i + ) + GROUP BY d_id + ) + ) + WHERE (v_date >= '2022-05-08') AND (v_date <= '2022-06-07') +) +/* WHERE (v_date >= '2022-05-08') AND (v_date <= '2022-06-07') placing condition has same effect */ +GROUP BY vDate +ORDER BY vDate ASC From 1263e701bc47a56870f0bbf05e59159b254ae450 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Wed, 10 Apr 2024 22:08:05 +0200 Subject: [PATCH 455/470] Fix upgrade test. 
Again --- src/Core/SettingsChangesHistory.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index eb4ab2d44380..bdfb72077244 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -89,7 +89,8 @@ static std::map sett {"ignore_drop_queries_probability", 0, 0, "Allow to ignore drop queries in server with specified probability for testing purposes"}, {"lightweight_deletes_sync", 2, 2, "The same as 'mutation_sync', but controls only execution of lightweight deletes"}, {"query_cache_system_table_handling", "save", "throw", "The query cache no longer caches results of queries against system tables"}, - }}, + {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, + }}, {"24.3", {{"s3_connect_timeout_ms", 1000, 1000, "Introduce new dedicated setting for s3 connection timeout"}, {"allow_experimental_shared_merge_tree", false, true, "The setting is obsolete"}, {"use_page_cache_for_disks_without_file_cache", false, false, "Added userspace page cache"}, @@ -129,7 +130,6 @@ static std::map sett {"azure_max_upload_part_size", 5ull*1024*1024*1024, 5ull*1024*1024*1024, "The maximum size of part to upload during multipart upload to Azure blob storage."}, {"azure_upload_part_size_multiply_factor", 2, 2, "Multiply azure_min_upload_part_size by this factor each time azure_multiply_parts_count_threshold parts were uploaded from a single write to Azure blob storage."}, {"azure_upload_part_size_multiply_parts_count_threshold", 500, 500, "Each time this number of parts was uploaded to Azure blob storage, azure_min_upload_part_size is multiplied by azure_upload_part_size_multiply_factor."}, - {"input_format_hive_text_allow_variable_number_of_columns", false, true, "Ignore extra columns in Hive Text input (if file has more columns than expected) and treat missing fields in Hive Text input as default values."}, }}, {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"}, {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"}, From 358a4cee598b9c2ad2a3e47f2764f5bf69881ba7 Mon Sep 17 00:00:00 2001 From: Nikita Mikhaylov Date: Wed, 10 Apr 2024 23:05:04 +0200 Subject: [PATCH 456/470] Blind fix --- tests/queries/0_stateless/03080_incorrect_join_with_merge.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/03080_incorrect_join_with_merge.sql b/tests/queries/0_stateless/03080_incorrect_join_with_merge.sql index 4985d3abfb65..7682e6ce8667 100644 --- a/tests/queries/0_stateless/03080_incorrect_join_with_merge.sql +++ b/tests/queries/0_stateless/03080_incorrect_join_with_merge.sql @@ -1,5 +1,6 @@ -- https://github.com/ClickHouse/ClickHouse/issues/29838 SET allow_experimental_analyzer=1; +SET distributed_foreground_insert=1; CREATE TABLE first_table_lr ( From 4bb77473a152981cb90e8bc3275c72261a8ed1d5 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Wed, 10 Apr 2024 19:08:45 -0400 Subject: [PATCH 457/470] add auth_use_forwarded_address --- .../server-configuration-parameters/settings.md | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git 
a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index f87b6144deb8..33864482afe2 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -42,6 +42,19 @@ Type: UInt32 Default: 1 +## auth_use_forwarded_address + +Use originating address for authentification for clients connected through proxy. + +:::note +This setting should be used with extra caution since forwarded address can be easely spoofed - server accepting such authentication should not be accessed directly but rather exclusively through a trusted proxy. +::: + +Type: Bool + +Default: 0 + + ## background_buffer_flush_schedule_pool_size The maximum number of threads that will be used for performing flush operations for Buffer-engine tables in the background. From e2b7e171dad911796c7c21f11f1e71404fe98568 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Wed, 10 Apr 2024 19:19:51 -0400 Subject: [PATCH 458/470] fix --- .../en/operations/server-configuration-parameters/settings.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 33864482afe2..eb93d9cda5bb 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -44,10 +44,10 @@ Default: 1 ## auth_use_forwarded_address -Use originating address for authentification for clients connected through proxy. +Use originating address for authentication for clients connected through proxy. :::note -This setting should be used with extra caution since forwarded address can be easely spoofed - server accepting such authentication should not be accessed directly but rather exclusively through a trusted proxy. +This setting should be used with extra caution since forwarded address can be easily spoofed - server accepting such authentication should not be accessed directly but rather exclusively through a trusted proxy. ::: Type: Bool From 7344daec8f295baf1d11b8b51d82821e53fece19 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Wed, 10 Apr 2024 20:25:08 -0400 Subject: [PATCH 459/470] add Composable Protocols --- .../settings/composable-protocols.md | 155 ++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 docs/en/operations/settings/composable-protocols.md diff --git a/docs/en/operations/settings/composable-protocols.md b/docs/en/operations/settings/composable-protocols.md new file mode 100644 index 000000000000..8a5ea584f4e7 --- /dev/null +++ b/docs/en/operations/settings/composable-protocols.md @@ -0,0 +1,155 @@ +--- +slug: /en/operations/settings/composable-protocols +sidebar_position: 64 +sidebar_label: Composable Protocols +--- + +# Composable Protocols + +Composable protocols allows more flexible configuration of TCP access to the ClickHouse server. This configuration can co-exist with or replace conventional configuration. 
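For orientation, a minimal sketch of such a configuration is shown below. It is illustrative only: the module name `plain_http` is an arbitrary example, and the element nesting (a named module carrying a `type`, plus optional `host`/`port` for its listening endpoint) is assumed from the descriptions in the following sections rather than taken verbatim from them.

``` xml
<protocols>
    <!-- "plain_http" is a user-chosen module name; "http" is one of the predefined protocol handlers -->
    <plain_http>
        <type>http</type>
        <host>127.0.0.1</host>
        <port>8123</port>
    </plain_http>
</protocols>
```

With a section like this, the server would accept plain HTTP connections on 127.0.0.1:8123 via the `plain_http` module; the sections below describe each building block in turn.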
+ +## Composable protocols section is denoted as `protocols` in configuration xml +**Example:** +``` xml + + + +``` + +## Basic modules define protocol layers +**Example:** +``` xml + + + + + http + + + +``` +where: +- `plain_http` - name which can be referred by another layer +- `type` - denotes protocol handler which will be instantiated to process data, set of protocol handlers is predefined: + * `tcp` - native clickhouse protocol handler + * `http` - http clickhouse protocol handler + * `tls` - TLS encryption layer + * `proxy1` - PROXYv1 layer + * `mysql` - MySQL compatibility protocol handler + * `postgres` - PostgreSQL compatibility protocol handler + * `prometheus` - Prometheus protocol handler + * `interserver` - clickhouse interserver handler + +:::note +`gRPC` protocol handler is not inmplemented for `Composable protocols` +::: + +## Endpoint (i.e. listening port) is denoted by `` and (optional) `` tags +**Example:** +``` xml + + + + + http + + 127.0.0.1 + 8123 + + + + +``` +If `` is omitted, then `` from root config is used. + +## Layers sequence is defined by `` tag, referencing another module +**Example:** definition for HTTPS protocol +``` xml + + + + + http + + + + + tls + plain_http + 127.0.0.1 + 8443 + + + +``` + +## Endpoint can be attached to any layer +**Example:** definition for HTTP (port 8123) and HTTPS (port 8443) endpoints +``` xml + + + + http + 127.0.0.1 + 8123 + + + + tls + plain_http + 127.0.0.1 + 8443 + + + +``` + +## Additional endpoints can be defined by referencing any module and omitting `` tag +**Example:** `another_http` endpoint is defined for `plain_http` module +``` xml + + + + http + 127.0.0.1 + 8123 + + + + tls + plain_http + 127.0.0.1 + 8443 + + + + plain_http + 127.0.0.1 + 8223 + + + +``` + +## Some modules can contain specific for its layer parameters +**Example:** for TLS layer private key (`privateKeyFile`) and certificate files (`certificateFile`) can be specified +``` xml + + + + http + 127.0.0.1 + 8123 + + + + tls + plain_http + 127.0.0.1 + 8443 + another_server.key + another_server.crt + + + +``` From 057747ccd2a6aef76121c3b87a52b22f65e32687 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Wed, 10 Apr 2024 20:33:42 -0400 Subject: [PATCH 460/470] fix --- docs/en/operations/settings/composable-protocols.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/settings/composable-protocols.md b/docs/en/operations/settings/composable-protocols.md index 8a5ea584f4e7..b68a5906abf1 100644 --- a/docs/en/operations/settings/composable-protocols.md +++ b/docs/en/operations/settings/composable-protocols.md @@ -41,7 +41,7 @@ where: * `interserver` - clickhouse interserver handler :::note -`gRPC` protocol handler is not inmplemented for `Composable protocols` +`gRPC` protocol handler is not implemented for `Composable protocols` ::: ## Endpoint (i.e. 
listening port) is denoted by `` and (optional) `` tags From e793b0e148c7db3cd0053b9ba27dc769e50a9878 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com> Date: Wed, 10 Apr 2024 20:37:26 -0400 Subject: [PATCH 461/470] Update aspell-dict.txt --- utils/check-style/aspell-ignore/en/aspell-dict.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 30c2de2b5076..9f7776f5201d 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -183,6 +183,8 @@ CompiledExpressionCacheCount ComplexKeyCache ComplexKeyDirect ComplexKeyHashed +Composable +composable Config ConnectionDetails Const @@ -697,6 +699,7 @@ PCRE PRCP PREWHERE PROCESSLIST +PROXYv PSUN PagerDuty ParallelFormattingOutputFormatThreads From 742a2f104332ede846b033e00d1761aa2db3a628 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 12 Mar 2024 12:55:22 +0300 Subject: [PATCH 462/470] JOIN filter push down improvements using equivalent sets --- src/Processors/QueryPlan/FilterStep.h | 1 + .../Optimizations/filterPushDown.cpp | 323 +++++++++++++----- 2 files changed, 235 insertions(+), 89 deletions(-) diff --git a/src/Processors/QueryPlan/FilterStep.h b/src/Processors/QueryPlan/FilterStep.h index e020cd3c4d3d..939d0900c867 100644 --- a/src/Processors/QueryPlan/FilterStep.h +++ b/src/Processors/QueryPlan/FilterStep.h @@ -24,6 +24,7 @@ class FilterStep : public ITransformingStep void describeActions(FormatSettings & settings) const override; const ActionsDAGPtr & getExpression() const { return actions_dag; } + ActionsDAGPtr & getExpression() { return actions_dag; } const String & getFilterColumnName() const { return filter_column_name; } bool removesFilterColumn() const { return remove_filter_column; } diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index e71bcc5602aa..82b65adfa510 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -100,7 +100,7 @@ static NameSet findIdentifiersOfNode(const ActionsDAG::Node * node) return res; } -static ActionsDAGPtr splitFilter(QueryPlan::Node * parent_node, const Names & allowed_inputs, size_t child_idx = 0) +static ActionsDAGPtr splitFilter(QueryPlan::Node * parent_node, const Names & available_inputs, size_t child_idx = 0) { QueryPlan::Node * child_node = parent_node->children.front(); checkChildrenSize(child_node, child_idx + 1); @@ -115,13 +115,14 @@ static ActionsDAGPtr splitFilter(QueryPlan::Node * parent_node, const Names & al const auto & all_inputs = child->getInputStreams()[child_idx].header.getColumnsWithTypeAndName(); - auto split_filter = expression->cloneActionsForFilterPushDown(filter_column_name, removes_filter, allowed_inputs, all_inputs); + + auto split_filter = expression->cloneActionsForFilterPushDown(filter_column_name, removes_filter, available_inputs, all_inputs); return split_filter; } static size_t -tryAddNewFilterStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, const ActionsDAGPtr & split_filter, - bool can_remove_filter = true, size_t child_idx = 0) +addNewFilterStepOrThrow(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, const ActionsDAGPtr & split_filter, + bool can_remove_filter = true, size_t child_idx = 0, bool update_parent_filter = true) { QueryPlan::Node * 
child_node = parent_node->children.front(); checkChildrenSize(child_node, child_idx + 1); @@ -134,21 +135,18 @@ tryAddNewFilterStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, con const auto & filter_column_name = filter->getFilterColumnName(); const auto * filter_node = expression->tryFindInOutputs(filter_column_name); - if (!filter_node && !filter->removesFilterColumn()) + if (update_parent_filter && !filter_node && !filter->removesFilterColumn()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Filter column {} was removed from ActionsDAG but it is needed in result. DAG:\n{}", filter_column_name, expression->dumpDAG()); - /// Filter column was replaced to constant. - const bool filter_is_constant = filter_node && filter_node->column && isColumnConst(*filter_node->column); - - /// Add new Filter step before Aggregating. - /// Expression/Filter -> Aggregating -> Something + /// Add new Filter step before Child. + /// Expression/Filter -> Child -> Something auto & node = nodes.emplace_back(); node.children.emplace_back(&node); std::swap(node.children[0], child_node->children[child_idx]); - /// Expression/Filter -> Aggregating -> Filter -> Something + /// Expression/Filter -> Child -> Filter -> Something /// New filter column is the first one. String split_filter_column_name = split_filter->getOutputs().front()->result_name; @@ -171,12 +169,22 @@ tryAddNewFilterStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, con ErrorCodes::LOGICAL_ERROR, "We are trying to push down a filter through a step for which we cannot update input stream"); } - if (!filter_node || filter_is_constant) - /// This means that all predicates of filter were pushed down. - /// Replace current actions to expression, as we don't need to filter anything. - parent = std::make_unique(child->getOutputStream(), expression); - else - filter->updateInputStream(child->getOutputStream()); + if (update_parent_filter) + { + /// Filter column was replaced to constant. + const bool filter_is_constant = filter_node && filter_node->column && isColumnConst(*filter_node->column); + + if (!filter_node || filter_is_constant) + { + /// This means that all predicates of filter were pushed down. + /// Replace current actions to expression, as we don't need to filter anything. 
+ parent = std::make_unique(child->getOutputStream(), expression); + } + else + { + filter->updateInputStream(child->getOutputStream()); + } + } return 3; } @@ -186,7 +194,7 @@ tryAddNewFilterStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, con bool can_remove_filter = true, size_t child_idx = 0) { if (auto split_filter = splitFilter(parent_node, allowed_inputs, child_idx)) - return tryAddNewFilterStep(parent_node, nodes, split_filter, can_remove_filter, child_idx); + return addNewFilterStepOrThrow(parent_node, nodes, split_filter, can_remove_filter, child_idx); return 0; } @@ -204,6 +212,212 @@ static size_t simplePushDownOverStep(QueryPlan::Node * parent_node, QueryPlan::N return 0; } +static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes, QueryPlanStepPtr & child) +{ + auto & parent = parent_node->step; + auto * filter = assert_cast(parent.get()); + + auto * join = typeid_cast(child.get()); + auto * filled_join = typeid_cast(child.get()); + + if (!join && !filled_join) + return 0; + + /** For equivalent JOIN with condition `ON lhs.x_1 = rhs.y_1 AND lhs.x_2 = rhs.y_2 ...`, we can build equivalent sets of columns and this + * will allow to push conditions that only use columns from equivalent sets to both sides of JOIN, without considering JOIN type. + * + * For example: `FROM lhs INNER JOIN rhs ON lhs.id = rhs.id AND lhs.value = rhs.value` + * In this example columns `id` and `value` from both tables are equivalent. + * + * During filter push down for different JOIN types filter push down logic is different: + * + * 1. For INNER JOIN we can push all valid conditions to both sides of JOIN. + * 2. For LEFT/RIGHT JOIN we can push conditions that use columns from left/right table to left/right JOIN side, and we can push conditions + * that use only columns from equivalent sets to right/left JOIN side. + * 3. For FULL OUTER JOIN we can push conditions that use only columns from equivalent sets to both JOIN sides. + * + * Additional filter push down optimizations: + * 1. TODO: Support building equivalent sets for more than 2 JOINS. It is possible, but will require more complex analysis step. + * 2. TODO: Support building equivalent sets for JOINs with more than 1 clause. + * 3. TODO: For LEFT/RIGHT join support optimization, we can assume that RIGHT/LEFT columns used in filter will be default/NULL constants and + * check if filter will always be false, in those scenario we can transform LEFT/RIGHT JOIN into INNER JOIN and push conditions to both tables. + */ + + const auto & left_stream_input_header = child->getInputStreams().front().header; + const auto & right_stream_input_header = child->getInputStreams().back().header; + const auto & join_header = child->getOutputStream().header; + const auto & table_join = join ? 
join->getJoin()->getTableJoin() : filled_join->getJoin()->getTableJoin(); + + ActionsDAGPtr left_table_filter_dag = filter->getExpression(); + std::string left_table_filter_column_name = filter->getFilterColumnName(); + + ActionsDAGPtr right_table_filter_dag = filter->getExpression(); + std::string right_table_filter_column_name = filter->getFilterColumnName(); + + std::unordered_map equivalent_left_table_key_column_name_to_right_table_column; + std::unordered_map equivalent_right_table_key_column_name_to_left_table_column; + + bool has_single_clause = table_join.getClauses().size() == 1; + + if (has_single_clause) + { + const auto & join_clause = table_join.getClauses()[0]; + size_t key_names_size = join_clause.key_names_left.size(); + + for (size_t i = 0; i < key_names_size; ++i) + { + const auto & left_table_key_name = join_clause.key_names_left[i]; + const auto & right_table_key_name = join_clause.key_names_right[i]; + + const auto & left_table_column = left_stream_input_header.getByName(left_table_key_name); + const auto & right_table_column = right_stream_input_header.getByName(right_table_key_name); + + if (!left_table_column.type->equals(*right_table_column.type)) + continue; + + equivalent_left_table_key_column_name_to_right_table_column[left_table_key_name] = right_table_column; + equivalent_right_table_key_column_name_to_left_table_column[right_table_key_name] = left_table_column; + } + + const auto & filter_expression = filter->getExpression(); + const auto * filter_expression_node = &filter_expression->findInOutputs(filter->getFilterColumnName()); + + left_table_filter_dag = ActionsDAG::buildFilterActionsDAG({filter_expression_node}, equivalent_right_table_key_column_name_to_left_table_column); + left_table_filter_column_name = left_table_filter_dag->getOutputs()[0]->result_name; + + right_table_filter_dag = ActionsDAG::buildFilterActionsDAG({filter_expression_node}, equivalent_left_table_key_column_name_to_right_table_column); + right_table_filter_column_name = right_table_filter_dag->getOutputs()[0]->result_name; + } + else + { + right_table_filter_dag = right_table_filter_dag->clone(); + } + + ActionsDAGPtr left_stream_pushed_split_filter; + ActionsDAGPtr right_stream_pushed_split_filter; + + auto join_push_down = [&](bool push_to_left_stream, bool filter_push_down_all_input_columns_available) -> size_t + { + const auto push_child_idx = push_to_left_stream ? 0 : 1; + const auto & input_header = push_to_left_stream ? left_stream_input_header : right_stream_input_header; + const auto & input_filter_expression = push_to_left_stream ? left_table_filter_dag : right_table_filter_dag; + const auto & input_filter_column_name = push_to_left_stream ? left_table_filter_column_name : right_table_filter_column_name; + const auto & equivalent_columns_for_filter = push_to_left_stream ? equivalent_left_table_key_column_name_to_right_table_column + : equivalent_right_table_key_column_name_to_left_table_column; + auto & stream_pushed_split_filter = push_to_left_stream ? left_stream_pushed_split_filter : right_stream_pushed_split_filter; + + Names available_input_columns_for_filter; + const auto & input_columns_names = input_header.getNames(); + + for (const auto & name : input_columns_names) + { + /// Skip columns that does not have equivalent column in other stream + if (!filter_push_down_all_input_columns_available && !equivalent_columns_for_filter.contains(name)) + continue; + + /// Skip key if it is renamed. + /// I don't know if it is possible. Just in case. 
+ if (!input_header.has(name) || !join_header.has(name)) + continue; + + /// Skip if type is changed. Push down expression expect equal types. + if (!input_header.getByName(name).type->equals(*join_header.getByName(name).type)) + continue; + + available_input_columns_for_filter.push_back(name); + } + + if (available_input_columns_for_filter.empty()) + return 0; + + stream_pushed_split_filter = input_filter_expression->cloneActionsForFilterPushDown(input_filter_column_name, + filter->removesFilterColumn(), + available_input_columns_for_filter, + input_header.getColumnsWithTypeAndName()); + if (!stream_pushed_split_filter) + return 0; + + /* + * We should check the presence of a split filter column name in `input_columns_names` to avoid removing the required column. + * + * Example: + * A filter expression is `a = c AND b = c`, but `b` and `c` belong to another side of the join and not in `allowed_keys`, so the final split filter is just `a`. + * In this case `a` can be in `input_columns_names` but not `and(a, equals(b, c))`. + * + * New filter column is the first one. + */ + const auto & split_filter_column_name = stream_pushed_split_filter->getOutputs().front()->result_name; + bool can_remove_filter = std::find(input_columns_names.begin(), input_columns_names.end(), split_filter_column_name) != input_columns_names.end(); + const size_t updated_steps = addNewFilterStepOrThrow(parent_node, nodes, stream_pushed_split_filter, can_remove_filter, push_child_idx, false /*update_parent_filter*/); + assert(updated_steps > 0); + + LOG_DEBUG(&Poco::Logger::get("QueryPlanOptimizations"), + "Pushed down filter {} to the {} side of join", + split_filter_column_name, + (push_to_left_stream ? JoinKind::Left : JoinKind::Right)); + + return updated_steps; + }; + + bool left_stream_filter_push_down_all_input_columns_available = true; + bool right_stream_filter_push_down_all_input_columns_available = true; + + if (table_join.kind() == JoinKind::Left) + { + right_stream_filter_push_down_all_input_columns_available = false; + } + else if (table_join.kind() == JoinKind::Right) + { + left_stream_filter_push_down_all_input_columns_available = false; + } + else if (table_join.kind() == JoinKind::Full) + { + left_stream_filter_push_down_all_input_columns_available = false; + right_stream_filter_push_down_all_input_columns_available = false; + } + + auto old_filter_expression = filter->getExpression(); + ActionsDAGPtr new_filter_expression; + + size_t left_stream_push_down_updated_steps = join_push_down(true /*push_to_left_stream*/, left_stream_filter_push_down_all_input_columns_available); + size_t right_stream_push_down_updated_steps = 0; + + /** We disable push down to right table in cases: + * 1. Right side is already filled. Example: JOIN with Dictionary. + * 2. ASOF Right join is not supported. 
+ */ + if (join && join->allowPushDownToRight() && table_join.strictness() != JoinStrictness::Asof) + right_stream_push_down_updated_steps = join_push_down(false /*push_to_left_stream*/, right_stream_filter_push_down_all_input_columns_available); + + if (left_stream_push_down_updated_steps || right_stream_push_down_updated_steps) + { + new_filter_expression = std::move(left_table_filter_dag); + + if (table_join.kind() == JoinKind::Right) + new_filter_expression = std::move(right_table_filter_dag); + } + + if (new_filter_expression) + { + const auto * filter_node = new_filter_expression->tryFindInOutputs(filter->getFilterColumnName()); + if (!filter_node && !filter->removesFilterColumn()) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Filter column {} was removed from ActionsDAG but it is needed in result. DAG:\n{}", + filter->getFilterColumnName(), new_filter_expression->dumpDAG()); + + /// Filter column was replaced to constant. + const bool filter_is_constant = filter_node && filter_node->column && isColumnConst(*filter_node->column); + if (!filter_node || filter_is_constant) + { + /// This means that all predicates of filter were pushed down. + /// Replace current actions to expression, as we don't need to filter anything. + parent = std::make_unique(child->getOutputStream(), new_filter_expression); + } + } + + return left_stream_push_down_updated_steps; +} + size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) { if (parent_node->children.size() != 1) @@ -327,77 +541,8 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (auto updated_steps = simplePushDownOverStep(parent_node, nodes, child)) return updated_steps; - auto * join = typeid_cast(child.get()); - auto * filled_join = typeid_cast(child.get()); - - if (join || filled_join) - { - auto join_push_down = [&](JoinKind kind) -> size_t - { - const auto & table_join = join ? join->getJoin()->getTableJoin() : filled_join->getJoin()->getTableJoin(); - - /// Only inner, cross and left(/right) join are supported. Other types may generate default values for left table keys. - /// So, if we push down a condition like `key != 0`, not all rows may be filtered. - if (table_join.kind() != JoinKind::Inner && table_join.kind() != JoinKind::Cross && table_join.kind() != kind) - return 0; - - /// There is no ASOF Right join, so we're talking about pushing to the right side - if (kind == JoinKind::Right && table_join.strictness() == JoinStrictness::Asof) - return 0; - - bool is_left = kind == JoinKind::Left; - const auto & input_header = is_left ? child->getInputStreams().front().header : child->getInputStreams().back().header; - const auto & res_header = child->getOutputStream().header; - Names allowed_keys; - const auto & source_columns = input_header.getNames(); - for (const auto & name : source_columns) - { - /// Skip key if it is renamed. - /// I don't know if it is possible. Just in case. - if (!input_header.has(name) || !res_header.has(name)) - continue; - - /// Skip if type is changed. Push down expression expect equal types. - if (!input_header.getByName(name).type->equals(*res_header.getByName(name).type)) - continue; - - allowed_keys.push_back(name); - } - - /// For left JOIN, push down to the first child; for right - to the second one. - const auto child_idx = is_left ? 
0 : 1; - ActionsDAGPtr split_filter = splitFilter(parent_node, allowed_keys, child_idx); - if (!split_filter) - return 0; - /* - * We should check the presence of a split filter column name in `source_columns` to avoid removing the required column. - * - * Example: - * A filter expression is `a AND b = c`, but `b` and `c` belong to another side of the join and not in `allowed_keys`, so the final split filter is just `a`. - * In this case `a` can be in `source_columns` but not `and(a, equals(b, c))`. - * - * New filter column is the first one. - */ - const String & split_filter_column_name = split_filter->getOutputs().front()->result_name; - bool can_remove_filter = source_columns.end() == std::find(source_columns.begin(), source_columns.end(), split_filter_column_name); - const size_t updated_steps = tryAddNewFilterStep(parent_node, nodes, split_filter, can_remove_filter, child_idx); - if (updated_steps > 0) - { - LOG_DEBUG(getLogger("QueryPlanOptimizations"), "Pushed down filter {} to the {} side of join", split_filter_column_name, kind); - } - return updated_steps; - }; - - if (size_t updated_steps = join_push_down(JoinKind::Left)) - return updated_steps; - - /// For full sorting merge join we push down both to the left and right tables, because left and right streams are not independent. - if (join && join->allowPushDownToRight()) - { - if (size_t updated_steps = join_push_down(JoinKind::Right)) - return updated_steps; - } - } + if (auto updated_steps = tryPushDownOverJoinStep(parent_node, nodes, child)) + return updated_steps; /// TODO. /// We can filter earlier if expression does not depend on WITH FILL columns. From 77a7d0027a597a3cb026b25d3757ba0103211468 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 12 Mar 2024 15:02:02 +0300 Subject: [PATCH 463/470] Updated implementation --- .../Optimizations/filterPushDown.cpp | 23 +++++++++++++------ 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 82b65adfa510..8a77022005f8 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -241,6 +241,8 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: * 2. TODO: Support building equivalent sets for JOINs with more than 1 clause. * 3. TODO: For LEFT/RIGHT join support optimization, we can assume that RIGHT/LEFT columns used in filter will be default/NULL constants and * check if filter will always be false, in those scenario we can transform LEFT/RIGHT JOIN into INNER JOIN and push conditions to both tables. + * 4. TODO: It is possible to pull up filter conditions from LEFT/RIGHT stream and push conditions that use only columns from equivalent sets + * to RIGHT/LEFT stream. 
*/ const auto & left_stream_input_header = child->getInputStreams().front().header; @@ -311,20 +313,27 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: for (const auto & name : input_columns_names) { + auto input_name = name; + /// Skip columns that does not have equivalent column in other stream - if (!filter_push_down_all_input_columns_available && !equivalent_columns_for_filter.contains(name)) - continue; + if (!filter_push_down_all_input_columns_available) + { + auto it = equivalent_columns_for_filter.find(name); + if (it == equivalent_columns_for_filter.end()) + continue; + + if (!join_header.has(input_name)) + input_name = it->second.name; + } - /// Skip key if it is renamed. - /// I don't know if it is possible. Just in case. - if (!input_header.has(name) || !join_header.has(name)) + if (!join_header.has(input_name)) continue; /// Skip if type is changed. Push down expression expect equal types. - if (!input_header.getByName(name).type->equals(*join_header.getByName(name).type)) + if (!input_header.getByName(input_name).type->equals(*join_header.getByName(input_name).type)) continue; - available_input_columns_for_filter.push_back(name); + available_input_columns_for_filter.push_back(input_name); } if (available_input_columns_for_filter.empty()) From fb5e622707eb0051c725e3f19271a55819b325a2 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 12 Mar 2024 15:37:03 +0300 Subject: [PATCH 464/470] Fixed tests --- .../QueryPlan/Optimizations/filterPushDown.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 8a77022005f8..f6b859c31850 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -357,7 +357,12 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: */ const auto & split_filter_column_name = stream_pushed_split_filter->getOutputs().front()->result_name; bool can_remove_filter = std::find(input_columns_names.begin(), input_columns_names.end(), split_filter_column_name) != input_columns_names.end(); - const size_t updated_steps = addNewFilterStepOrThrow(parent_node, nodes, stream_pushed_split_filter, can_remove_filter, push_child_idx, false /*update_parent_filter*/); + const size_t updated_steps = addNewFilterStepOrThrow(parent_node, + nodes, + stream_pushed_split_filter, + can_remove_filter, + push_child_idx, + false /*update_parent_filter*/); assert(updated_steps > 0); LOG_DEBUG(&Poco::Logger::get("QueryPlanOptimizations"), @@ -420,7 +425,8 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: { /// This means that all predicates of filter were pushed down. /// Replace current actions to expression, as we don't need to filter anything. 
- parent = std::make_unique(child->getOutputStream(), new_filter_expression); + auto forward_columns_actions = std::make_shared(child->getOutputStream().header.getColumnsWithTypeAndName()); + parent = std::make_unique(child->getOutputStream(), std::move(forward_columns_actions)); } } From 0dcd70b5a534dae03ac1156703b5c166d6314bf0 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 12 Mar 2024 15:39:51 +0300 Subject: [PATCH 465/470] Added performance tests --- .../join_filter_pushdown_equivalent_sets.xml | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 tests/performance/join_filter_pushdown_equivalent_sets.xml diff --git a/tests/performance/join_filter_pushdown_equivalent_sets.xml b/tests/performance/join_filter_pushdown_equivalent_sets.xml new file mode 100644 index 000000000000..a75e12cc5fc5 --- /dev/null +++ b/tests/performance/join_filter_pushdown_equivalent_sets.xml @@ -0,0 +1,24 @@ + + CREATE TABLE test_table_1(id UInt64, value String) ENGINE=MergeTree ORDER BY id + CREATE TABLE test_table_2(id UInt64, value String) ENGINE=MergeTree ORDER BY id + + INSERT INTO test_table_1 SELECT number, number FROM numbers(5000000) + INSERT INTO test_table_2 SELECT number, number FROM numbers(5000000) + + + + join_kind + + INNER + LEFT + RIGHT + FULL + + + + + SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs {join_kind} JOIN test_table_2 AS rhs ON lhs.id = rhs.id WHERE lhs.id = 5 FORMAT Null + + DROP TABLE test_table_1 + DROP TABLE test_table_2 + From 1218cf1568e7aae8f1853deede0c703bc19c4e61 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 9 Apr 2024 14:39:08 +0300 Subject: [PATCH 466/470] Updated implementation --- src/Interpreters/ActionsDAG.cpp | 245 +++++- src/Interpreters/ActionsDAG.h | 42 +- .../Optimizations/filterPushDown.cpp | 227 +++--- .../join_filter_pushdown_equivalent_sets.xml | 18 +- .../01655_plan_optimizations.reference | 6 +- .../0_stateless/01655_plan_optimizations.sh | 4 +- .../02861_filter_pushdown_const_bug.reference | 6 + ...filter_push_down_equivalent_sets.reference | 710 ++++++++++++++++++ ..._join_filter_push_down_equivalent_sets.sql | 131 ++++ 9 files changed, 1223 insertions(+), 166 deletions(-) create mode 100644 tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference create mode 100644 tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.sql diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 09e9364a3f1d..e6a489c5785a 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -2135,13 +2135,6 @@ ConjunctionNodes getConjunctionNodes(ActionsDAG::Node * predicate, std::unordere } } - // std::cerr << "Allowed " << conjunction.allowed.size() << std::endl; - // for (const auto & node : conjunction.allowed) - // std::cerr << node->result_name << std::endl; - // std::cerr << "Rejected " << conjunction.rejected.size() << std::endl; - // for (const auto & node : conjunction.rejected) - // std::cerr << node->result_name << std::endl; - return conjunction; } @@ -2170,7 +2163,7 @@ ColumnsWithTypeAndName prepareFunctionArguments(const ActionsDAG::NodeRawConstPt /// /// Result actions add single column with conjunction result (it is always first in outputs). /// No other columns are added or removed. 
-ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs) +ActionsDAGPtr ActionsDAG::createActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs) { if (conjunction.empty()) return nullptr; @@ -2265,9 +2258,9 @@ ActionsDAGPtr ActionsDAG::cloneActionsForConjunction(NodeRawConstPtrs conjunctio return actions; } -ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( +ActionsDAGPtr ActionsDAG::splitActionsForFilterPushDown( const std::string & filter_name, - bool can_remove_filter, + bool removes_filter, const Names & available_inputs, const ColumnsWithTypeAndName & all_inputs) { @@ -2321,16 +2314,230 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( } } - auto actions = cloneActionsForConjunction(conjunction.allowed, all_inputs); + auto actions = createActionsForConjunction(conjunction.allowed, all_inputs); if (!actions) return nullptr; /// Now, when actions are created, update the current DAG. + removeUnusedConjunctions(std::move(conjunction.rejected), predicate, removes_filter); + + return actions; +} + +ActionsDAG::ActionsForJOINFilterPushDown ActionsDAG::splitActionsForJOINFilterPushDown( + const std::string & filter_name, + bool removes_filter, + const Names & left_stream_available_columns_to_push_down, + const ColumnsWithTypeAndName & left_stream_all_inputs, + const Names & right_stream_available_columns_to_push_down, + const ColumnsWithTypeAndName & right_stream_all_inputs, + const Names & equivalent_columns_to_push_down, + const std::unordered_map & equivalent_left_stream_column_to_right_stream_column, + const std::unordered_map & equivalent_right_stream_column_to_left_stream_column) +{ + Node * predicate = const_cast(tryFindInOutputs(filter_name)); + if (!predicate) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Output nodes for ActionsDAG do not contain filter column name {}. DAG:\n{}", + filter_name, + dumpDAG()); + + /// If condition is constant let's do nothing. + /// It means there is nothing to push down or optimization was already applied. 
+ if (predicate->type == ActionType::COLUMN) + return {}; + + auto get_input_nodes = [this](const Names & inputs_names) + { + std::unordered_set allowed_nodes; + + std::unordered_map> inputs_map; + for (const auto & input_node : inputs) + inputs_map[input_node->result_name].emplace_back(input_node); + + for (const auto & name : inputs_names) + { + auto & inputs_list = inputs_map[name]; + if (inputs_list.empty()) + continue; + + allowed_nodes.emplace(inputs_list.front()); + inputs_list.pop_front(); + } + + return allowed_nodes; + }; + + auto left_stream_allowed_nodes = get_input_nodes(left_stream_available_columns_to_push_down); + auto right_stream_allowed_nodes = get_input_nodes(right_stream_available_columns_to_push_down); + auto both_streams_allowed_nodes = get_input_nodes(equivalent_columns_to_push_down); + + auto left_stream_push_down_conjunctions = getConjunctionNodes(predicate, left_stream_allowed_nodes); + auto right_stream_push_down_conjunctions = getConjunctionNodes(predicate, right_stream_allowed_nodes); + auto both_streams_push_down_conjunctions = getConjunctionNodes(predicate, both_streams_allowed_nodes); + + NodeRawConstPtrs left_stream_allowed_conjunctions = std::move(left_stream_push_down_conjunctions.allowed); + NodeRawConstPtrs right_stream_allowed_conjunctions = std::move(right_stream_push_down_conjunctions.allowed); + + std::unordered_set left_stream_allowed_conjunctions_set(left_stream_allowed_conjunctions.begin(), left_stream_allowed_conjunctions.end()); + std::unordered_set right_stream_allowed_conjunctions_set(right_stream_allowed_conjunctions.begin(), right_stream_allowed_conjunctions.end()); + + for (const auto * both_streams_push_down_allowed_conjunction_node : both_streams_push_down_conjunctions.allowed) + { + if (!left_stream_allowed_conjunctions_set.contains(both_streams_push_down_allowed_conjunction_node)) + left_stream_allowed_conjunctions.push_back(both_streams_push_down_allowed_conjunction_node); + + if (!right_stream_allowed_conjunctions_set.contains(both_streams_push_down_allowed_conjunction_node)) + right_stream_allowed_conjunctions.push_back(both_streams_push_down_allowed_conjunction_node); + } + + std::unordered_set rejected_conjunctions_set; + rejected_conjunctions_set.insert(left_stream_push_down_conjunctions.rejected.begin(), left_stream_push_down_conjunctions.rejected.end()); + rejected_conjunctions_set.insert(right_stream_push_down_conjunctions.rejected.begin(), right_stream_push_down_conjunctions.rejected.end()); + rejected_conjunctions_set.insert(both_streams_push_down_conjunctions.rejected.begin(), both_streams_push_down_conjunctions.rejected.end()); + + for (const auto & left_stream_allowed_conjunction : left_stream_allowed_conjunctions) + rejected_conjunctions_set.erase(left_stream_allowed_conjunction); + + for (const auto & right_stream_allowed_conjunction : right_stream_allowed_conjunctions) + rejected_conjunctions_set.erase(right_stream_allowed_conjunction); + + NodeRawConstPtrs rejected_conjunctions(rejected_conjunctions_set.begin(), rejected_conjunctions_set.end()); + + if (rejected_conjunctions.size() == 1) + { + chassert(rejected_conjunctions.front()->result_type); + + bool left_stream_push_constant = !left_stream_allowed_conjunctions.empty() && left_stream_allowed_conjunctions[0]->type == ActionType::COLUMN; + bool right_stream_push_constant = !right_stream_allowed_conjunctions.empty() && right_stream_allowed_conjunctions[0]->type == ActionType::COLUMN; + + if ((left_stream_push_constant || right_stream_push_constant) && 
!rejected_conjunctions.front()->result_type->equals(*predicate->result_type)) + { + /// No further optimization can be done + return {}; + } + } + + auto left_stream_filter_to_push_down = createActionsForConjunction(left_stream_allowed_conjunctions, left_stream_all_inputs); + auto right_stream_filter_to_push_down = createActionsForConjunction(right_stream_allowed_conjunctions, right_stream_all_inputs); + + auto replace_equivalent_columns_in_filter = [](const ActionsDAGPtr & filter, + const ColumnsWithTypeAndName & stream_inputs, + const std::unordered_map & columns_to_replace) + { + auto updated_filter = ActionsDAG::buildFilterActionsDAG({filter->getOutputs()[0]}, columns_to_replace); + chassert(updated_filter->getOutputs().size() == 1); + + std::unordered_map> updated_filter_inputs; + + for (const auto & input : updated_filter->getInputs()) + updated_filter_inputs[input->result_name].push_back(input); + + for (const auto & input : filter->getInputs()) + { + if (updated_filter_inputs.contains(input->result_name)) + continue; + + const Node * updated_filter_input_node = nullptr; + + auto it = columns_to_replace.find(input->result_name); + if (it != columns_to_replace.end()) + updated_filter_input_node = &updated_filter->addInput(it->second); + else + updated_filter_input_node = &updated_filter->addInput({input->column, input->result_type, input->result_name}); + + updated_filter_inputs[input->result_name].push_back(updated_filter_input_node); + } + + for (const auto & input_column : stream_inputs) + { + const Node * input; + auto & list = updated_filter_inputs[input_column.name]; + if (list.empty()) + { + input = &updated_filter->addInput(input_column); + } + else + { + input = list.front(); + list.pop_front(); + } + + if (input != updated_filter->getOutputs()[0]) + updated_filter->outputs.push_back(input); + } + + return updated_filter; + }; + + if (left_stream_filter_to_push_down) + left_stream_filter_to_push_down = replace_equivalent_columns_in_filter(left_stream_filter_to_push_down, + left_stream_all_inputs, + equivalent_right_stream_column_to_left_stream_column); + + if (right_stream_filter_to_push_down) + right_stream_filter_to_push_down = replace_equivalent_columns_in_filter(right_stream_filter_to_push_down, + right_stream_all_inputs, + equivalent_left_stream_column_to_right_stream_column); + + /* + * We should check the presence of a split filter column name in stream columns to avoid removing the required column. + * + * Example: + * A filter expression is `a AND b = c`, but `b` and `c` belong to another side of the join and not in allowed columns to push down, + * so the final split filter is just `a`. + * In this case `a` can be in stream columns but not `and(a, equals(b, c))`. 
+ */ + + bool left_stream_filter_removes_filter = true; + bool right_stream_filter_removes_filter = true; + + auto columns_have_column_with_name = [](const ColumnsWithTypeAndName & columns, const std::string & column_name) + { + for (const auto & column : columns) + { + if (column.name == column_name) + return true; + } + + return false; + }; + + if (left_stream_filter_to_push_down) + { + const auto & left_stream_filter_column_name = left_stream_filter_to_push_down->getOutputs()[0]->result_name; + left_stream_filter_removes_filter = !columns_have_column_with_name(left_stream_all_inputs, left_stream_filter_column_name); + } + + if (right_stream_filter_to_push_down) + { + const auto & right_stream_filter_column_name = right_stream_filter_to_push_down->getOutputs()[0]->result_name; + right_stream_filter_removes_filter = !columns_have_column_with_name(right_stream_all_inputs, right_stream_filter_column_name); + } + + ActionsDAG::ActionsForJOINFilterPushDown result + { + .left_stream_filter_to_push_down = std::move(left_stream_filter_to_push_down), + .left_stream_filter_removes_filter = left_stream_filter_removes_filter, + .right_stream_filter_to_push_down = std::move(right_stream_filter_to_push_down), + .right_stream_filter_removes_filter = right_stream_filter_removes_filter + }; + + if (!result.left_stream_filter_to_push_down && !result.right_stream_filter_to_push_down) + return result; + + /// Now, when actions are created, update the current DAG. + removeUnusedConjunctions(std::move(rejected_conjunctions), predicate, removes_filter); - if (conjunction.rejected.empty()) + return result; +} + +void ActionsDAG::removeUnusedConjunctions(NodeRawConstPtrs rejected_conjunctions, Node * predicate, bool removes_filter) +{ + if (rejected_conjunctions.empty()) { /// The whole predicate was split. - if (can_remove_filter) + if (removes_filter) { /// If filter column is not needed, remove it from output nodes. std::erase_if(outputs, [&](const Node * node) { return node == predicate; }); @@ -2362,7 +2569,7 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( { /// Predicate is conjunction, where both allowed and rejected sets are not empty. 
- NodeRawConstPtrs new_children = std::move(conjunction.rejected); + NodeRawConstPtrs new_children = std::move(rejected_conjunctions); if (new_children.size() == 1 && new_children.front()->result_type->equals(*predicate->result_type)) { @@ -2403,13 +2610,12 @@ ActionsDAGPtr ActionsDAG::cloneActionsForFilterPushDown( std::unordered_set used_inputs; for (const auto * input : inputs) { - if (can_remove_filter && input == predicate) + if (removes_filter && input == predicate) continue; used_inputs.insert(input); } removeUnusedActions(used_inputs); - return actions; } static bool isColumnSortingPreserved(const ActionsDAG::Node * start_node, const String & sorted_column) @@ -2557,8 +2763,11 @@ ActionsDAGPtr ActionsDAG::buildFilterActionsDAG( auto input_node_it = node_name_to_input_node_column.find(node->result_name); if (input_node_it != node_name_to_input_node_column.end()) { - result_node = &result_dag->addInput(input_node_it->second); - node_to_result_node.emplace(node, result_node); + auto & result_input = result_inputs[input_node_it->second.name]; + if (!result_input) + result_input = &result_dag->addInput(input_node_it->second); + + node_to_result_node.emplace(node, result_input); nodes_to_process.pop_back(); continue; } diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 469fe9ea7f17..278ea4e5ca4c 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -372,12 +372,46 @@ class ActionsDAG /// columns will be transformed like `x, y, z` -> `z > 0, z, x, y` -(remove filter)-> `z, x, y`. /// To avoid it, add inputs from `all_inputs` list, /// so actions `x, y, z -> z > 0, x, y, z` -(remove filter)-> `x, y, z` will not change columns order. - ActionsDAGPtr cloneActionsForFilterPushDown( + ActionsDAGPtr splitActionsForFilterPushDown( const std::string & filter_name, - bool can_remove_filter, + bool removes_filter, const Names & available_inputs, const ColumnsWithTypeAndName & all_inputs); + struct ActionsForJOINFilterPushDown + { + ActionsDAGPtr left_stream_filter_to_push_down; + bool left_stream_filter_removes_filter; + ActionsDAGPtr right_stream_filter_to_push_down; + bool right_stream_filter_removes_filter; + }; + + /** Split actions for JOIN filter push down. + * + * @param filter_name - name of filter node in current DAG. + * @param removes_filter - if filter is removed after it is applied. + * @param left_stream_available_columns_to_push_down - columns from left stream that are safe to use in push down conditions + * to left stream. + * @param left_stream_all_inputs - all left streams columns. + * @param right_stream_available_columns_to_push_down - columns from right stream that are safe to use in push down conditions + * to right stream. + * @param right_stream_all_inputs - all right stream columns. + * @param equivalent_columns_to_push_down - columns from left and right streams that are safe to use in push down conditions + * to left and right streams. + * @param equivalent_left_stream_column_to_right_stream_column - equivalent left stream column name to right stream column map. + * @param equivalent_right_stream_column_to_left_stream_column - equivalent right stream column name to left stream column map. 
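The buildFilterActionsDAG change above makes sure that the same source column produces a single INPUT node even when it is referenced several times while the filter is rebuilt. The memoization pattern, shown here with toy Dag and Node types that only mimic the pointer-stability requirement of the real structure:

    #include <iostream>
    #include <list>
    #include <string>
    #include <unordered_map>

    struct Node { std::string name; };

    struct Dag
    {
        std::list<Node> nodes;  /// std::list keeps node addresses stable as the DAG grows
        Node & addInput(const std::string & name) { return nodes.emplace_back(Node{name}); }
    };

    int main()
    {
        Dag dag;
        std::unordered_map<std::string, Node *> inputs_by_name;

        /// The same source column may be met several times while the filter is rebuilt;
        /// memoizing by name guarantees a single INPUT node per column.
        for (const std::string column : {"id", "value", "id"})
        {
            auto & slot = inputs_by_name[column];
            if (!slot)
                slot = &dag.addInput(column);
        }

        std::cout << dag.nodes.size() << '\n';  /// 2, not 3
    }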
+ */ + ActionsForJOINFilterPushDown splitActionsForJOINFilterPushDown( + const std::string & filter_name, + bool removes_filter, + const Names & left_stream_available_columns_to_push_down, + const ColumnsWithTypeAndName & left_stream_all_inputs, + const Names & right_stream_available_columns_to_push_down, + const ColumnsWithTypeAndName & right_stream_all_inputs, + const Names & equivalent_columns_to_push_down, + const std::unordered_map & equivalent_left_stream_column_to_right_stream_column, + const std::unordered_map & equivalent_right_stream_column_to_left_stream_column); + bool isSortingPreserved(const Block & input_header, const SortDescription & sort_description, const String & ignore_output_column = "") const; @@ -429,7 +463,9 @@ class ActionsDAG void compileFunctions(size_t min_count_to_compile_expression, const std::unordered_set & lazy_executed_nodes = {}); #endif - static ActionsDAGPtr cloneActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs); + static ActionsDAGPtr createActionsForConjunction(NodeRawConstPtrs conjunction, const ColumnsWithTypeAndName & all_inputs); + + void removeUnusedConjunctions(NodeRawConstPtrs rejected_conjunctions, Node * predicate, bool removes_filter); }; class FindOriginalNodeForOutputName diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index f6b859c31850..f59b4dafbae0 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -114,10 +114,7 @@ static ActionsDAGPtr splitFilter(QueryPlan::Node * parent_node, const Names & av bool removes_filter = filter->removesFilterColumn(); const auto & all_inputs = child->getInputStreams()[child_idx].header.getColumnsWithTypeAndName(); - - - auto split_filter = expression->cloneActionsForFilterPushDown(filter_column_name, removes_filter, available_inputs, all_inputs); - return split_filter; + return expression->splitActionsForFilterPushDown(filter_column_name, removes_filter, available_inputs, all_inputs); } static size_t @@ -231,18 +228,18 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: * * During filter push down for different JOIN types filter push down logic is different: * - * 1. For INNER JOIN we can push all valid conditions to both sides of JOIN. - * 2. For LEFT/RIGHT JOIN we can push conditions that use columns from left/right table to left/right JOIN side, and we can push conditions - * that use only columns from equivalent sets to right/left JOIN side. - * 3. For FULL OUTER JOIN we can push conditions that use only columns from equivalent sets to both JOIN sides. + * 1. For INNER JOIN we can push all valid conditions to both sides of JOIN. We also can push all valid conditions that use columns from + * equivalent sets to both sides of JOIN. + * 2. For LEFT/RIGHT JOIN we can push conditions that use columns from LEFT/RIGHT stream to LEFT/RIGHT JOIN side. We can also push conditions + * that use columns from LEFT/RIGHT equivalent sets to RIGHT/LEFT JOIN side. * * Additional filter push down optimizations: * 1. TODO: Support building equivalent sets for more than 2 JOINS. It is possible, but will require more complex analysis step. * 2. TODO: Support building equivalent sets for JOINs with more than 1 clause. - * 3. TODO: For LEFT/RIGHT join support optimization, we can assume that RIGHT/LEFT columns used in filter will be default/NULL constants and + * 3. 
TODO: For LEFT/RIGHT JOIN, we can assume that RIGHT/LEFT columns used in filter will be default/NULL constants and * check if filter will always be false, in those scenario we can transform LEFT/RIGHT JOIN into INNER JOIN and push conditions to both tables. - * 4. TODO: It is possible to pull up filter conditions from LEFT/RIGHT stream and push conditions that use only columns from equivalent sets - * to RIGHT/LEFT stream. + * 4. TODO: It is possible to pull up filter conditions from LEFT/RIGHT stream and push conditions that use columns from LEFT/RIGHT equivalent sets + * to RIGHT/LEFT JOIN side. */ const auto & left_stream_input_header = child->getInputStreams().front().header; @@ -250,14 +247,11 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: const auto & join_header = child->getOutputStream().header; const auto & table_join = join ? join->getJoin()->getTableJoin() : filled_join->getJoin()->getTableJoin(); - ActionsDAGPtr left_table_filter_dag = filter->getExpression(); - std::string left_table_filter_column_name = filter->getFilterColumnName(); - - ActionsDAGPtr right_table_filter_dag = filter->getExpression(); - std::string right_table_filter_column_name = filter->getFilterColumnName(); + if (table_join.kind() == JoinKind::Full) + return 0; - std::unordered_map equivalent_left_table_key_column_name_to_right_table_column; - std::unordered_map equivalent_right_table_key_column_name_to_left_table_column; + std::unordered_map equivalent_left_stream_column_to_right_stream_column; + std::unordered_map equivalent_right_stream_column_to_left_stream_column; bool has_single_clause = table_join.getClauses().size() == 1; @@ -271,166 +265,146 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: const auto & left_table_key_name = join_clause.key_names_left[i]; const auto & right_table_key_name = join_clause.key_names_right[i]; + if (!join_header.has(left_table_key_name) || !join_header.has(right_table_key_name)) + continue; + const auto & left_table_column = left_stream_input_header.getByName(left_table_key_name); const auto & right_table_column = right_stream_input_header.getByName(right_table_key_name); if (!left_table_column.type->equals(*right_table_column.type)) continue; - equivalent_left_table_key_column_name_to_right_table_column[left_table_key_name] = right_table_column; - equivalent_right_table_key_column_name_to_left_table_column[right_table_key_name] = left_table_column; + equivalent_left_stream_column_to_right_stream_column[left_table_key_name] = right_table_column; + equivalent_right_stream_column_to_left_stream_column[right_table_key_name] = left_table_column; } - - const auto & filter_expression = filter->getExpression(); - const auto * filter_expression_node = &filter_expression->findInOutputs(filter->getFilterColumnName()); - - left_table_filter_dag = ActionsDAG::buildFilterActionsDAG({filter_expression_node}, equivalent_right_table_key_column_name_to_left_table_column); - left_table_filter_column_name = left_table_filter_dag->getOutputs()[0]->result_name; - - right_table_filter_dag = ActionsDAG::buildFilterActionsDAG({filter_expression_node}, equivalent_left_table_key_column_name_to_right_table_column); - right_table_filter_column_name = right_table_filter_dag->getOutputs()[0]->result_name; } - else + + auto get_available_columns_for_filter = [&](bool push_to_left_stream, bool filter_push_down_input_columns_available) { - right_table_filter_dag = right_table_filter_dag->clone(); - } + Names 
available_input_columns_for_filter; - ActionsDAGPtr left_stream_pushed_split_filter; - ActionsDAGPtr right_stream_pushed_split_filter; + if (!filter_push_down_input_columns_available) + return available_input_columns_for_filter; - auto join_push_down = [&](bool push_to_left_stream, bool filter_push_down_all_input_columns_available) -> size_t - { - const auto push_child_idx = push_to_left_stream ? 0 : 1; const auto & input_header = push_to_left_stream ? left_stream_input_header : right_stream_input_header; - const auto & input_filter_expression = push_to_left_stream ? left_table_filter_dag : right_table_filter_dag; - const auto & input_filter_column_name = push_to_left_stream ? left_table_filter_column_name : right_table_filter_column_name; - const auto & equivalent_columns_for_filter = push_to_left_stream ? equivalent_left_table_key_column_name_to_right_table_column - : equivalent_right_table_key_column_name_to_left_table_column; - auto & stream_pushed_split_filter = push_to_left_stream ? left_stream_pushed_split_filter : right_stream_pushed_split_filter; - - Names available_input_columns_for_filter; const auto & input_columns_names = input_header.getNames(); for (const auto & name : input_columns_names) { - auto input_name = name; - - /// Skip columns that does not have equivalent column in other stream - if (!filter_push_down_all_input_columns_available) - { - auto it = equivalent_columns_for_filter.find(name); - if (it == equivalent_columns_for_filter.end()) - continue; - - if (!join_header.has(input_name)) - input_name = it->second.name; - } - - if (!join_header.has(input_name)) + if (!join_header.has(name)) continue; /// Skip if type is changed. Push down expression expect equal types. - if (!input_header.getByName(input_name).type->equals(*join_header.getByName(input_name).type)) + if (!input_header.getByName(name).type->equals(*join_header.getByName(name).type)) continue; - available_input_columns_for_filter.push_back(input_name); + available_input_columns_for_filter.push_back(name); } - if (available_input_columns_for_filter.empty()) - return 0; - - stream_pushed_split_filter = input_filter_expression->cloneActionsForFilterPushDown(input_filter_column_name, - filter->removesFilterColumn(), - available_input_columns_for_filter, - input_header.getColumnsWithTypeAndName()); - if (!stream_pushed_split_filter) - return 0; - - /* - * We should check the presence of a split filter column name in `input_columns_names` to avoid removing the required column. - * - * Example: - * A filter expression is `a = c AND b = c`, but `b` and `c` belong to another side of the join and not in `allowed_keys`, so the final split filter is just `a`. - * In this case `a` can be in `input_columns_names` but not `and(a, equals(b, c))`. - * - * New filter column is the first one. - */ - const auto & split_filter_column_name = stream_pushed_split_filter->getOutputs().front()->result_name; - bool can_remove_filter = std::find(input_columns_names.begin(), input_columns_names.end(), split_filter_column_name) != input_columns_names.end(); - const size_t updated_steps = addNewFilterStepOrThrow(parent_node, - nodes, - stream_pushed_split_filter, - can_remove_filter, - push_child_idx, - false /*update_parent_filter*/); - assert(updated_steps > 0); - - LOG_DEBUG(&Poco::Logger::get("QueryPlanOptimizations"), - "Pushed down filter {} to the {} side of join", - split_filter_column_name, - (push_to_left_stream ? 
JoinKind::Left : JoinKind::Right)); - - return updated_steps; + return available_input_columns_for_filter; }; - bool left_stream_filter_push_down_all_input_columns_available = true; - bool right_stream_filter_push_down_all_input_columns_available = true; + bool left_stream_filter_push_down_input_columns_available = true; + bool right_stream_filter_push_down_input_columns_available = true; if (table_join.kind() == JoinKind::Left) - { - right_stream_filter_push_down_all_input_columns_available = false; - } + right_stream_filter_push_down_input_columns_available = false; else if (table_join.kind() == JoinKind::Right) + left_stream_filter_push_down_input_columns_available = false; + + /** We disable push down to right table in cases: + * 1. Right side is already filled. Example: JOIN with Dictionary. + * 2. ASOF Right join is not supported. + */ + bool allow_push_down_to_right = join && join->allowPushDownToRight() && table_join.strictness() != JoinStrictness::Asof; + if (!allow_push_down_to_right) + right_stream_filter_push_down_input_columns_available = false; + + Names equivalent_columns_to_push_down; + + if (left_stream_filter_push_down_input_columns_available) { - left_stream_filter_push_down_all_input_columns_available = false; + for (const auto & [name, _] : equivalent_left_stream_column_to_right_stream_column) + equivalent_columns_to_push_down.push_back(name); } - else if (table_join.kind() == JoinKind::Full) + + if (right_stream_filter_push_down_input_columns_available) { - left_stream_filter_push_down_all_input_columns_available = false; - right_stream_filter_push_down_all_input_columns_available = false; + for (const auto & [name, _] : equivalent_right_stream_column_to_left_stream_column) + equivalent_columns_to_push_down.push_back(name); } - auto old_filter_expression = filter->getExpression(); - ActionsDAGPtr new_filter_expression; + Names left_stream_available_columns_to_push_down = get_available_columns_for_filter(true /*push_to_left_stream*/, left_stream_filter_push_down_input_columns_available); + Names right_stream_available_columns_to_push_down = get_available_columns_for_filter(false /*push_to_left_stream*/, right_stream_filter_push_down_input_columns_available); - size_t left_stream_push_down_updated_steps = join_push_down(true /*push_to_left_stream*/, left_stream_filter_push_down_all_input_columns_available); - size_t right_stream_push_down_updated_steps = 0; + auto join_filter_push_down_actions = filter->getExpression()->splitActionsForJOINFilterPushDown(filter->getFilterColumnName(), + filter->removesFilterColumn(), + left_stream_available_columns_to_push_down, + left_stream_input_header.getColumnsWithTypeAndName(), + right_stream_available_columns_to_push_down, + right_stream_input_header.getColumnsWithTypeAndName(), + equivalent_columns_to_push_down, + equivalent_left_stream_column_to_right_stream_column, + equivalent_right_stream_column_to_left_stream_column); - /** We disable push down to right table in cases: - * 1. Right side is already filled. Example: JOIN with Dictionary. - * 2. ASOF Right join is not supported. 
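The JOIN-kind rules spelled out above reduce to a small decision about which side may receive conditions on its own plain columns. This is a sketch of that decision only; the helper and its names are invented, and FULL JOIN is simply excluded because the optimization above returns early for it:

    #include <iostream>

    enum class JoinKind { Inner, Left, Right, Full };

    struct PushDownSides
    {
        bool to_left = false;   /// plain left-stream columns may appear in the filter pushed to the left side
        bool to_right = false;  /// plain right-stream columns may appear in the filter pushed to the right side
    };

    PushDownSides allowedSides(JoinKind kind, bool right_side_prefilled, bool is_asof)
    {
        /// The optimization bails out for FULL JOIN before reaching this point.
        if (kind == JoinKind::Full)
            return {};

        PushDownSides sides;
        sides.to_left = (kind != JoinKind::Right);
        sides.to_right = (kind != JoinKind::Left) && !right_side_prefilled && !is_asof;
        return sides;
    }

    int main()
    {
        auto inner = allowedSides(JoinKind::Inner, /*right_side_prefilled*/ false, /*is_asof*/ false);
        auto left = allowedSides(JoinKind::Left, false, false);
        std::cout << inner.to_left << inner.to_right << ' ' << left.to_left << left.to_right << '\n';  /// prints "11 10"
    }

Conditions that use only equivalent-set columns can additionally be mirrored to the other side for INNER, LEFT and RIGHT joins, which is what the equivalent_columns_to_push_down list collected above is for.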
- */ - if (join && join->allowPushDownToRight() && table_join.strictness() != JoinStrictness::Asof) - right_stream_push_down_updated_steps = join_push_down(false /*push_to_left_stream*/, right_stream_filter_push_down_all_input_columns_available); + size_t updated_steps = 0; - if (left_stream_push_down_updated_steps || right_stream_push_down_updated_steps) + if (join_filter_push_down_actions.left_stream_filter_to_push_down) { - new_filter_expression = std::move(left_table_filter_dag); + updated_steps += addNewFilterStepOrThrow(parent_node, + nodes, + join_filter_push_down_actions.left_stream_filter_to_push_down, + join_filter_push_down_actions.left_stream_filter_removes_filter, + 0 /*child_idx*/, + false /*update_parent_filter*/); + LOG_DEBUG(&Poco::Logger::get("QueryPlanOptimizations"), + "Pushed down filter {} to the {} side of join", + join_filter_push_down_actions.left_stream_filter_to_push_down->getOutputs()[0]->result_name, + JoinKind::Left); + } - if (table_join.kind() == JoinKind::Right) - new_filter_expression = std::move(right_table_filter_dag); + if (join_filter_push_down_actions.right_stream_filter_to_push_down) + { + updated_steps += addNewFilterStepOrThrow(parent_node, + nodes, + join_filter_push_down_actions.right_stream_filter_to_push_down, + join_filter_push_down_actions.right_stream_filter_removes_filter, + 1 /*child_idx*/, + false /*update_parent_filter*/); + LOG_DEBUG(&Poco::Logger::get("QueryPlanOptimizations"), + "Pushed down filter {} to the {} side of join", + join_filter_push_down_actions.right_stream_filter_to_push_down->getOutputs()[0]->result_name, + JoinKind::Right); } - if (new_filter_expression) + if (updated_steps > 0) { - const auto * filter_node = new_filter_expression->tryFindInOutputs(filter->getFilterColumnName()); + const auto & filter_column_name = filter->getFilterColumnName(); + const auto & filter_expression = filter->getExpression(); + + const auto * filter_node = filter_expression->tryFindInOutputs(filter_column_name); if (!filter_node && !filter->removesFilterColumn()) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Filter column {} was removed from ActionsDAG but it is needed in result. DAG:\n{}", - filter->getFilterColumnName(), new_filter_expression->dumpDAG()); + "Filter column {} was removed from ActionsDAG but it is needed in result. DAG:\n{}", + filter_column_name, filter_expression->dumpDAG()); + /// Filter column was replaced to constant. const bool filter_is_constant = filter_node && filter_node->column && isColumnConst(*filter_node->column); + if (!filter_node || filter_is_constant) { /// This means that all predicates of filter were pushed down. /// Replace current actions to expression, as we don't need to filter anything. 
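Once the pushed filters are attached below the JOIN, the original FilterStep is kept only if some conjuncts remain; if its column vanished from the outputs or folded into a constant, it is replaced by a plain expression step. A toy sketch of that final decision, with invented names:

    #include <iostream>

    /// Sketch of the post-pushdown decision for the original filter step.
    struct ResidualFilterColumn
    {
        bool present_in_outputs = false;  /// the filter column still exists in the filter DAG outputs
        bool is_constant = false;         /// ... but it folded into a constant after the split
    };

    enum class ParentStep { KeepFilter, ReplaceWithExpression };

    ParentStep decide(const ResidualFilterColumn & column)
    {
        if (!column.present_in_outputs || column.is_constant)
            return ParentStep::ReplaceWithExpression;  /// all predicates were pushed down, nothing left to filter
        return ParentStep::KeepFilter;                 /// some conjuncts stayed above the JOIN
    }

    int main()
    {
        std::cout << (decide({false, false}) == ParentStep::ReplaceWithExpression)
                  << (decide({true, true}) == ParentStep::ReplaceWithExpression)
                  << (decide({true, false}) == ParentStep::KeepFilter) << '\n';  /// 111
    }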
- auto forward_columns_actions = std::make_shared(child->getOutputStream().header.getColumnsWithTypeAndName()); - parent = std::make_unique(child->getOutputStream(), std::move(forward_columns_actions)); + parent = std::make_unique(child->getOutputStream(), filter_expression); + } + else + { + filter->updateInputStream(child->getOutputStream()); } } - return left_stream_push_down_updated_steps; + return updated_steps; } size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes) @@ -546,9 +520,6 @@ size_t tryPushDownFilter(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes if (!keys.contains(column.name)) allowed_inputs.push_back(column.name); - // for (const auto & name : allowed_inputs) - // std::cerr << name << std::endl; - if (auto updated_steps = tryAddNewFilterStep(parent_node, nodes, allowed_inputs)) return updated_steps; } diff --git a/tests/performance/join_filter_pushdown_equivalent_sets.xml b/tests/performance/join_filter_pushdown_equivalent_sets.xml index a75e12cc5fc5..caddcb295c96 100644 --- a/tests/performance/join_filter_pushdown_equivalent_sets.xml +++ b/tests/performance/join_filter_pushdown_equivalent_sets.xml @@ -5,19 +5,11 @@ INSERT INTO test_table_1 SELECT number, number FROM numbers(5000000) INSERT INTO test_table_2 SELECT number, number FROM numbers(5000000) - - - join_kind - - INNER - LEFT - RIGHT - FULL - - - - - SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs {join_kind} JOIN test_table_2 AS rhs ON lhs.id = rhs.id WHERE lhs.id = 5 FORMAT Null + SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id WHERE lhs.id = 5 FORMAT Null + SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id WHERE rhs.id = 5 FORMAT Null + SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id WHERE lhs.id = 5 AND rhs.id = 5 FORMAT Null + SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id WHERE lhs.id = 5 FORMAT Null + SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id WHERE rhs.id = 5 FORMAT Null DROP TABLE test_table_1 DROP TABLE test_table_2 diff --git a/tests/queries/0_stateless/01655_plan_optimizations.reference b/tests/queries/0_stateless/01655_plan_optimizations.reference index 436d06c50768..1b9755a74d5d 100644 --- a/tests/queries/0_stateless/01655_plan_optimizations.reference +++ b/tests/queries/0_stateless/01655_plan_optimizations.reference @@ -180,12 +180,14 @@ Filter column: notEquals(__table1.number, 1_UInt8) > one condition of filter is pushed down before INNER JOIN Join Join -Filter column: notEquals(number, 1) +Filter column: and(notEquals(number, 1), notEquals(number, 2)) Join +Filter column: and(notEquals(b, 2), notEquals(b, 1)) > (analyzer) one condition of filter is pushed down before INNER JOIN Join Join -Filter column: notEquals(__table1.number, 1_UInt8) +Filter column: and(notEquals(__table1.number, 1_UInt8), notEquals(__table1.number, 2_UInt8)) +Filter column: and(notEquals(__table2.b, 2_UInt8), notEquals(__table2.b, 1_UInt8)) 3 3 > filter is pushed down before UNION Union diff --git a/tests/queries/0_stateless/01655_plan_optimizations.sh b/tests/queries/0_stateless/01655_plan_optimizations.sh index 5a5172642439..864dd69412a7 100755 --- a/tests/queries/0_stateless/01655_plan_optimizations.sh +++ 
b/tests/queries/0_stateless/01655_plan_optimizations.sh @@ -248,14 +248,14 @@ $CLICKHOUSE_CLIENT --allow_experimental_analyzer=0 -q " select number as a, r.b from numbers(4) as l any inner join ( select number + 2 as b from numbers(3) ) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0" | - grep -o "Join\|Filter column: notEquals(number, 1)" + grep -o "Join\|Filter column: and(notEquals(number, 1), notEquals(number, 2))\|Filter column: and(notEquals(b, 2), notEquals(b, 1))" echo "> (analyzer) one condition of filter is pushed down before INNER JOIN" $CLICKHOUSE_CLIENT --allow_experimental_analyzer=1 -q " explain actions = 1 select number as a, r.b from numbers(4) as l any inner join ( select number + 2 as b from numbers(3) ) as r on a = r.b where a != 1 and b != 2 settings enable_optimize_predicate_expression = 0" | - grep -o "Join\|Filter column: notEquals(__table1.number, 1_UInt8)" + grep -o "Join\|Filter column: and(notEquals(__table1.number, 1_UInt8), notEquals(__table1.number, 2_UInt8))\|Filter column: and(notEquals(__table2.b, 2_UInt8), notEquals(__table2.b, 1_UInt8))" $CLICKHOUSE_CLIENT -q " select number as a, r.b from numbers(4) as l any inner join ( select number + 2 as b from numbers(3) diff --git a/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference index df8198bc8568..aba659e8f130 100644 --- a/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference +++ b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference @@ -1,10 +1,16 @@ 1 1 1 +1 +1 1 1 1 1 1 +1 +1 +1 1 +1 1 1 1 1 1 1 1 diff --git a/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference new file mode 100644 index 000000000000..00740e6380f4 --- /dev/null +++ b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.reference @@ -0,0 +1,710 @@ +-- { echoOn } + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +Expression ((Project names + (Projection + ))) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: INNER + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table1.id UInt64 + __table1.value String + Filter column: equals(__table1.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table1.id UInt64 : 3 + ALIAS value :: 1 -> __table1.value String : 0 + FUNCTION equals(__table1.id : 3, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Filter (( + (JOIN 
actions + Change column names to column identifiers))) + Header: __table2.id UInt64 + __table2.value String + Filter column: equals(__table2.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table2.id UInt64 : 3 + ALIAS value :: 1 -> __table2.value String : 0 + FUNCTION equals(__table2.id : 3, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +Expression ((Project names + (Projection + ))) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: INNER + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table1.id UInt64 + __table1.value String + Filter column: equals(__table1.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table1.id UInt64 : 3 + ALIAS value :: 1 -> __table1.value String : 0 + FUNCTION equals(__table1.id : 3, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table2.id UInt64 + __table2.value String + Filter column: equals(__table2.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table2.id UInt64 : 3 + ALIAS value :: 1 -> __table2.value String : 0 + FUNCTION equals(__table2.id : 3, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; +Expression ((Project names + (Projection + ))) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id 
UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: INNER + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table1.id UInt64 + __table1.value String + Filter column: and(equals(__table1.id, 5_UInt8), equals(__table1.id, 6_UInt8)) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 2 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 3 + ALIAS id :: 0 -> __table1.id UInt64 : 4 + ALIAS value :: 1 -> __table1.value String : 0 + FUNCTION equals(__table1.id : 4, 6_UInt8 :: 2) -> equals(__table1.id, 6_UInt8) UInt8 : 1 + FUNCTION equals(__table1.id : 4, 5_UInt8 :: 3) -> equals(__table1.id, 5_UInt8) UInt8 : 2 + FUNCTION and(equals(__table1.id, 5_UInt8) :: 2, equals(__table1.id, 6_UInt8) :: 1) -> and(equals(__table1.id, 5_UInt8), equals(__table1.id, 6_UInt8)) UInt8 : 3 + Positions: 3 4 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table2.id UInt64 + __table2.value String + Filter column: and(equals(__table2.id, 6_UInt8), equals(__table2.id, 5_UInt8)) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 3 + ALIAS id :: 0 -> __table2.id UInt64 : 4 + ALIAS value :: 1 -> __table2.value String : 0 + FUNCTION equals(__table2.id : 4, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + FUNCTION equals(__table2.id : 4, 6_UInt8 :: 3) -> equals(__table2.id, 6_UInt8) UInt8 : 2 + FUNCTION and(equals(__table2.id, 6_UInt8) :: 2, equals(__table2.id, 5_UInt8) :: 1) -> and(equals(__table2.id, 6_UInt8), equals(__table2.id, 5_UInt8)) UInt8 : 3 + Positions: 3 4 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +Expression ((Project names + (Projection + ))) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: LEFT + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table1.id UInt64 + __table1.value String + Filter column: 
equals(__table1.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table1.id UInt64 : 3 + ALIAS value :: 1 -> __table1.value String : 0 + FUNCTION equals(__table1.id : 3, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table2.id UInt64 + __table2.value String + Filter column: equals(__table2.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table2.id UInt64 : 3 + ALIAS value :: 1 -> __table2.value String : 0 + FUNCTION equals(__table2.id : 3, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +Expression ((Project names + Projection)) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Filter ((WHERE + DROP unused columns after JOIN)) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Filter column: equals(__table2.id, 5_UInt8) (removed) + Actions: INPUT :: 0 -> __table1.id UInt64 : 0 + INPUT :: 1 -> __table1.value String : 1 + INPUT :: 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 4 + FUNCTION equals(__table2.id : 3, 5_UInt8 :: 4) -> equals(__table2.id, 5_UInt8) UInt8 : 5 + Positions: 5 0 1 2 3 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: LEFT + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table1.id UInt64 + __table1.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table1.id UInt64 : 2 + ALIAS value :: 1 -> __table1.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table2.id UInt64 + __table2.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table2.id UInt64 : 2 + ALIAS value :: 1 -> __table2.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT 
'--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +Expression ((Project names + Projection)) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Filter ((WHERE + DROP unused columns after JOIN)) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Filter column: equals(__table1.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT :: 1 -> __table1.value String : 1 + INPUT :: 2 -> __table2.value String : 2 + INPUT :: 3 -> __table2.id UInt64 : 3 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 4 + FUNCTION equals(__table1.id : 0, 5_UInt8 :: 4) -> equals(__table1.id, 5_UInt8) UInt8 : 5 + Positions: 5 0 1 2 3 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: RIGHT + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table1.id UInt64 + __table1.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table1.id UInt64 : 2 + ALIAS value :: 1 -> __table1.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table2.id UInt64 + __table2.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table2.id UInt64 : 2 + ALIAS value :: 1 -> __table2.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +Expression ((Project names + (Projection + ))) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: RIGHT + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Filter (( + (JOIN actions + Change 
column names to column identifiers))) + Header: __table1.id UInt64 + __table1.value String + Filter column: equals(__table1.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table1.id UInt64 : 3 + ALIAS value :: 1 -> __table1.value String : 0 + FUNCTION equals(__table1.id : 3, 5_UInt8 :: 2) -> equals(__table1.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Filter (( + (JOIN actions + Change column names to column identifiers))) + Header: __table2.id UInt64 + __table2.value String + Filter column: equals(__table2.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 2 + ALIAS id :: 0 -> __table2.id UInt64 : 3 + ALIAS value :: 1 -> __table2.value String : 0 + FUNCTION equals(__table2.id : 3, 5_UInt8 :: 2) -> equals(__table2.id, 5_UInt8) UInt8 : 1 + Positions: 1 3 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +Expression ((Project names + Projection)) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Filter ((WHERE + DROP unused columns after JOIN)) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Filter column: equals(__table1.id, 5_UInt8) (removed) + Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT :: 1 -> __table1.value String : 1 + INPUT :: 2 -> __table2.value String : 2 + INPUT :: 3 -> __table2.id UInt64 : 3 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 4 + FUNCTION equals(__table1.id : 0, 5_UInt8 :: 4) -> equals(__table1.id, 5_UInt8) UInt8 : 5 + Positions: 5 0 1 2 3 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: FULL + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table1.id UInt64 + __table1.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table1.id UInt64 : 2 + ALIAS value :: 1 -> __table1.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table2.id UInt64 + __table2.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table2.id UInt64 : 2 + ALIAS value :: 1 -> __table2.value String : 0 + Positions: 2 0 + ReadFromMergeTree 
(default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +Expression ((Project names + Projection)) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Filter ((WHERE + DROP unused columns after JOIN)) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Filter column: equals(__table2.id, 5_UInt8) (removed) + Actions: INPUT :: 0 -> __table1.id UInt64 : 0 + INPUT :: 1 -> __table1.value String : 1 + INPUT :: 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 4 + FUNCTION equals(__table2.id : 3, 5_UInt8 :: 4) -> equals(__table2.id, 5_UInt8) UInt8 : 5 + Positions: 5 0 1 2 3 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: FULL + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table1.id UInt64 + __table1.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table1.id UInt64 : 2 + ALIAS value :: 1 -> __table1.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table2.id UInt64 + __table2.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table2.id UInt64 : 2 + ALIAS value :: 1 -> __table2.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; +5 5 5 5 +SELECT '--'; +-- +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; +Expression ((Project names + Projection)) +Header: id UInt64 + rhs.id UInt64 + value String + rhs.value String +Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT : 1 -> __table1.value String : 1 + INPUT : 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + ALIAS __table1.id :: 0 -> id UInt64 : 4 + ALIAS __table1.value :: 1 -> value String : 0 + ALIAS __table2.value :: 2 -> rhs.value String : 1 + ALIAS __table2.id :: 3 -> rhs.id UInt64 : 2 +Positions: 4 2 0 1 + Filter ((WHERE + DROP unused columns after JOIN)) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id 
UInt64 + Filter column: and(equals(__table1.id, 5_UInt8), equals(__table2.id, 6_UInt8)) (removed) + Actions: INPUT : 0 -> __table1.id UInt64 : 0 + INPUT :: 1 -> __table1.value String : 1 + INPUT :: 2 -> __table2.value String : 2 + INPUT : 3 -> __table2.id UInt64 : 3 + COLUMN Const(UInt8) -> 5_UInt8 UInt8 : 4 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 5 + FUNCTION equals(__table1.id : 0, 5_UInt8 :: 4) -> equals(__table1.id, 5_UInt8) UInt8 : 6 + FUNCTION equals(__table2.id : 3, 6_UInt8 :: 5) -> equals(__table2.id, 6_UInt8) UInt8 : 4 + FUNCTION and(equals(__table1.id, 5_UInt8) :: 6, equals(__table2.id, 6_UInt8) :: 4) -> and(equals(__table1.id, 5_UInt8), equals(__table2.id, 6_UInt8)) UInt8 : 5 + Positions: 5 0 1 2 3 + Join (JOIN FillRightFirst) + Header: __table1.id UInt64 + __table1.value String + __table2.value String + __table2.id UInt64 + Type: FULL + Strictness: ALL + Algorithm: HashJoin + Clauses: [(__table1.id) = (__table2.id)] + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table1.id UInt64 + __table1.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table1.id UInt64 : 2 + ALIAS value :: 1 -> __table1.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_1) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: __table2.id UInt64 + __table2.value String + Actions: INPUT : 0 -> id UInt64 : 0 + INPUT : 1 -> value String : 1 + ALIAS id :: 0 -> __table2.id UInt64 : 2 + ALIAS value :: 1 -> __table2.value String : 0 + Positions: 2 0 + ReadFromMergeTree (default.test_table_2) + Header: id UInt64 + value String + ReadType: Default + Parts: 1 + Granules: 1 +SELECT '--'; +-- +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; diff --git a/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.sql b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.sql new file mode 100644 index 000000000000..9627b55e6337 --- /dev/null +++ b/tests/queries/0_stateless/03036_join_filter_push_down_equivalent_sets.sql @@ -0,0 +1,131 @@ +SET allow_experimental_analyzer = 1; +SET optimize_move_to_prewhere = 0; + +DROP TABLE IF EXISTS test_table_1; +CREATE TABLE test_table_1 +( + id UInt64, + value String +) ENGINE=MergeTree ORDER BY id; + +CREATE TABLE test_table_2 +( + id UInt64, + value String +) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table_1 SELECT number, number FROM numbers(10); +INSERT INTO test_table_2 SELECT number, number FROM numbers(10); + +-- { echoOn } + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; + +SELECT '--'; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; + +SELECT '--'; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN 
test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs INNER JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; + +SELECT '--'; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; + +SELECT '--'; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs LEFT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; + +SELECT '--'; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; + +SELECT '--'; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs RIGHT JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; + +SELECT '--'; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; + +SELECT '--'; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE rhs.id = 5; + +SELECT '--'; + +EXPLAIN header = 1, actions = 1 +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; + +SELECT '--'; + +SELECT lhs.id, rhs.id, lhs.value, rhs.value FROM test_table_1 AS lhs FULL JOIN test_table_2 AS rhs ON lhs.id = rhs.id +WHERE lhs.id = 5 AND rhs.id = 6; + +-- { echoOff } + +DROP TABLE test_table_1; +DROP TABLE test_table_2; From 6c307f043e139f81f2271d8bd56edb38b1d9e434 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 9 Apr 2024 18:52:45 +0300 Subject: [PATCH 467/470] Updated implementation --- .../Optimizations/filterPushDown.cpp | 26 +++++++++++++++++++ .../02861_filter_pushdown_const_bug.reference | 8 +----- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index f59b4dafbae0..91140c4ab37a 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -349,8 +349,30 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: size_t updated_steps = 0; + /** If result filter to left 
or right stream has column that is one of the stream inputs, we need distinguish filter column from + * actual input column. It is necessary because after filter step, filter column became constant column with value 1, and + * not all JOIN algorithms properly work with constants. + * + * Example: SELECT key FROM ( SELECT key FROM t1 ) AS t1 JOIN ( SELECT key FROM t1 ) AS t2 ON t1.key = t2.key WHERE key; + */ + auto update_stream_filter_node_if_needed = [&](ActionsDAG & stream_filter, const Block & stream_header) + { + auto & stream_filter_output_nodes = stream_filter.getOutputs(); + const auto & stream_filter_node = stream_filter_output_nodes[0]; + if (!stream_header.has(stream_filter_node->result_name)) + return false; + + auto & alias_node = stream_filter.addAlias(*stream_filter_node, "__filter" + stream_filter_node->result_name); + stream_filter_output_nodes.insert(stream_filter_output_nodes.begin(), &alias_node); + return true; + }; + if (join_filter_push_down_actions.left_stream_filter_to_push_down) { + bool updated_filter = update_stream_filter_node_if_needed(*join_filter_push_down_actions.left_stream_filter_to_push_down, left_stream_input_header); + if (updated_filter) + join_filter_push_down_actions.left_stream_filter_removes_filter = true; + updated_steps += addNewFilterStepOrThrow(parent_node, nodes, join_filter_push_down_actions.left_stream_filter_to_push_down, @@ -365,6 +387,10 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: if (join_filter_push_down_actions.right_stream_filter_to_push_down) { + bool updated_filter = update_stream_filter_node_if_needed(*join_filter_push_down_actions.right_stream_filter_to_push_down, right_stream_input_header); + if (updated_filter) + join_filter_push_down_actions.right_stream_filter_removes_filter = true; + updated_steps += addNewFilterStepOrThrow(parent_node, nodes, join_filter_push_down_actions.right_stream_filter_to_push_down, diff --git a/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference index aba659e8f130..866d6cb7ec38 100644 --- a/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference +++ b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.reference @@ -1,16 +1,10 @@ 1 1 1 -1 -1 1 1 1 1 1 -1 -1 -1 1 -1 1 -1 1 1 1 1 1 +1 2 From 45bf7163e974c429221beae2637d0c700df81704 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 10 Apr 2024 10:15:51 +0300 Subject: [PATCH 468/470] Fixed tests --- src/Processors/QueryPlan/Optimizations/filterPushDown.cpp | 2 +- tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 91140c4ab37a..22b5ae0dd66a 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -362,7 +362,7 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: if (!stream_header.has(stream_filter_node->result_name)) return false; - auto & alias_node = stream_filter.addAlias(*stream_filter_node, "__filter" + stream_filter_node->result_name); + const auto & alias_node = stream_filter.addAlias(*stream_filter_node, "__filter" + stream_filter_node->result_name); stream_filter_output_nodes.insert(stream_filter_output_nodes.begin(), &alias_node); return true; }; diff --git 
a/tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql index a299e50984f9..ea52df5d4b43 100644 --- a/tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql +++ b/tests/queries/0_stateless/02861_filter_pushdown_const_bug.sql @@ -1,3 +1,5 @@ +SET allow_experimental_analyzer = 1; + DROP TABLE IF EXISTS t1; CREATE TABLE t1 (key UInt8) ENGINE = Memory; From 4a1a932c5f567cd2275d2bd3a4f6933cb0abfced Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 10 Apr 2024 16:04:06 +0300 Subject: [PATCH 469/470] Fixed tests --- src/Interpreters/ActionsDAG.cpp | 44 ++++++++++--------- src/Interpreters/ActionsDAG.h | 8 ++-- .../Optimizations/filterPushDown.cpp | 30 +------------ .../03093_filter_push_down_crash.reference | 5 +++ .../03093_filter_push_down_crash.sql.j2 | 11 +++++ 5 files changed, 45 insertions(+), 53 deletions(-) create mode 100644 tests/queries/0_stateless/03093_filter_push_down_crash.reference create mode 100644 tests/queries/0_stateless/03093_filter_push_down_crash.sql.j2 diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index e6a489c5785a..06e6e1f8fc83 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -2328,9 +2328,9 @@ ActionsDAG::ActionsForJOINFilterPushDown ActionsDAG::splitActionsForJOINFilterPu const std::string & filter_name, bool removes_filter, const Names & left_stream_available_columns_to_push_down, - const ColumnsWithTypeAndName & left_stream_all_inputs, + const Block & left_stream_header, const Names & right_stream_available_columns_to_push_down, - const ColumnsWithTypeAndName & right_stream_all_inputs, + const Block & right_stream_header, const Names & equivalent_columns_to_push_down, const std::unordered_map & equivalent_left_stream_column_to_right_stream_column, const std::unordered_map & equivalent_right_stream_column_to_left_stream_column) @@ -2418,16 +2418,29 @@ ActionsDAG::ActionsForJOINFilterPushDown ActionsDAG::splitActionsForJOINFilterPu } } - auto left_stream_filter_to_push_down = createActionsForConjunction(left_stream_allowed_conjunctions, left_stream_all_inputs); - auto right_stream_filter_to_push_down = createActionsForConjunction(right_stream_allowed_conjunctions, right_stream_all_inputs); + auto left_stream_filter_to_push_down = createActionsForConjunction(left_stream_allowed_conjunctions, left_stream_header.getColumnsWithTypeAndName()); + auto right_stream_filter_to_push_down = createActionsForConjunction(right_stream_allowed_conjunctions, right_stream_header.getColumnsWithTypeAndName()); auto replace_equivalent_columns_in_filter = [](const ActionsDAGPtr & filter, - const ColumnsWithTypeAndName & stream_inputs, + const Block & stream_header, const std::unordered_map & columns_to_replace) { auto updated_filter = ActionsDAG::buildFilterActionsDAG({filter->getOutputs()[0]}, columns_to_replace); chassert(updated_filter->getOutputs().size() == 1); + /** If result filter to left or right stream has column that is one of the stream inputs, we need distinguish filter column from + * actual input column. It is necessary because after filter step, filter column became constant column with value 1, and + * not all JOIN algorithms properly work with constants. 
+ * + * Example: SELECT key FROM ( SELECT key FROM t1 ) AS t1 JOIN ( SELECT key FROM t1 ) AS t2 ON t1.key = t2.key WHERE key; + */ + const auto * stream_filter_node = updated_filter->getOutputs()[0]; + if (stream_header.has(stream_filter_node->result_name)) + { + const auto & alias_node = updated_filter->addAlias(*stream_filter_node, "__filter" + stream_filter_node->result_name); + updated_filter->getOutputs()[0] = &alias_node; + } + std::unordered_map> updated_filter_inputs; for (const auto & input : updated_filter->getInputs()) @@ -2449,7 +2462,7 @@ ActionsDAG::ActionsForJOINFilterPushDown ActionsDAG::splitActionsForJOINFilterPu updated_filter_inputs[input->result_name].push_back(updated_filter_input_node); } - for (const auto & input_column : stream_inputs) + for (const auto & input_column : stream_header.getColumnsWithTypeAndName()) { const Node * input; auto & list = updated_filter_inputs[input_column.name]; @@ -2472,12 +2485,12 @@ ActionsDAG::ActionsForJOINFilterPushDown ActionsDAG::splitActionsForJOINFilterPu if (left_stream_filter_to_push_down) left_stream_filter_to_push_down = replace_equivalent_columns_in_filter(left_stream_filter_to_push_down, - left_stream_all_inputs, + left_stream_header, equivalent_right_stream_column_to_left_stream_column); if (right_stream_filter_to_push_down) right_stream_filter_to_push_down = replace_equivalent_columns_in_filter(right_stream_filter_to_push_down, - right_stream_all_inputs, + right_stream_header, equivalent_left_stream_column_to_right_stream_column); /* @@ -2492,27 +2505,16 @@ ActionsDAG::ActionsForJOINFilterPushDown ActionsDAG::splitActionsForJOINFilterPu bool left_stream_filter_removes_filter = true; bool right_stream_filter_removes_filter = true; - auto columns_have_column_with_name = [](const ColumnsWithTypeAndName & columns, const std::string & column_name) - { - for (const auto & column : columns) - { - if (column.name == column_name) - return true; - } - - return false; - }; - if (left_stream_filter_to_push_down) { const auto & left_stream_filter_column_name = left_stream_filter_to_push_down->getOutputs()[0]->result_name; - left_stream_filter_removes_filter = !columns_have_column_with_name(left_stream_all_inputs, left_stream_filter_column_name); + left_stream_filter_removes_filter = !left_stream_header.has(left_stream_filter_column_name); } if (right_stream_filter_to_push_down) { const auto & right_stream_filter_column_name = right_stream_filter_to_push_down->getOutputs()[0]->result_name; - right_stream_filter_removes_filter = !columns_have_column_with_name(right_stream_all_inputs, right_stream_filter_column_name); + right_stream_filter_removes_filter = !right_stream_header.has(right_stream_filter_column_name); } ActionsDAG::ActionsForJOINFilterPushDown result diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 278ea4e5ca4c..a8a377866d3a 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -392,10 +392,10 @@ class ActionsDAG * @param removes_filter - if filter is removed after it is applied. * @param left_stream_available_columns_to_push_down - columns from left stream that are safe to use in push down conditions * to left stream. - * @param left_stream_all_inputs - all left streams columns. + * @param left_stream_header - left stream header. * @param right_stream_available_columns_to_push_down - columns from right stream that are safe to use in push down conditions * to right stream. - * @param right_stream_all_inputs - all right stream columns. 
+ * @param right_stream_header - right stream header. * @param equivalent_columns_to_push_down - columns from left and right streams that are safe to use in push down conditions * to left and right streams. * @param equivalent_left_stream_column_to_right_stream_column - equivalent left stream column name to right stream column map. @@ -405,9 +405,9 @@ class ActionsDAG const std::string & filter_name, bool removes_filter, const Names & left_stream_available_columns_to_push_down, - const ColumnsWithTypeAndName & left_stream_all_inputs, + const Block & left_stream_header, const Names & right_stream_available_columns_to_push_down, - const ColumnsWithTypeAndName & right_stream_all_inputs, + const Block & right_stream_header, const Names & equivalent_columns_to_push_down, const std::unordered_map & equivalent_left_stream_column_to_right_stream_column, const std::unordered_map & equivalent_right_stream_column_to_left_stream_column); diff --git a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp index 22b5ae0dd66a..ebf780bb692e 100644 --- a/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp +++ b/src/Processors/QueryPlan/Optimizations/filterPushDown.cpp @@ -242,10 +242,10 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: * to RIGHT/LEFT JOIN side. */ - const auto & left_stream_input_header = child->getInputStreams().front().header; - const auto & right_stream_input_header = child->getInputStreams().back().header; const auto & join_header = child->getOutputStream().header; const auto & table_join = join ? join->getJoin()->getTableJoin() : filled_join->getJoin()->getTableJoin(); + const auto & left_stream_input_header = child->getInputStreams().front().header; + const auto & right_stream_input_header = child->getInputStreams().back().header; if (table_join.kind() == JoinKind::Full) return 0; @@ -349,30 +349,8 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: size_t updated_steps = 0; - /** If result filter to left or right stream has column that is one of the stream inputs, we need distinguish filter column from - * actual input column. It is necessary because after filter step, filter column became constant column with value 1, and - * not all JOIN algorithms properly work with constants. 
- * - * Example: SELECT key FROM ( SELECT key FROM t1 ) AS t1 JOIN ( SELECT key FROM t1 ) AS t2 ON t1.key = t2.key WHERE key; - */ - auto update_stream_filter_node_if_needed = [&](ActionsDAG & stream_filter, const Block & stream_header) - { - auto & stream_filter_output_nodes = stream_filter.getOutputs(); - const auto & stream_filter_node = stream_filter_output_nodes[0]; - if (!stream_header.has(stream_filter_node->result_name)) - return false; - - const auto & alias_node = stream_filter.addAlias(*stream_filter_node, "__filter" + stream_filter_node->result_name); - stream_filter_output_nodes.insert(stream_filter_output_nodes.begin(), &alias_node); - return true; - }; - if (join_filter_push_down_actions.left_stream_filter_to_push_down) { - bool updated_filter = update_stream_filter_node_if_needed(*join_filter_push_down_actions.left_stream_filter_to_push_down, left_stream_input_header); - if (updated_filter) - join_filter_push_down_actions.left_stream_filter_removes_filter = true; - updated_steps += addNewFilterStepOrThrow(parent_node, nodes, join_filter_push_down_actions.left_stream_filter_to_push_down, @@ -387,10 +365,6 @@ static size_t tryPushDownOverJoinStep(QueryPlan::Node * parent_node, QueryPlan:: if (join_filter_push_down_actions.right_stream_filter_to_push_down) { - bool updated_filter = update_stream_filter_node_if_needed(*join_filter_push_down_actions.right_stream_filter_to_push_down, right_stream_input_header); - if (updated_filter) - join_filter_push_down_actions.right_stream_filter_removes_filter = true; - updated_steps += addNewFilterStepOrThrow(parent_node, nodes, join_filter_push_down_actions.right_stream_filter_to_push_down, diff --git a/tests/queries/0_stateless/03093_filter_push_down_crash.reference b/tests/queries/0_stateless/03093_filter_push_down_crash.reference new file mode 100644 index 000000000000..bf98540f4b3d --- /dev/null +++ b/tests/queries/0_stateless/03093_filter_push_down_crash.reference @@ -0,0 +1,5 @@ +1 \N 1 +1 \N 1 +1 \N 1 +1 \N 1 +1 \N 1 diff --git a/tests/queries/0_stateless/03093_filter_push_down_crash.sql.j2 b/tests/queries/0_stateless/03093_filter_push_down_crash.sql.j2 new file mode 100644 index 000000000000..2cbbd89ca0cb --- /dev/null +++ b/tests/queries/0_stateless/03093_filter_push_down_crash.sql.j2 @@ -0,0 +1,11 @@ +{% for join_algorithm in ['default', 'full_sorting_merge', 'hash', 'partial_merge', 'grace_hash'] -%} + +SET join_algorithm = '{{ join_algorithm }}'; + +SELECT * +FROM (SELECT 1 AS key) AS t1 +JOIN (SELECT NULL, 1 AS key) AS t2 +ON t1.key = t2.key +WHERE t1.key ORDER BY key; + +{% endfor -%} From e6a9556daf9b56bb62fdef08cdb41a809d07119f Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 10 Apr 2024 13:16:21 +0200 Subject: [PATCH 470/470] Put functions in BackupUtils.h to a separate namespace BackupUtils. 
--- src/Backups/BackupEntriesCollector.cpp | 4 ++-- src/Backups/BackupUtils.cpp | 10 +++++----- src/Backups/BackupUtils.h | 11 ++++++++--- src/Backups/BackupsWorker.cpp | 2 +- src/Backups/RestorerFromBackup.cpp | 8 ++++---- 5 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index d05d22f52aa3..d0c09caf6b9c 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -122,7 +122,7 @@ BackupEntries BackupEntriesCollector::run() = BackupSettings::Util::filterHostIDs(backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num); /// Do renaming in the create queries according to the renaming config. - renaming_map = makeRenamingMapFromBackupQuery(backup_query_elements); + renaming_map = BackupUtils::makeRenamingMap(backup_query_elements); /// Calculate the root path for collecting backup entries, it's either empty or has the format "shards//replicas//". calculateRootPathInBackup(); @@ -569,7 +569,7 @@ std::vector> BackupEntriesCollector::findTablesInD auto filter_by_table_name = [&](const String & table_name) { - if (isInnerTableShouldBeSkippedForBackup(database_name, table_name)) + if (BackupUtils::isInnerTable(database_name, table_name)) return false; if (database_info.tables.contains(table_name)) diff --git a/src/Backups/BackupUtils.cpp b/src/Backups/BackupUtils.cpp index 1564587dc67a..fb448fb64adf 100644 --- a/src/Backups/BackupUtils.cpp +++ b/src/Backups/BackupUtils.cpp @@ -8,10 +8,10 @@ #include -namespace DB +namespace DB::BackupUtils { -DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & elements) +DDLRenamingMap makeRenamingMap(const ASTBackupQuery::Elements & elements) { DDLRenamingMap map; @@ -120,12 +120,12 @@ bool compareRestoredDatabaseDef(const IAST & restored_database_create_query, con return compareRestoredTableDef(restored_database_create_query, create_query_from_backup, global_context); } -bool isInnerTableShouldBeSkippedForBackup(const QualifiedTableName & table_name) +bool isInnerTable(const QualifiedTableName & table_name) { - return isInnerTableShouldBeSkippedForBackup(table_name.database, table_name.table); + return isInnerTable(table_name.database, table_name.table); } -bool isInnerTableShouldBeSkippedForBackup(const String & /* database_name */, const String & table_name) +bool isInnerTable(const String & /* database_name */, const String & table_name) { /// We skip inner tables of materialized views. return table_name.starts_with(".inner.") || table_name.starts_with(".inner_id."); diff --git a/src/Backups/BackupUtils.h b/src/Backups/BackupUtils.h index 48fe2dee7205..ba889eccc48e 100644 --- a/src/Backups/BackupUtils.h +++ b/src/Backups/BackupUtils.h @@ -11,8 +11,11 @@ class AccessRightsElements; class DDLRenamingMap; struct QualifiedTableName; +namespace BackupUtils +{ + /// Initializes a DDLRenamingMap from a BACKUP or RESTORE query. -DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & elements); +DDLRenamingMap makeRenamingMap(const ASTBackupQuery::Elements & elements); /// Returns access required to execute BACKUP query. 
AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & elements); @@ -22,7 +25,9 @@ bool compareRestoredTableDef(const IAST & restored_table_create_query, const IAS bool compareRestoredDatabaseDef(const IAST & restored_database_create_query, const IAST & create_query_from_backup, const ContextPtr & global_context); /// Returns true if this table should be skipped while making a backup because it's an inner table. -bool isInnerTableShouldBeSkippedForBackup(const QualifiedTableName & table_name); -bool isInnerTableShouldBeSkippedForBackup(const String & database_name, const String & table_name); +bool isInnerTable(const QualifiedTableName & table_name); +bool isInnerTable(const String & database_name, const String & table_name); + +} } diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp index 96fe770227c8..78455ca077d9 100644 --- a/src/Backups/BackupsWorker.cpp +++ b/src/Backups/BackupsWorker.cpp @@ -562,7 +562,7 @@ void BackupsWorker::doBackup( /// Checks access rights if this is not ON CLUSTER query. /// (If this is ON CLUSTER query executeDDLQueryOnCluster() will check access rights later.) - auto required_access = getRequiredAccessToBackup(backup_query->elements); + auto required_access = BackupUtils::getRequiredAccessToBackup(backup_query->elements); if (!on_cluster) context->checkAccess(required_access); diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index fcbe26caa534..2e576130dbcc 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -121,7 +121,7 @@ void RestorerFromBackup::run(Mode mode) restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num); /// Do renaming in the create queries according to the renaming config. - renaming_map = makeRenamingMapFromBackupQuery(restore_query_elements); + renaming_map = BackupUtils::makeRenamingMap(restore_query_elements); /// Calculate the root path in the backup for restoring, it's either empty or has the format "shards//replicas//". findRootPathsInBackup(); @@ -420,7 +420,7 @@ void RestorerFromBackup::findTableInBackupImpl(const QualifiedTableName & table_ } QualifiedTableName table_name = renaming_map.getNewTableName(table_name_in_backup); - if (skip_if_inner_table && isInnerTableShouldBeSkippedForBackup(table_name)) + if (skip_if_inner_table && BackupUtils::isInnerTable(table_name)) return; auto read_buffer = backup->readFile(*metadata_path); @@ -766,7 +766,7 @@ void RestorerFromBackup::checkDatabase(const String & database_name) ASTPtr existing_database_def = database->getCreateDatabaseQuery(); ASTPtr database_def_from_backup = database_info.create_database_query; - if (!compareRestoredDatabaseDef(*existing_database_def, *database_def_from_backup, context->getGlobalContext())) + if (!BackupUtils::compareRestoredDatabaseDef(*existing_database_def, *database_def_from_backup, context->getGlobalContext())) { throw Exception( ErrorCodes::CANNOT_RESTORE_DATABASE, @@ -937,7 +937,7 @@ void RestorerFromBackup::checkTable(const QualifiedTableName & table_name) { ASTPtr existing_table_def = database->getCreateTableQuery(resolved_id.table_name, context); ASTPtr table_def_from_backup = table_info.create_table_query; - if (!compareRestoredTableDef(*existing_table_def, *table_def_from_backup, context->getGlobalContext())) + if (!BackupUtils::compareRestoredTableDef(*existing_table_def, *table_def_from_backup, context->getGlobalContext())) { throw Exception( ErrorCodes::CANNOT_RESTORE_TABLE,