From b6201f57e18e889e42a8bc133eecb40eed854f09 Mon Sep 17 00:00:00 2001 From: kyligence-git Date: Tue, 9 Jul 2024 23:48:04 +0000 Subject: [PATCH 1/3] [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240710) --- cpp-ch/clickhouse.version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version index 6bba1b705e70..f40e9a51d346 100644 --- a/cpp-ch/clickhouse.version +++ b/cpp-ch/clickhouse.version @@ -1,4 +1,4 @@ CH_ORG=Kyligence -CH_BRANCH=rebase_ch/20240706 -CH_COMMIT=25bf31bfbdf +CH_BRANCH=rebase_ch/20240710 +CH_COMMIT=feb85da32a5 From c91cb1ee76588a73d34cb47edd138e3ce76e3576 Mon Sep 17 00:00:00 2001 From: Chang Chen Date: Wed, 10 Jul 2024 18:53:44 +0800 Subject: [PATCH 2/3] Fix UT due to https://github.com/ClickHouse/ClickHouse/pull/54881 --- cpp-ch/local-engine/Common/CHUtil.cpp | 2 + cpp-ch/local-engine/Common/DebugUtils.cpp | 26 +- .../tests/data/54881.snappy.parquet | Bin 0 -> 1529 bytes cpp-ch/local-engine/tests/gluten_test_util.h | 10 + .../tests/gtest_clickhouse_54881.cpp | 84 ++++ .../tests/json/clickhouse_pr_54881.json | 379 ++++++++++++++++++ 6 files changed, 488 insertions(+), 13 deletions(-) create mode 100644 cpp-ch/local-engine/tests/data/54881.snappy.parquet create mode 100644 cpp-ch/local-engine/tests/gtest_clickhouse_54881.cpp create mode 100644 cpp-ch/local-engine/tests/json/clickhouse_pr_54881.json diff --git a/cpp-ch/local-engine/Common/CHUtil.cpp b/cpp-ch/local-engine/Common/CHUtil.cpp index 800039f1d262..a3c856334938 100644 --- a/cpp-ch/local-engine/Common/CHUtil.cpp +++ b/cpp-ch/local-engine/Common/CHUtil.cpp @@ -762,6 +762,8 @@ void BackendInitializerUtil::initSettings(std::map & b settings.set("function_json_value_return_type_allow_complex", true); settings.set("function_json_value_return_type_allow_nullable", true); settings.set("precise_float_parsing", true); + settings.set("enable_named_columns_in_function_tuple", false); + if (backend_conf_map.contains(GLUTEN_TASK_OFFHEAP)) { auto task_memory = std::stoull(backend_conf_map.at(GLUTEN_TASK_OFFHEAP)); diff --git a/cpp-ch/local-engine/Common/DebugUtils.cpp b/cpp-ch/local-engine/Common/DebugUtils.cpp index 35f1f98cd935..9803eee5ee46 100644 --- a/cpp-ch/local-engine/Common/DebugUtils.cpp +++ b/cpp-ch/local-engine/Common/DebugUtils.cpp @@ -29,12 +29,12 @@ namespace debug { void headBlock(const DB::Block & block, size_t count) { - std::cerr << "============Block============" << std::endl; - std::cerr << block.dumpStructure() << std::endl; + std::cout << "============Block============" << std::endl; + std::cout << block.dumpStructure() << std::endl; // print header for (const auto & name : block.getNames()) - std::cerr << name << "\t"; - std::cerr << std::endl; + std::cout << name << "\t"; + std::cout << std::endl; // print rows for (size_t row = 0; row < std::min(count, block.rows()); ++row) @@ -45,36 +45,36 @@ void headBlock(const DB::Block & block, size_t count) auto col = block.getByPosition(column).column; if (column > 0) - std::cerr << "\t"; + std::cout << "\t"; DB::WhichDataType which(type); if (which.isAggregateFunction()) { - std::cerr << "Nan"; + std::cout << "Nan"; } else if (col->isNullAt(row)) { - std::cerr << "null"; + std::cout << "null"; } else { - std::cerr << toString((*col)[row]); + std::cout << toString((*col)[row]); } } - std::cerr << std::endl; + std::cout << std::endl; } } void headColumn(const DB::ColumnPtr & column, size_t count) { - std::cerr << "============Column============" << std::endl; + std::cout << "============Column============" << std::endl; // print header - std::cerr << column->getName() << "\t"; - std::cerr << std::endl; + std::cout << column->getName() << "\t"; + std::cout << std::endl; // print rows for (size_t row = 0; row < std::min(count, column->size()); ++row) - std::cerr << toString((*column)[row]) << std::endl; + std::cout << toString((*column)[row]) << std::endl; } } diff --git a/cpp-ch/local-engine/tests/data/54881.snappy.parquet b/cpp-ch/local-engine/tests/data/54881.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..0bc4860c90f8f24d1b4e464425f9ea0fb8a8c22e GIT binary patch literal 1529 zcmbW1&ubGw6o6kg+hnpy6`?ciLJnb}p@q0~cao+pMf4_~iXaG9WV_we;3jFa8%1ip z`2+7>J@nwgo8V0-NWrW14-h1H&`SmJ5AaR8lkAd{T>^PC?|t9Cycx1_?WRqEZqTdr z^_Ne#%T&V^p#)$-%|Rgp`9fX=GE3GAaQ0~NcTD|0e*dwU!tw$m0xKXU%Zq93*{jp% z7gHD!SV3SKVzRuH#(sT0{HCU`g1|I^6%mu=vuW(b=hIs$N)uF3P$i^f^@W%+E-8ji za&n~;`6H-eFo3gkev+2WCet{<(Sst{R5C56iD~)vWLmxu)3T{JU1o#^*aR457SV1T z4Kw5LL>Y%i*|SnagX^Vm*X=*J7kc;oj!QM5>OAO5+m?YN11^KQ3>v^Rqik$eNtvp~ zJOM+1RACx>!h$MUpG;^nHUrM^r;NRw5j~$M33Yv}K2cCD z&r~P!poMyRH>rLj)IUb*qijOm7!Ud=i)wivCNmf6>D{FIgHRvluxs{5Wm=W6Lv;?X zP)*l+{T<8Qb@5hMqYGj^^!s7E*VSjZ#Vs53#i@#LFR<_~?G60M>h~U59Y1on-N@CC zm3=dMyz855W*GGcUS!tHR@)D@Lv!uUzS(s #include #include +#include #include using BlockRowType = DB::ColumnsWithTypeAndName; @@ -32,6 +33,9 @@ using BlockFieldType = DB::ColumnWithTypeAndName; using AnotherRowType = DB::NamesAndTypesList; using AnotherFieldType = DB::NameAndTypePair; + +#define GLUTEN_DATA_DIR(file) "file://" SOURCE_DIR file + namespace parquet { class ColumnDescriptor; @@ -80,6 +84,12 @@ std::string JsonStringToBinary(const std::string_view & json) } } +inline std::string replaceLocalFilesWildcards(const String & haystack, const String & replaced) +{ + static constexpr auto _WILDCARD_ = "{replace_local_files}"; + return boost::replace_all_copy(haystack, _WILDCARD_, replaced); +} + inline DB::DataTypePtr BIGINT() { return std::make_shared(); diff --git a/cpp-ch/local-engine/tests/gtest_clickhouse_54881.cpp b/cpp-ch/local-engine/tests/gtest_clickhouse_54881.cpp new file mode 100644 index 000000000000..120888136d08 --- /dev/null +++ b/cpp-ch/local-engine/tests/gtest_clickhouse_54881.cpp @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include + +#include +#include +#include + + +using namespace local_engine; + +using namespace DB; + +// Plan for https://github.com/ClickHouse/ClickHouse/pull/54881 +INCBIN(resource_embedded_pr_54881_json, SOURCE_DIR "/utils/extern-local-engine/tests/json/clickhouse_pr_54881.json"); + +TEST(Clickhouse, PR54881) +{ + const auto context1 = DB::Context::createCopy(SerializedPlanParser::global_context); + // context1->setSetting("enable_named_columns_in_function_tuple", DB::Field(true)); + auto settingxs = context1->getSettingsRef(); + EXPECT_FALSE(settingxs.enable_named_columns_in_function_tuple) << + "GLUTEN NEED set enable_named_columns_in_function_tuple to false"; + + const std::string split_template + = R"({"items":[{"uriFile":"{replace_local_files}","partitionIndex":"0","length":"1529","parquet":{},"schema":{},"metadataColumns":[{}]}]})"; + const std::string split + = replaceLocalFilesWildcards(split_template, GLUTEN_DATA_DIR("/utils/extern-local-engine/tests/data/54881.snappy.parquet")); + + SerializedPlanParser parser(context1); + parser.addSplitInfo(test::pb_util::JsonStringToBinary(split)); + + const auto local_executor = parser.createExecutor( + {reinterpret_cast(gresource_embedded_pr_54881_jsonData), gresource_embedded_pr_54881_jsonSize}); + + EXPECT_TRUE(local_executor->hasNext()); + const Block & block = *local_executor->nextColumnar(); + + debug::headBlock(block); + + EXPECT_EQ(2, block.columns()); + const auto & col_0 = *(block.getColumns()[0]); + EXPECT_EQ(col_0.getInt(0), 9); + EXPECT_EQ(col_0.getInt(1),10); + + Field field; + const auto & col_1 = *(block.getColumns()[1]); + col_1.get(0, field); + const Tuple& row_0 = field.get(); + EXPECT_EQ(2, row_0.size()); + + Int64 actual{-1}; + EXPECT_TRUE(row_0[0].tryGet(actual)); + EXPECT_EQ(9, actual); + + EXPECT_TRUE(row_0[1].tryGet(actual)); + EXPECT_EQ(10, actual); + + col_1.get(1, field); + const Tuple& row_1 = field.get(); + EXPECT_EQ(2, row_1.size()); + EXPECT_TRUE(row_1[0].tryGet(actual)); + EXPECT_EQ(10, actual); + + EXPECT_TRUE(row_1[1].tryGet(actual)); + EXPECT_EQ(11, actual); + + EXPECT_FALSE(local_executor->hasNext()); +} diff --git a/cpp-ch/local-engine/tests/json/clickhouse_pr_54881.json b/cpp-ch/local-engine/tests/json/clickhouse_pr_54881.json new file mode 100644 index 000000000000..64a5b2aba978 --- /dev/null +++ b/cpp-ch/local-engine/tests/json/clickhouse_pr_54881.json @@ -0,0 +1,379 @@ +{ + "extensions": [ + { + "extensionFunction": { + "functionAnchor": 2, + "name": "gt:i64_i64" + } + }, + { + "extensionFunction": { + "functionAnchor": 4, + "name": "alias:struct" + } + }, + { + "extensionFunction": { + "functionAnchor": 5, + "name": "named_struct:str_i64_str_i64" + } + }, + { + "extensionFunction": { + "functionAnchor": 1, + "name": "is_not_null:i64" + } + }, + { + "extensionFunction": { + "name": "and:bool_bool" + } + }, + { + "extensionFunction": { + "functionAnchor": 3, + "name": "alias:i64" + } + } + ], + "relations": [ + { + "root": { + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [ + 2, + 3 + ] + } + }, + "input": { + "filter": { + "common": { + "direct": {} + }, + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "i1", + "i2" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ] + }, + "columnTypes": [ + "NORMAL_COL", + "NORMAL_COL" + ] + }, + "filter": { + "scalarFunction": { + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 1, + "outputType": { + "bool": { + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": {} + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": {} + } + } + } + }, + { + "value": { + "literal": { + "i64": "8" + } + } + } + ] + } + } + } + ] + } + }, + "advancedExtension": { + "optimization": { + "@type": "type.googleapis.com/google.protobuf.StringValue", + "value": "isMergeTree=0\n" + } + } + } + }, + "condition": { + "scalarFunction": { + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 1, + "outputType": { + "bool": { + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": {} + } + } + } + } + ] + } + } + }, + { + "value": { + "scalarFunction": { + "functionReference": 2, + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": {} + } + } + } + }, + { + "value": { + "literal": { + "i64": "8" + } + } + } + ] + } + } + } + ] + } + } + } + }, + "expressions": [ + { + "scalarFunction": { + "functionReference": 3, + "outputType": { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [ + { + "value": { + "selection": { + "directReference": { + "structField": {} + } + } + } + } + ] + } + }, + { + "scalarFunction": { + "functionReference": 4, + "outputType": { + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_REQUIRED", + "names": [ + "a", + "b" + ] + } + }, + "arguments": [ + { + "value": { + "scalarFunction": { + "functionReference": 5, + "outputType": { + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_REQUIRED", + "names": [ + "a", + "b" + ] + } + }, + "arguments": [ + { + "value": { + "literal": { + "string": "a" + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": {} + } + } + } + }, + { + "value": { + "literal": { + "string": "b" + } + } + }, + { + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + } + } + } + } + ] + } + } + } + ] + } + } + ] + } + }, + "names": [ + "a#73", + "col1#67" + ], + "outputSchema": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ], + "nullability": "NULLABILITY_REQUIRED", + "names": [ + "a", + "b" + ] + } + } + ], + "nullability": "NULLABILITY_REQUIRED" + } + } + } + ] +} \ No newline at end of file From dd6950ec2a5a1638a32f85b13387fd61d5d7651b Mon Sep 17 00:00:00 2001 From: Chang Chen Date: Wed, 10 Jul 2024 18:56:11 +0800 Subject: [PATCH 3/3] style --- cpp-ch/local-engine/Common/DebugUtils.cpp | 6 ------ cpp-ch/local-engine/tests/gluten_test_util.h | 2 +- cpp-ch/local-engine/tests/gtest_clickhouse_54881.cpp | 11 +++++------ 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/cpp-ch/local-engine/Common/DebugUtils.cpp b/cpp-ch/local-engine/Common/DebugUtils.cpp index 9803eee5ee46..27f9efdacf16 100644 --- a/cpp-ch/local-engine/Common/DebugUtils.cpp +++ b/cpp-ch/local-engine/Common/DebugUtils.cpp @@ -48,17 +48,11 @@ void headBlock(const DB::Block & block, size_t count) std::cout << "\t"; DB::WhichDataType which(type); if (which.isAggregateFunction()) - { std::cout << "Nan"; - } else if (col->isNullAt(row)) - { std::cout << "null"; - } else - { std::cout << toString((*col)[row]); - } } std::cout << std::endl; } diff --git a/cpp-ch/local-engine/tests/gluten_test_util.h b/cpp-ch/local-engine/tests/gluten_test_util.h index 4f0fc2d98111..338d53be788a 100644 --- a/cpp-ch/local-engine/tests/gluten_test_util.h +++ b/cpp-ch/local-engine/tests/gluten_test_util.h @@ -24,8 +24,8 @@ #include #include #include -#include #include +#include #include using BlockRowType = DB::ColumnsWithTypeAndName; diff --git a/cpp-ch/local-engine/tests/gtest_clickhouse_54881.cpp b/cpp-ch/local-engine/tests/gtest_clickhouse_54881.cpp index 120888136d08..2628eb0e8c04 100644 --- a/cpp-ch/local-engine/tests/gtest_clickhouse_54881.cpp +++ b/cpp-ch/local-engine/tests/gtest_clickhouse_54881.cpp @@ -34,8 +34,7 @@ TEST(Clickhouse, PR54881) const auto context1 = DB::Context::createCopy(SerializedPlanParser::global_context); // context1->setSetting("enable_named_columns_in_function_tuple", DB::Field(true)); auto settingxs = context1->getSettingsRef(); - EXPECT_FALSE(settingxs.enable_named_columns_in_function_tuple) << - "GLUTEN NEED set enable_named_columns_in_function_tuple to false"; + EXPECT_FALSE(settingxs.enable_named_columns_in_function_tuple) << "GLUTEN NEED set enable_named_columns_in_function_tuple to false"; const std::string split_template = R"({"items":[{"uriFile":"{replace_local_files}","partitionIndex":"0","length":"1529","parquet":{},"schema":{},"metadataColumns":[{}]}]})"; @@ -46,7 +45,7 @@ TEST(Clickhouse, PR54881) parser.addSplitInfo(test::pb_util::JsonStringToBinary(split)); const auto local_executor = parser.createExecutor( - {reinterpret_cast(gresource_embedded_pr_54881_jsonData), gresource_embedded_pr_54881_jsonSize}); + {reinterpret_cast(gresource_embedded_pr_54881_jsonData), gresource_embedded_pr_54881_jsonSize}); EXPECT_TRUE(local_executor->hasNext()); const Block & block = *local_executor->nextColumnar(); @@ -56,12 +55,12 @@ TEST(Clickhouse, PR54881) EXPECT_EQ(2, block.columns()); const auto & col_0 = *(block.getColumns()[0]); EXPECT_EQ(col_0.getInt(0), 9); - EXPECT_EQ(col_0.getInt(1),10); + EXPECT_EQ(col_0.getInt(1), 10); Field field; const auto & col_1 = *(block.getColumns()[1]); col_1.get(0, field); - const Tuple& row_0 = field.get(); + const Tuple & row_0 = field.get(); EXPECT_EQ(2, row_0.size()); Int64 actual{-1}; @@ -72,7 +71,7 @@ TEST(Clickhouse, PR54881) EXPECT_EQ(10, actual); col_1.get(1, field); - const Tuple& row_1 = field.get(); + const Tuple & row_1 = field.get(); EXPECT_EQ(2, row_1.size()); EXPECT_TRUE(row_1[0].tryGet(actual)); EXPECT_EQ(10, actual);