From ef7b2c516bee6c82161a86574dbda91e05ab1b3a Mon Sep 17 00:00:00 2001 From: Kyligence Git Date: Fri, 21 Jun 2024 08:10:34 -0500 Subject: [PATCH] [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240621) (#6170) * [GLUTEN-1632][CH]Daily Update Clickhouse Version (20240621) * Fix Build/UT due to https://github.com/ClickHouse/ClickHouse/pull/65234 * fix style * refactor test --------- Co-authored-by: kyligence-git Co-authored-by: Chang Chen --- cpp-ch/clickhouse.version | 4 +- cpp-ch/local-engine/Parser/JoinRelParser.cpp | 2 +- .../Parser/MergeTreeRelParser.cpp | 2 +- .../local-engine/Parser/ProjectRelParser.cpp | 1 - .../Parser/SerializedPlanParser.cpp | 9 +- .../Parser/SerializedPlanParser.h | 10 +- cpp-ch/local-engine/local_engine_jni.cpp | 72 ++-- .../local-engine/tests/gluten_test_util.cpp | 4 +- cpp-ch/local-engine/tests/gtest_parser.cpp | 320 ++++++++++++++++++ 9 files changed, 373 insertions(+), 51 deletions(-) create mode 100644 cpp-ch/local-engine/tests/gtest_parser.cpp diff --git a/cpp-ch/clickhouse.version b/cpp-ch/clickhouse.version index 1e3ac8d88ea9..4a3088e54309 100644 --- a/cpp-ch/clickhouse.version +++ b/cpp-ch/clickhouse.version @@ -1,3 +1,3 @@ CH_ORG=Kyligence -CH_BRANCH=rebase_ch/20240620 -CH_COMMIT=f9c3886a767 +CH_BRANCH=rebase_ch/20240621 +CH_COMMIT=acf666c1c4f diff --git a/cpp-ch/local-engine/Parser/JoinRelParser.cpp b/cpp-ch/local-engine/Parser/JoinRelParser.cpp index 937e449b0825..58b156c3cf6e 100644 --- a/cpp-ch/local-engine/Parser/JoinRelParser.cpp +++ b/cpp-ch/local-engine/Parser/JoinRelParser.cpp @@ -459,7 +459,7 @@ void JoinRelParser::addConvertStep(TableJoin & table_join, DB::QueryPlan & left, rename_dag->getOutputs()[pos] = &alias; } } - rename_dag->projectInput(); + QueryPlanStepPtr project_step = std::make_unique(right.getCurrentDataStream(), rename_dag); project_step->setStepDescription("Right Table Rename"); steps.emplace_back(project_step.get()); diff --git a/cpp-ch/local-engine/Parser/MergeTreeRelParser.cpp b/cpp-ch/local-engine/Parser/MergeTreeRelParser.cpp index c36db6b7484a..b51b76b97415 100644 --- a/cpp-ch/local-engine/Parser/MergeTreeRelParser.cpp +++ b/cpp-ch/local-engine/Parser/MergeTreeRelParser.cpp @@ -211,7 +211,7 @@ PrewhereInfoPtr MergeTreeRelParser::parsePreWhereInfo(const substrait::Expressio prewhere_info->prewhere_column_name = filter_name; prewhere_info->need_filter = true; prewhere_info->remove_prewhere_column = true; - prewhere_info->prewhere_actions->projectInput(false); + for (const auto & name : input.getNames()) prewhere_info->prewhere_actions->tryRestoreColumn(name); return prewhere_info; diff --git a/cpp-ch/local-engine/Parser/ProjectRelParser.cpp b/cpp-ch/local-engine/Parser/ProjectRelParser.cpp index caf779ac13bc..eb190101f170 100644 --- a/cpp-ch/local-engine/Parser/ProjectRelParser.cpp +++ b/cpp-ch/local-engine/Parser/ProjectRelParser.cpp @@ -99,7 +99,6 @@ ProjectRelParser::SplittedActionsDAGs ProjectRelParser::splitActionsDAGInGenerat std::unordered_set first_split_nodes(array_join_node->children.begin(), array_join_node->children.end()); auto first_split_result = actions_dag->split(first_split_nodes); res.before_array_join = first_split_result.first; - res.before_array_join->projectInput(true); array_join_node = findArrayJoinNode(first_split_result.second); std::unordered_set second_split_nodes = {array_join_node}; diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp index 40e01e3052a3..f9ea783a2bbd 100644 --- a/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp +++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.cpp @@ -234,6 +234,7 @@ std::shared_ptr SerializedPlanParser::expressionsToActionsDAG( throw Exception(ErrorCodes::BAD_ARGUMENTS, "unsupported projection type {}.", magic_enum::enum_name(expr.rex_type_case())); } actions_dag->project(required_columns); + actions_dag->appendInputsForUnusedColumns(header); return actions_dag; } @@ -1790,7 +1791,7 @@ QueryPlanPtr SerializedPlanParser::parse(const std::string & plan) QueryPlanPtr SerializedPlanParser::parseJson(const std::string & json_plan) { auto plan_ptr = std::make_unique(); - auto s = google::protobuf::util::JsonStringToMessage(absl::string_view(json_plan.c_str()), plan_ptr.get()); + auto s = google::protobuf::util::JsonStringToMessage(absl::string_view(json_plan), plan_ptr.get()); if (!s.ok()) throw Exception(ErrorCodes::CANNOT_PARSE_PROTOBUF_SCHEMA, "Parse substrait::Plan from json string failed: {}", s.ToString()); return parse(std::move(plan_ptr)); @@ -1831,7 +1832,7 @@ void SerializedPlanParser::collectJoinKeys( } } -ActionsDAGPtr ASTParser::convertToActions(const NamesAndTypesList & name_and_types, const ASTPtr & ast) +ActionsDAG ASTParser::convertToActions(const NamesAndTypesList & name_and_types, const ASTPtr & ast) const { NamesAndTypesList aggregation_keys; ColumnNumbersList aggregation_keys_indexes_list; @@ -1840,9 +1841,9 @@ ActionsDAGPtr ASTParser::convertToActions(const NamesAndTypesList & name_and_typ ActionsMatcher::Data visitor_data( context, size_limits_for_set, - size_t(0), + static_cast(0), name_and_types, - std::make_shared(name_and_types), + ActionsDAG(name_and_types), std::make_shared(), false /* no_subqueries */, false /* no_makeset */, diff --git a/cpp-ch/local-engine/Parser/SerializedPlanParser.h b/cpp-ch/local-engine/Parser/SerializedPlanParser.h index 45ff5a20b5ae..8964f42d9d02 100644 --- a/cpp-ch/local-engine/Parser/SerializedPlanParser.h +++ b/cpp-ch/local-engine/Parser/SerializedPlanParser.h @@ -112,7 +112,7 @@ static const std::map SCALAR_FUNCTIONS {"rand", "randCanonical"}, {"isnan", "isNaN"}, {"bin", "sparkBin"}, - {"rint", "sparkRint"}, + {"rint", "sparkRint"}, /// string functions {"like", "like"}, @@ -151,7 +151,7 @@ static const std::map SCALAR_FUNCTIONS {"initcap", "initcapUTF8"}, {"conv", "sparkConv"}, {"uuid", "generateUUIDv4"}, - {"levenshteinDistance", "editDistanceUTF8"}, + {"levenshteinDistance", "editDistanceUTF8"}, /// hash functions {"crc32", "CRC32"}, @@ -278,7 +278,7 @@ class SerializedPlanParser materialize_inputs.emplace_back(materialize_input); } - void addSplitInfo(std::string & split_info) { split_infos.emplace_back(std::move(split_info)); } + void addSplitInfo(std::string && split_info) { split_infos.emplace_back(std::move(split_info)); } int nextSplitInfoIndex() { @@ -419,6 +419,7 @@ class LocalExecutor : public BlockIterator RelMetricPtr getMetric() const { return metric; } void setMetric(RelMetricPtr metric_) { metric = metric_; } void setExtraPlanHolder(std::vector & extra_plan_holder_) { extra_plan_holder = std::move(extra_plan_holder_); } + private: std::unique_ptr writeBlockToSparkRow(DB::Block & block); @@ -434,7 +435,6 @@ class LocalExecutor : public BlockIterator DB::QueryPlanPtr current_query_plan; RelMetricPtr metric; std::vector extra_plan_holder; - }; @@ -450,7 +450,7 @@ class ASTParser ~ASTParser() = default; ASTPtr parseToAST(const Names & names, const substrait::Expression & rel); - ActionsDAGPtr convertToActions(const NamesAndTypesList & name_and_types, const ASTPtr & ast); + ActionsDAG convertToActions(const NamesAndTypesList & name_and_types, const ASTPtr & ast) const; private: ContextPtr context; diff --git a/cpp-ch/local-engine/local_engine_jni.cpp b/cpp-ch/local-engine/local_engine_jni.cpp index 38f188293726..256f373c28b5 100644 --- a/cpp-ch/local-engine/local_engine_jni.cpp +++ b/cpp-ch/local-engine/local_engine_jni.cpp @@ -36,10 +36,14 @@ #include #include #include +#include +#include #include #include #include +#include #include +#include #include #include #include @@ -51,10 +55,6 @@ #include #include #include -#include -#include -#include -#include #ifdef __cplusplus @@ -269,13 +269,12 @@ JNIEXPORT jlong Java_org_apache_gluten_vectorized_ExpressionEvaluatorJniWrapper_ parser.addInputIter(iter, materialize_input); } - for (jsize i = 0, split_info_arr_size = env->GetArrayLength(split_infos); i < split_info_arr_size; i++) { + for (jsize i = 0, split_info_arr_size = env->GetArrayLength(split_infos); i < split_info_arr_size; i++) + { jbyteArray split_info = static_cast(env->GetObjectArrayElement(split_infos, i)); - jsize split_info_size = env->GetArrayLength(split_info); + std::string::size_type split_info_size = env->GetArrayLength(split_info); jbyte * split_info_addr = env->GetByteArrayElements(split_info, nullptr); - std::string split_info_str; - split_info_str.assign(reinterpret_cast(split_info_addr), split_info_size); - parser.addSplitInfo(split_info_str); + parser.addSplitInfo(std::string{reinterpret_cast(split_info_addr), split_info_size}); } jsize plan_size = env->GetArrayLength(plan); @@ -630,8 +629,7 @@ JNIEXPORT jlong Java_org_apache_gluten_vectorized_CHShuffleSplitterJniWrapper_na .max_sort_buffer_size = static_cast(max_sort_buffer_size), .spill_firstly_before_stop = static_cast(spill_firstly_before_stop), .force_external_sort = static_cast(force_external_sort), - .force_mermory_sort = static_cast(force_memory_sort) - }; + .force_mermory_sort = static_cast(force_memory_sort)}; auto name = jstring2string(env, short_name); local_engine::SplitterHolder * splitter; if (prefer_spill) @@ -696,8 +694,7 @@ JNIEXPORT jlong Java_org_apache_gluten_vectorized_CHShuffleSplitterJniWrapper_na .throw_if_memory_exceed = static_cast(throw_if_memory_exceed), .flush_block_buffer_before_evict = static_cast(flush_block_buffer_before_evict), .force_external_sort = static_cast(force_external_sort), - .force_mermory_sort = static_cast(force_memory_sort) - }; + .force_mermory_sort = static_cast(force_memory_sort)}; auto name = jstring2string(env, short_name); local_engine::SplitterHolder * splitter; splitter = new local_engine::SplitterHolder{.splitter = std::make_unique(name, options, pusher)}; @@ -768,8 +765,8 @@ JNIEXPORT void Java_org_apache_gluten_vectorized_CHShuffleSplitterJniWrapper_clo } // CHBlockConverterJniWrapper -JNIEXPORT jobject -Java_org_apache_gluten_vectorized_CHBlockConverterJniWrapper_convertColumnarToRow(JNIEnv * env, jclass, jlong block_address, jintArray masks) +JNIEXPORT jobject Java_org_apache_gluten_vectorized_CHBlockConverterJniWrapper_convertColumnarToRow( + JNIEnv * env, jclass, jlong block_address, jintArray masks) { LOCAL_ENGINE_JNI_METHOD_START local_engine::CHColumnToSparkRow converter; @@ -958,21 +955,18 @@ JNIEXPORT jlong Java_org_apache_spark_sql_execution_datasources_CHDatasourceJniW /// Parsing may fail when the number of recursive layers is large. /// Here, set a limit large enough to avoid this problem. /// Once this problem occurs, it is difficult to troubleshoot, because the pb of c++ will not provide any valid information - google::protobuf::io::CodedInputStream coded_in( - reinterpret_cast(plan_str.data()), static_cast(plan_str.size())); + google::protobuf::io::CodedInputStream coded_in(reinterpret_cast(plan_str.data()), static_cast(plan_str.size())); coded_in.SetRecursionLimit(100000); auto ok = plan_ptr->ParseFromCodedStream(&coded_in); if (!ok) throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_PROTOBUF_SCHEMA, "Parse substrait::Plan from string failed"); - substrait::ReadRel::ExtensionTable extension_table = - local_engine::SerializedPlanParser::parseExtensionTable(split_info_str); + substrait::ReadRel::ExtensionTable extension_table = local_engine::SerializedPlanParser::parseExtensionTable(split_info_str); auto merge_tree_table = local_engine::MergeTreeRelParser::parseMergeTreeTable(extension_table); auto uuid = uuid_str + "_" + task_id; - auto * writer = new local_engine::SparkMergeTreeWriter( - merge_tree_table, query_context, uuid, partition_dir, bucket_dir); + auto * writer = new local_engine::SparkMergeTreeWriter(merge_tree_table, query_context, uuid, partition_dir, bucket_dir); env->ReleaseByteArrayElements(plan_, plan_buf_addr, JNI_ABORT); env->ReleaseByteArrayElements(split_info_, split_info_addr, JNI_ABORT); @@ -1044,8 +1038,8 @@ JNIEXPORT void Java_org_apache_spark_sql_execution_datasources_CHDatasourceJniWr LOCAL_ENGINE_JNI_METHOD_END(env, ) } -JNIEXPORT void -Java_org_apache_spark_sql_execution_datasources_CHDatasourceJniWrapper_writeToMergeTree(JNIEnv * env, jobject, jlong instanceId, jlong block_address) +JNIEXPORT void Java_org_apache_spark_sql_execution_datasources_CHDatasourceJniWrapper_writeToMergeTree( + JNIEnv * env, jobject, jlong instanceId, jlong block_address) { LOCAL_ENGINE_JNI_METHOD_START auto * writer = reinterpret_cast(instanceId); @@ -1054,7 +1048,8 @@ Java_org_apache_spark_sql_execution_datasources_CHDatasourceJniWrapper_writeToMe LOCAL_ENGINE_JNI_METHOD_END(env, ) } -JNIEXPORT jstring Java_org_apache_spark_sql_execution_datasources_CHDatasourceJniWrapper_closeMergeTreeWriter(JNIEnv * env, jobject, jlong instanceId) +JNIEXPORT jstring +Java_org_apache_spark_sql_execution_datasources_CHDatasourceJniWrapper_closeMergeTreeWriter(JNIEnv * env, jobject, jlong instanceId) { LOCAL_ENGINE_JNI_METHOD_START auto * writer = reinterpret_cast(instanceId); @@ -1067,7 +1062,14 @@ JNIEXPORT jstring Java_org_apache_spark_sql_execution_datasources_CHDatasourceJn } JNIEXPORT jstring Java_org_apache_spark_sql_execution_datasources_CHDatasourceJniWrapper_nativeMergeMTParts( - JNIEnv * env, jobject, jbyteArray plan_, jbyteArray split_info_, jstring uuid_, jstring task_id_, jstring partition_dir_, jstring bucket_dir_) + JNIEnv * env, + jobject, + jbyteArray plan_, + jbyteArray split_info_, + jstring uuid_, + jstring task_id_, + jstring partition_dir_, + jstring bucket_dir_) { LOCAL_ENGINE_JNI_METHOD_START @@ -1095,16 +1097,14 @@ JNIEXPORT jstring Java_org_apache_spark_sql_execution_datasources_CHDatasourceJn /// Parsing may fail when the number of recursive layers is large. /// Here, set a limit large enough to avoid this problem. /// Once this problem occurs, it is difficult to troubleshoot, because the pb of c++ will not provide any valid information - google::protobuf::io::CodedInputStream coded_in( - reinterpret_cast(plan_str.data()), static_cast(plan_str.size())); + google::protobuf::io::CodedInputStream coded_in(reinterpret_cast(plan_str.data()), static_cast(plan_str.size())); coded_in.SetRecursionLimit(100000); auto ok = plan_ptr->ParseFromCodedStream(&coded_in); if (!ok) throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_PROTOBUF_SCHEMA, "Parse substrait::Plan from string failed"); - substrait::ReadRel::ExtensionTable extension_table = - local_engine::SerializedPlanParser::parseExtensionTable(split_info_str); + substrait::ReadRel::ExtensionTable extension_table = local_engine::SerializedPlanParser::parseExtensionTable(split_info_str); google::protobuf::StringValue table; table.ParseFromString(extension_table.detail().value()); auto merge_tree_table = local_engine::parseMergeTreeTableString(table.value()); @@ -1114,12 +1114,12 @@ JNIEXPORT jstring Java_org_apache_spark_sql_execution_datasources_CHDatasourceJn = local_engine::MergeTreeRelParser::copyToVirtualStorage(merge_tree_table, local_engine::SerializedPlanParser::global_context); local_engine::TempStorageFreer freer{temp_storage->getStorageID()}; // to release temp CustomStorageMergeTree with RAII - std::vector selected_parts - = local_engine::StorageMergeTreeFactory::instance().getDataPartsByNames(temp_storage->getStorageID(), "", merge_tree_table.getPartNames()); + std::vector selected_parts = local_engine::StorageMergeTreeFactory::instance().getDataPartsByNames( + temp_storage->getStorageID(), "", merge_tree_table.getPartNames()); std::unordered_map partition_values; - std::vector loaded = - local_engine::mergeParts(selected_parts, partition_values, uuid_str, temp_storage, partition_dir, bucket_dir); + std::vector loaded + = local_engine::mergeParts(selected_parts, partition_values, uuid_str, temp_storage, partition_dir, bucket_dir); std::vector res; for (auto & partPtr : loaded) @@ -1156,7 +1156,8 @@ JNIEXPORT jobject Java_org_apache_spark_sql_execution_datasources_CHDatasourceJn partition_col_indice_vec.push_back(pIndice[i]); env->ReleaseIntArrayElements(partitionColIndice, pIndice, JNI_ABORT); - local_engine::BlockStripes bs = local_engine::BlockStripeSplitter::split(*block, partition_col_indice_vec, hasBucket, reserve_partition_columns); + local_engine::BlockStripes bs + = local_engine::BlockStripeSplitter::split(*block, partition_col_indice_vec, hasBucket, reserve_partition_columns); auto * addresses = env->NewLongArray(bs.block_addresses.size()); @@ -1366,7 +1367,8 @@ JNIEXPORT jlong Java_org_apache_gluten_memory_alloc_CHNativeMemoryAllocator_getD return -1; } -JNIEXPORT jlong Java_org_apache_gluten_memory_alloc_CHNativeMemoryAllocator_createListenableAllocator(JNIEnv * env, jclass, jobject listener) +JNIEXPORT jlong +Java_org_apache_gluten_memory_alloc_CHNativeMemoryAllocator_createListenableAllocator(JNIEnv * env, jclass, jobject listener) { LOCAL_ENGINE_JNI_METHOD_START auto listener_wrapper = std::make_shared(env->NewGlobalRef(listener)); diff --git a/cpp-ch/local-engine/tests/gluten_test_util.cpp b/cpp-ch/local-engine/tests/gluten_test_util.cpp index 7fdd32d1661b..0448092b960d 100644 --- a/cpp-ch/local-engine/tests/gluten_test_util.cpp +++ b/cpp-ch/local-engine/tests/gluten_test_util.cpp @@ -62,14 +62,14 @@ ActionsDAGPtr parseFilter(const std::string & filter, const AnotherRowType & nam size_limits_for_set, static_cast(0), name_and_types, - std::make_shared(name_and_types), + ActionsDAG(name_and_types), prepared_sets /* prepared_sets */, false /* no_subqueries */, false /* no_makeset */, false /* only_consts */, info); ActionsVisitor(visitor_data).visit(ast_exp); - return ActionsDAG::buildFilterActionsDAG({visitor_data.getActions()->getOutputs().back()}, node_name_to_input_column); + return ActionsDAG::buildFilterActionsDAG({visitor_data.getActions().getOutputs().back()}, node_name_to_input_column); } const char * get_data_dir() diff --git a/cpp-ch/local-engine/tests/gtest_parser.cpp b/cpp-ch/local-engine/tests/gtest_parser.cpp new file mode 100644 index 000000000000..cbe41c90c81a --- /dev/null +++ b/cpp-ch/local-engine/tests/gtest_parser.cpp @@ -0,0 +1,320 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include + +using namespace local_engine; +using namespace DB; + +std::string splitBinaryFromJson(const std::string & json) +{ + std::string binary; + substrait::ReadRel::LocalFiles local_files; + auto s = google::protobuf::util::JsonStringToMessage(absl::string_view(json), &local_files); + local_files.SerializeToString(&binary); + return binary; +} + +std::string JsonPlanFor65234() +{ + // Plan for https://github.com/ClickHouse/ClickHouse/pull/65234 + return R"( +{ + "extensions": [{ + "extensionFunction": { + "functionAnchor": 1, + "name": "is_not_null:str" + } + }, { + "extensionFunction": { + "functionAnchor": 2, + "name": "equal:str_str" + } + }, { + "extensionFunction": { + "functionAnchor": 3, + "name": "is_not_null:i64" + } + }, { + "extensionFunction": { + "name": "and:bool_bool" + } + }], + "relations": [{ + "root": { + "input": { + "project": { + "common": { + "emit": { + "outputMapping": [2] + } + }, + "input": { + "filter": { + "common": { + "direct": { + } + }, + "input": { + "read": { + "common": { + "direct": { + } + }, + "baseSchema": { + "names": ["r_regionkey", "r_name"], + "struct": { + "types": [{ + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }] + }, + "columnTypes": ["NORMAL_COL", "NORMAL_COL"] + }, + "filter": { + "scalarFunction": { + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "outputType": { + "bool": { + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + } + } + } + }] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + } + } + } + }, { + "value": { + "literal": { + "string": "EUROPE" + } + } + }] + } + } + }] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "outputType": { + "bool": { + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + } + } + } + } + }] + } + } + }] + } + }, + "advancedExtension": { + "optimization": { + "@type": "type.googleapis.com/google.protobuf.StringValue", + "value": "isMergeTree\u003d0\n" + } + } + } + }, + "condition": { + "scalarFunction": { + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "scalarFunction": { + "functionReference": 1, + "outputType": { + "bool": { + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + } + } + } + }] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 2, + "outputType": { + "bool": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + "field": 1 + } + } + } + } + }, { + "value": { + "literal": { + "string": "EUROPE" + } + } + }] + } + } + }] + } + } + }, { + "value": { + "scalarFunction": { + "functionReference": 3, + "outputType": { + "bool": { + "nullability": "NULLABILITY_REQUIRED" + } + }, + "arguments": [{ + "value": { + "selection": { + "directReference": { + "structField": { + } + } + } + } + }] + } + } + }] + } + } + } + }, + "expressions": [{ + "selection": { + "directReference": { + "structField": { + } + } + } + }] + } + }, + "names": ["r_regionkey#72"], + "outputSchema": { + "types": [{ + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }], + "nullability": "NULLABILITY_REQUIRED" + } + } + }] +} +)"; +} + +TEST(SerializedPlanParser, PR65234) +{ + const std::string split + = R"({"items":[{"uriFile":"file:///part-00000-16caa751-9774-470c-bd37-5c84c53373c8-c000.snappy.parquet","length":"84633","parquet":{},"schema":{},"metadataColumns":[{}]}]}")"; + SerializedPlanParser parser(SerializedPlanParser::global_context); + parser.addSplitInfo(splitBinaryFromJson(split)); + parser.parseJson(JsonPlanFor65234()); +}