From ee7d3c8c6163acb6292272257f66a8cd0c5c89b6 Mon Sep 17 00:00:00 2001 From: LiuNeng <1398775315@qq.com> Date: Wed, 27 Apr 2022 19:23:16 +0800 Subject: [PATCH] Support TPCH Q1 (#8) * support tpch q1 * clean log * disable background executors * add storage cache evict * enable function compile * support jemalloc * fix q6 benchmark failed * remove shared unwind --- cmake/unwind.cmake | 6 +- .../internal/jemalloc_internal_defs.h.in | 3 +- src/Interpreters/Context.cpp | 4 + src/Interpreters/Context.h | 3 + utils/local-engine/CMakeLists.txt | 4 +- utils/local-engine/Common/Logger.cpp | 2 +- utils/local-engine/Common/common.cpp | 8 ++ .../Parser/SerializedPlanParser.cpp | 102 +++++++++++++++--- .../Parser/SerializedPlanParser.h | 21 +++- .../Storages/SourceFromJavaIter.cpp | 8 +- .../Storages/StorageMergeTreeFactory.cpp | 20 +++- .../Storages/StorageMergeTreeFactory.h | 3 +- .../local-engine/cmake/libcxx/CMakeLists.txt | 77 ------------- .../cmake/libcxxabi/CMakeLists.txt | 48 --------- .../cmake/libunwind/CMakeLists.txt | 74 ------------- utils/local-engine/local_engine_jni.cpp | 24 +++-- .../tests/benchmark_local_engine.cpp | 4 +- 17 files changed, 171 insertions(+), 240 deletions(-) delete mode 100644 utils/local-engine/cmake/libcxx/CMakeLists.txt delete mode 100644 utils/local-engine/cmake/libcxxabi/CMakeLists.txt delete mode 100644 utils/local-engine/cmake/libunwind/CMakeLists.txt diff --git a/cmake/unwind.cmake b/cmake/unwind.cmake index 93789fa1451c..c9f5f30a5d6b 100644 --- a/cmake/unwind.cmake +++ b/cmake/unwind.cmake @@ -1,11 +1,7 @@ option (USE_UNWIND "Enable libunwind (better stacktraces)" ${ENABLE_LIBRARIES}) if (USE_UNWIND) - if (MAKE_STATIC_LIBRARIES) - add_subdirectory(contrib/libunwind-cmake) - else() - add_subdirectory(${ClickHouse_SOURCE_DIR}/utils/local-engine/cmake/libunwind) - endif() + add_subdirectory(contrib/libunwind-cmake) set (UNWIND_LIBRARIES unwind) set (EXCEPTION_HANDLING_LIBRARY ${UNWIND_LIBRARIES}) diff --git a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in index 99ab2d53ca9c..0003f89510f4 100644 --- a/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in +++ b/contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in @@ -138,7 +138,8 @@ /* #undef JEMALLOC_MUTEX_INIT_CB */ /* Non-empty if the tls_model attribute is supported. */ -#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) +/*#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec")))*/ +#define JEMALLOC_TLS_MODEL /* * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index ac1bfc620b02..7ca88cd3fae2 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -3197,5 +3197,9 @@ ReadSettings Context::getReadSettings() const return res; } +void Context::setBackgroundExecutorsInitialized(bool initialized) +{ + shared->is_background_executors_initialized = initialized; +} } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index c3615db90682..e6d3c14eb1b3 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -892,6 +892,9 @@ class Context: public std::enable_shared_from_this /** Get settings for reading from filesystem. */ ReadSettings getReadSettings() const; + /** Used to disable global context initialized. */ + void setBackgroundExecutorsInitialized(bool initialized); + private: std::unique_lock getLock() const; diff --git a/utils/local-engine/CMakeLists.txt b/utils/local-engine/CMakeLists.txt index af2c60395d3e..b267ed0a1e24 100644 --- a/utils/local-engine/CMakeLists.txt +++ b/utils/local-engine/CMakeLists.txt @@ -193,7 +193,9 @@ target_compile_options(absl_int128 PRIVATE -fPIC) target_compile_options(absl_raw_hash_set PRIVATE -fPIC) target_compile_options(absl_raw_hash_set PRIVATE -fPIC) add_compile_options(-fPIC) - +if (ENABLE_JEMALLOC) + target_compile_options(_jemalloc PRIVATE -fPIC) +endif() #target_compile_options(zstd PRIVATE -fPIC) #set(CPACK_PACKAGE_VERSION 0.1.0) diff --git a/utils/local-engine/Common/Logger.cpp b/utils/local-engine/Common/Logger.cpp index 293f3225b792..cd00aed91f7c 100644 --- a/utils/local-engine/Common/Logger.cpp +++ b/utils/local-engine/Common/Logger.cpp @@ -13,7 +13,7 @@ void local_engine::Logger::initConsoleLogger() AutoPtr pCons(new ConsoleChannel); AutoPtr pAsync(new AsyncChannel(pCons)); Poco::Logger::root().setChannel(pAsync); - Poco::Logger::root().setLevel("debug"); + Poco::Logger::root().setLevel("error"); Poco::Logger::root().debug("init logger success"); } diff --git a/utils/local-engine/Common/common.cpp b/utils/local-engine/Common/common.cpp index 7cf5574b27a5..1949a90a36f9 100644 --- a/utils/local-engine/Common/common.cpp +++ b/utils/local-engine/Common/common.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -24,10 +25,17 @@ void init() registerAllFunctions(); local_engine::SerializedPlanParser::shared_context = SharedContextHolder(Context::createShared()); local_engine::SerializedPlanParser::global_context = Context::createGlobal(local_engine::SerializedPlanParser::shared_context.get()); + // disable global context initialized + local_engine::SerializedPlanParser::global_context->setBackgroundExecutorsInitialized(true); local_engine::SerializedPlanParser::global_context->makeGlobalContext(); local_engine::SerializedPlanParser::global_context->setConfig(local_engine::SerializedPlanParser::config); local_engine::SerializedPlanParser::global_context->setPath("/"); local_engine::Logger::initConsoleLogger(); + + /// 128 MB + constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128; + constexpr size_t compiled_expression_cache_elements_size_default = 10000; + CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size_default, compiled_expression_cache_size_default); } char * createExecutor(std::string plan_string) diff --git a/utils/local-engine/Parser/SerializedPlanParser.cpp b/utils/local-engine/Parser/SerializedPlanParser.cpp index a624241acc5c..659be66e8521 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.cpp +++ b/utils/local-engine/Parser/SerializedPlanParser.cpp @@ -16,7 +16,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -86,7 +88,7 @@ DB::QueryPlanPtr local_engine::SerializedPlanParser::parseMergeTreeTable(const s auto metadata = local_engine::buildMetaData(names_and_types_list, this->context); auto t_metadata = watch.elapsedMicroseconds(); query_context.metadata = metadata; - auto storage = storageFactory.getStorage(DB::StorageID(merge_tree_table.database, merge_tree_table.table), [merge_tree_table, metadata]() -> local_engine::CustomStorageMergeTreePtr { + auto storage = storageFactory.getStorage(DB::StorageID(merge_tree_table.database, merge_tree_table.table), metadata->getColumns(), [merge_tree_table, metadata]() -> local_engine::CustomStorageMergeTreePtr { auto custom_storage_merge_tree = std::make_shared( DB::StorageID(merge_tree_table.database, merge_tree_table.table), merge_tree_table.relative_path, @@ -256,6 +258,12 @@ DB::QueryPlanPtr local_engine::SerializedPlanParser::parseOp(const substrait::Re } required_columns.emplace_back(DB::NameWithAlias (name, name)); } + else if (expr.has_literal()) + { + auto const_col = parseArgument(actions_dag, expr); + actions_dag->addOrReplaceInIndex(*const_col); + required_columns.emplace_back(DB::NameWithAlias(const_col->result_name, const_col->result_name)); + } else { throw std::runtime_error("unsupported projection type"); @@ -336,7 +344,46 @@ DB::QueryPlanStepPtr local_engine::SerializedPlanParser::parseAggregate(DB::Quer auto expression_before_aggregate = std::make_unique(input, expression); plan.addStep(std::move(expression_before_aggregate)); - // TODO need support grouping key + std::set phase_set; + for (int i = 0; i < rel.measures_size(); ++i) + { + const auto & measure = rel.measures(i); + phase_set.emplace(measure.measure().phase()); + } + if (phase_set.size() > 1) + { + throw std::runtime_error("two many aggregate phase!"); + } + bool final=true; + if (phase_set.contains(substrait::AggregationPhase::AGGREGATION_PHASE_INITIAL_TO_INTERMEDIATE) + || phase_set.contains(substrait::AggregationPhase::AGGREGATION_PHASE_INTERMEDIATE_TO_INTERMEDIATE)) + final = false; + + bool only_merge = phase_set.contains(substrait::AggregationPhase::AGGREGATION_PHASE_INTERMEDIATE_TO_RESULT); + + + DB::ColumnNumbers keys = {}; + if (rel.groupings_size() == 1) + { + for (auto group : rel.groupings(0).grouping_expressions()) + { + if (group.has_selection() && group.selection().has_direct_reference()) + { + keys.emplace_back(group.selection().direct_reference().struct_field().field()); + } + else + { + throw std::runtime_error("unsupported group expression"); + + } + } + } + // only support one grouping or no grouping + else if (rel.groupings_size() != 0) + { + throw std::runtime_error("too many groupings"); + } + auto aggregates = DB::AggregateDescriptions(); for (int i = 0; i < rel.measures_size(); ++i) { @@ -346,25 +393,46 @@ DB::QueryPlanStepPtr local_engine::SerializedPlanParser::parseAggregate(DB::Quer auto function_name_idx = function_signature.find(":"); assert(function_name_idx != function_signature.npos && ("invalid function signature: " + function_signature).c_str()); auto function_name = function_signature.substr(0, function_name_idx); - agg.column_name = function_name +"(" + measure_names.at(i) + ")"; + if (only_merge) + { + agg.column_name = measure_names.at(i); + } + else + { + agg.column_name = function_name +"(" + measure_names.at(i) + ")"; + } agg.arguments = DB::ColumnNumbers{plan.getCurrentDataStream().header.getPositionByName(measure_names.at(i))}; agg.argument_names = DB::Names{measure_names.at(i)}; agg.function = ::getAggregateFunction(function_name, {plan.getCurrentDataStream().header.getByName(measure_names.at(i)).type}); aggregates.push_back(agg); } - auto aggregating_step = std::make_unique( - plan.getCurrentDataStream(), - this->getAggregateParam(plan.getCurrentDataStream().header, {}, aggregates), - true, - 1000000, - 1, - 1, - 1, - false, - nullptr, - DB::SortDescription()); - return aggregating_step; + + if (only_merge) + { + auto transform_params = std::make_shared(this->getMergedAggregateParam(plan.getCurrentDataStream().header, keys, aggregates), final); + return std::make_unique( + plan.getCurrentDataStream(), + transform_params, + false, + 1, + 1); + } + else + { + auto aggregating_step = std::make_unique( + plan.getCurrentDataStream(), + this->getAggregateParam(plan.getCurrentDataStream().header, keys, aggregates), + final, + 1000000, + 1, + 1, + 1, + false, + nullptr, + DB::SortDescription()); + return aggregating_step; + } } DB::NamesAndTypesList local_engine::SerializedPlanParser::blockToNameAndTypeList(const DB::Block & header) @@ -411,6 +479,7 @@ std::string local_engine::SerializedPlanParser::getFunctionName(std::string func } else { + LOG_ERROR(&Poco::Logger::get("SerializedPlanParser"), "doesn't support function {}", function_signature); throw std::runtime_error("doesn't support function " + function_signature); } } @@ -521,6 +590,7 @@ DB::QueryPlanPtr local_engine::SerializedPlanParser::parse(std::string & plan) { auto plan_ptr = std::make_unique(); plan_ptr->ParseFromString(plan); +// std::cerr << plan_ptr->DebugString() <query_pipeline = QueryPipelineBuilder::getPipeline(std::move(*pipeline_builder)); auto t_pipeline = stopwatch.elapsedMicroseconds(); diff --git a/utils/local-engine/Parser/SerializedPlanParser.h b/utils/local-engine/Parser/SerializedPlanParser.h index 33359d908149..e505e5530a81 100644 --- a/utils/local-engine/Parser/SerializedPlanParser.h +++ b/utils/local-engine/Parser/SerializedPlanParser.h @@ -67,12 +67,17 @@ static const std::map SCALAR_FUNCTIONS = { {"and", "and"}, {"lte", "lessOrEquals"}, {"lt", "less"}, - {"multiply", "multiply"}, - {"sum", "sum"}, {"TO_DATE", "toDate"}, {"equal", "equals"}, {"cast", ""}, - {"alias", "alias"} + {"alias", "alias"}, + {"subtract", "minus"}, + {"multiply", "multiply"}, + {"add", "plus"}, + // aggregate functions + {"count", "count"}, + {"avg", "avg"}, + {"sum", "sum"} }; static const std::set FUNCTION_NEED_KEEP_ARGUMENTS = {"alias"}; @@ -142,8 +147,14 @@ class SerializedPlanParser nullptr, settings.max_threads, settings.min_free_disk_space_for_temporary_data, - false, - settings.min_count_to_compile_aggregate_expression); + true, + 3); + } + + Aggregator::Params getMergedAggregateParam(const Block & header, const ColumnNumbers & keys, const AggregateDescriptions & aggregates) + { + Settings settings; + return Aggregator::Params(header, keys, aggregates, false, settings.max_threads); } diff --git a/utils/local-engine/Storages/SourceFromJavaIter.cpp b/utils/local-engine/Storages/SourceFromJavaIter.cpp index 3853d209c275..1ce542e5f7c8 100644 --- a/utils/local-engine/Storages/SourceFromJavaIter.cpp +++ b/utils/local-engine/Storages/SourceFromJavaIter.cpp @@ -1,4 +1,5 @@ #include "SourceFromJavaIter.h" +#include namespace local_engine { @@ -17,12 +18,15 @@ DB::Chunk SourceFromJavaIter::generate() DB::Block * data = reinterpret_cast(byteArrayToLong(env, block)); size_t rows = data->rows(); auto chunk = DB::Chunk(data->mutateColumns(), rows); -// delete data; + auto info = std::make_shared(); + info->is_overflows = data->info.is_overflows; + info->bucket_num = data->info.bucket_num; + chunk.setChunkInfo(info); return chunk; } else { - return DB::Chunk(); + return {}; } } SourceFromJavaIter::~SourceFromJavaIter() diff --git a/utils/local-engine/Storages/StorageMergeTreeFactory.cpp b/utils/local-engine/Storages/StorageMergeTreeFactory.cpp index 2d54bd2338f3..d2b3cd985b4a 100644 --- a/utils/local-engine/Storages/StorageMergeTreeFactory.cpp +++ b/utils/local-engine/Storages/StorageMergeTreeFactory.cpp @@ -6,16 +6,33 @@ local_engine::StorageMergeTreeFactory & local_engine::StorageMergeTreeFactory::i return ret; } local_engine::CustomStorageMergeTreePtr -local_engine::StorageMergeTreeFactory::getStorage(StorageID id, std::function creator) +local_engine::StorageMergeTreeFactory::getStorage(StorageID id, ColumnsDescription columns, std::function creator) { auto table_name = id.database_name + "." + id.table_name; if (!storage_map.contains(table_name)) { std::lock_guard lock(storage_map_mutex); + if (storage_map.contains(table_name)) + { + std::set& existed_columns = storage_columns_map.at(table_name); + for (const auto& column : columns) + { + if (!existed_columns.contains(column.name)) + { + storage_map.erase(table_name); + storage_columns_map.erase(table_name); + } + } + } if (!storage_map.contains(table_name)) { storage_map.emplace(table_name, creator()); + storage_columns_map.emplace(table_name, std::set()); + for (const auto& column : storage_map.at(table_name)->getInMemoryMetadataPtr()->columns) + { + storage_columns_map.at(table_name).emplace(column.name); + } } } return storage_map.at(table_name); @@ -38,6 +55,7 @@ local_engine::StorageInMemoryMetadataPtr local_engine::StorageMergeTreeFactory:: std::unordered_map local_engine::StorageMergeTreeFactory::storage_map; +std::unordered_map> local_engine::StorageMergeTreeFactory::storage_columns_map; std::mutex local_engine::StorageMergeTreeFactory::storage_map_mutex; std::unordered_map local_engine::StorageMergeTreeFactory::metadata_map; diff --git a/utils/local-engine/Storages/StorageMergeTreeFactory.h b/utils/local-engine/Storages/StorageMergeTreeFactory.h index a9ed3f03f083..b595f265b391 100644 --- a/utils/local-engine/Storages/StorageMergeTreeFactory.h +++ b/utils/local-engine/Storages/StorageMergeTreeFactory.h @@ -10,12 +10,13 @@ class StorageMergeTreeFactory { public: static StorageMergeTreeFactory & instance(); - static CustomStorageMergeTreePtr getStorage(StorageID id, std::function creator); + static CustomStorageMergeTreePtr getStorage(StorageID id, ColumnsDescription columns, std::function creator); static StorageInMemoryMetadataPtr getMetadata(StorageID id, std::function creator); private: static std::unordered_map storage_map; + static std::unordered_map> storage_columns_map; static std::mutex storage_map_mutex; static std::unordered_map metadata_map; diff --git a/utils/local-engine/cmake/libcxx/CMakeLists.txt b/utils/local-engine/cmake/libcxx/CMakeLists.txt deleted file mode 100644 index f329bd2742f4..000000000000 --- a/utils/local-engine/cmake/libcxx/CMakeLists.txt +++ /dev/null @@ -1,77 +0,0 @@ -include(CheckCXXCompilerFlag) - -set(LIBCXX_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libcxx") - -set(SRCS -"${LIBCXX_SOURCE_DIR}/src/algorithm.cpp" -"${LIBCXX_SOURCE_DIR}/src/any.cpp" -"${LIBCXX_SOURCE_DIR}/src/atomic.cpp" -"${LIBCXX_SOURCE_DIR}/src/barrier.cpp" -"${LIBCXX_SOURCE_DIR}/src/bind.cpp" -"${LIBCXX_SOURCE_DIR}/src/charconv.cpp" -"${LIBCXX_SOURCE_DIR}/src/chrono.cpp" -"${LIBCXX_SOURCE_DIR}/src/condition_variable.cpp" -"${LIBCXX_SOURCE_DIR}/src/condition_variable_destructor.cpp" -"${LIBCXX_SOURCE_DIR}/src/debug.cpp" -"${LIBCXX_SOURCE_DIR}/src/exception.cpp" -"${LIBCXX_SOURCE_DIR}/src/experimental/memory_resource.cpp" -"${LIBCXX_SOURCE_DIR}/src/filesystem/directory_iterator.cpp" -"${LIBCXX_SOURCE_DIR}/src/filesystem/int128_builtins.cpp" -"${LIBCXX_SOURCE_DIR}/src/filesystem/operations.cpp" -"${LIBCXX_SOURCE_DIR}/src/functional.cpp" -"${LIBCXX_SOURCE_DIR}/src/future.cpp" -"${LIBCXX_SOURCE_DIR}/src/hash.cpp" -"${LIBCXX_SOURCE_DIR}/src/ios.cpp" -"${LIBCXX_SOURCE_DIR}/src/ios.instantiations.cpp" -"${LIBCXX_SOURCE_DIR}/src/iostream.cpp" -"${LIBCXX_SOURCE_DIR}/src/locale.cpp" -"${LIBCXX_SOURCE_DIR}/src/memory.cpp" -"${LIBCXX_SOURCE_DIR}/src/mutex.cpp" -"${LIBCXX_SOURCE_DIR}/src/mutex_destructor.cpp" -"${LIBCXX_SOURCE_DIR}/src/new.cpp" -"${LIBCXX_SOURCE_DIR}/src/optional.cpp" -"${LIBCXX_SOURCE_DIR}/src/random.cpp" -"${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp" -"${LIBCXX_SOURCE_DIR}/src/regex.cpp" -"${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp" -"${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp" -"${LIBCXX_SOURCE_DIR}/src/string.cpp" -"${LIBCXX_SOURCE_DIR}/src/strstream.cpp" -"${LIBCXX_SOURCE_DIR}/src/system_error.cpp" -"${LIBCXX_SOURCE_DIR}/src/thread.cpp" -"${LIBCXX_SOURCE_DIR}/src/typeinfo.cpp" -"${LIBCXX_SOURCE_DIR}/src/utility.cpp" -"${LIBCXX_SOURCE_DIR}/src/valarray.cpp" -"${LIBCXX_SOURCE_DIR}/src/variant.cpp" -"${LIBCXX_SOURCE_DIR}/src/vector.cpp" -) - -add_library(cxx SHARED ${SRCS}) - -target_include_directories(cxx SYSTEM BEFORE PUBLIC $) -target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI) - -# Enable capturing stack traces for all exceptions. -if (USE_UNWIND) - target_compile_definitions(cxx PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) -endif () - -# Override the deduced attribute support that causes error. -if (OS_DARWIN AND COMPILER_GCC) - add_compile_definitions(_LIBCPP_INIT_PRIORITY_MAX) -endif () - -target_compile_options(cxx PUBLIC $<$:-nostdinc++>) - -# Third party library may have substandard code. -target_compile_options(cxx PRIVATE -w) - -target_link_libraries(cxx PUBLIC cxxabi) - -install( - TARGETS cxx - EXPORT global - ARCHIVE DESTINATION lib - RUNTIME DESTINATION lib - LIBRARY DESTINATION lib -) diff --git a/utils/local-engine/cmake/libcxxabi/CMakeLists.txt b/utils/local-engine/cmake/libcxxabi/CMakeLists.txt deleted file mode 100644 index a898de972722..000000000000 --- a/utils/local-engine/cmake/libcxxabi/CMakeLists.txt +++ /dev/null @@ -1,48 +0,0 @@ -set(LIBCXXABI_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libcxxabi") - -set(SRCS -"${LIBCXXABI_SOURCE_DIR}/src/stdlib_stdexcept.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_virtual.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/fallback_malloc.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_default_handlers.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/abort_message.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_demangle.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_exception.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/private_typeinfo.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/stdlib_typeinfo.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_aux_runtime.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/cxa_vector.cpp" -"${LIBCXXABI_SOURCE_DIR}/src/stdlib_new_delete.cpp" -) - -add_library(cxxabi SHARED ${SRCS}) - -# Third party library may have substandard code. -target_compile_options(cxxabi PRIVATE -w) - -target_include_directories(cxxabi SYSTEM BEFORE - PUBLIC $ - PRIVATE $ -) -target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY) -target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast. -target_link_libraries(cxxabi PUBLIC ${EXCEPTION_HANDLING_LIBRARY}) - -# Enable capturing stack traces for all exceptions. -if (USE_UNWIND) - target_compile_definitions(cxxabi PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) -endif () - -install( - TARGETS cxxabi - EXPORT global - ARCHIVE DESTINATION lib - RUNTIME DESTINATION lib - LIBRARY DESTINATION lib -) diff --git a/utils/local-engine/cmake/libunwind/CMakeLists.txt b/utils/local-engine/cmake/libunwind/CMakeLists.txt deleted file mode 100644 index dae93e5a76ea..000000000000 --- a/utils/local-engine/cmake/libunwind/CMakeLists.txt +++ /dev/null @@ -1,74 +0,0 @@ -include(CheckCCompilerFlag) -include(CheckCXXCompilerFlag) - -set(LIBUNWIND_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libunwind") - -set(LIBUNWIND_CXX_SOURCES - "${LIBUNWIND_SOURCE_DIR}/src/libunwind.cpp" - "${LIBUNWIND_SOURCE_DIR}/src/Unwind-EHABI.cpp" - "${LIBUNWIND_SOURCE_DIR}/src/Unwind-seh.cpp") -if (APPLE) - set(LIBUNWIND_CXX_SOURCES ${LIBUNWIND_CXX_SOURCES} "${LIBUNWIND_SOURCE_DIR}/src/Unwind_AppleExtras.cpp") -endif () - -set(LIBUNWIND_C_SOURCES - "${LIBUNWIND_SOURCE_DIR}/src/UnwindLevel1.c" - "${LIBUNWIND_SOURCE_DIR}/src/UnwindLevel1-gcc-ext.c" - "${LIBUNWIND_SOURCE_DIR}/src/Unwind-sjlj.c" - # Use unw_backtrace to override libgcc's backtrace symbol for better ABI compatibility - "${ClickHouse_SOURCE_DIR}/contrib/libunwind-cmake/unwind-override.c") -set_source_files_properties(${LIBUNWIND_C_SOURCES} PROPERTIES COMPILE_FLAGS "-std=c99") - -set(LIBUNWIND_ASM_SOURCES - "${LIBUNWIND_SOURCE_DIR}/src/UnwindRegistersRestore.S" - "${LIBUNWIND_SOURCE_DIR}/src/UnwindRegistersSave.S") - -# CMake doesn't pass the correct architecture for Apple prior to CMake 3.19 [1] -# Workaround these two issues by compiling as C. -# -# [1]: https://gitlab.kitware.com/cmake/cmake/-/issues/20771 -if (APPLE AND CMAKE_VERSION VERSION_LESS 3.19) - set_source_files_properties(${LIBUNWIND_ASM_SOURCES} PROPERTIES LANGUAGE C) -else() - enable_language(ASM) -endif() - -set(LIBUNWIND_SOURCES - ${LIBUNWIND_CXX_SOURCES} - ${LIBUNWIND_C_SOURCES} - ${LIBUNWIND_ASM_SOURCES}) - -add_library(unwind SHARED ${LIBUNWIND_SOURCES}) - -target_include_directories(unwind SYSTEM BEFORE PUBLIC $) -target_compile_definitions(unwind PRIVATE -D_LIBUNWIND_NO_HEAP=1 -D_DEBUG -D_LIBUNWIND_IS_NATIVE_ONLY) -target_compile_options(unwind PRIVATE -fno-exceptions -funwind-tables -fno-sanitize=all $<$:-nostdinc++ -fno-rtti>) - -check_c_compiler_flag(-Wunused-but-set-variable HAVE_WARNING_UNUSED_BUT_SET_VARIABLE) -if (HAVE_WARNING_UNUSED_BUT_SET_VARIABLE) - target_compile_options(unwind PRIVATE -Wno-unused-but-set-variable) -endif () - -check_cxx_compiler_flag(-Wmissing-attributes HAVE_WARNING_MISSING_ATTRIBUTES) -if (HAVE_WARNING_MISSING_ATTRIBUTES) - target_compile_options(unwind PRIVATE -Wno-missing-attributes) -endif () - -check_cxx_compiler_flag(-Wmaybe-uninitialized HAVE_WARNING_MAYBE_UNINITIALIZED) -if (HAVE_WARNING_MAYBE_UNINITIALIZED) - target_compile_options(unwind PRIVATE -Wno-maybe-uninitialized) -endif () - -# The library is using register variables that are bound to specific registers -# Example: DwarfInstructions.hpp: register unsigned long long x16 __asm("x16") = cfa; -check_cxx_compiler_flag(-Wregister HAVE_WARNING_REGISTER) -if (HAVE_WARNING_REGISTER) - target_compile_options(unwind PRIVATE "$<$:-Wno-register>") -endif () - -install( - TARGETS unwind - EXPORT global - LIBRARY DESTINATION lib - ARCHIVE DESTINATION lib -) diff --git a/utils/local-engine/local_engine_jni.cpp b/utils/local-engine/local_engine_jni.cpp index 8f367b002e37..f81edde25241 100644 --- a/utils/local-engine/local_engine_jni.cpp +++ b/utils/local-engine/local_engine_jni.cpp @@ -66,7 +66,6 @@ jint JNI_OnLoad(JavaVM * vm, void * reserved) void JNI_OnUnload(JavaVM * vm, void * reserved) { - std::cerr << "JNI_OnUnload" << std::endl; JNIEnv * env; vm->GetEnv(reinterpret_cast(&env), JNI_VERSION_1_8); @@ -330,6 +329,10 @@ jstring Java_io_glutenproject_vectorized_CHNativeBlock_nativeColumnType(JNIEnv * { type = "Long"; } + else if (which.isUInt64()) + { + type = "Long"; + } else if (which.isInt8()) { type = "Byte"; @@ -342,9 +345,16 @@ jstring Java_io_glutenproject_vectorized_CHNativeBlock_nativeColumnType(JNIEnv * { type = "String"; } + else if (which.isAggregateFunction()) + { + type = "Binary"; + } else { - throw std::runtime_error("unsupported datatype " + std::string(block->getByPosition(position).type->getFamilyName())); + auto type_name = std::string(block->getByPosition(position).type->getFamilyName()); + auto col_name = block->getByPosition(position).name; + LOG_ERROR(&Poco::Logger::get("jni"), "column {}, unsupported datatype {}", col_name, type_name); + throw std::runtime_error("unsupported datatype " + type_name); } return charTojstring(env, type.c_str()); @@ -467,11 +477,13 @@ jlong Java_io_glutenproject_vectorized_CHShuffleSplitterJniWrapper_nativeMake(JN std::vector expr_vec; if (expr_list != nullptr) { - std::string exprs; int len = env->GetArrayLength(expr_list); - exprs.reserve(len); - env->GetByteArrayRegion(expr_list, 0, len, reinterpret_cast(exprs.data())); - for (auto expr :stringSplit(exprs, ',')) + auto * str = reinterpret_cast(new char[len]); + memset(str,0, len); + env->GetByteArrayRegion(expr_list, 0, len, str); + std::string exprs(str, str+len); + delete[] str; + for (const auto& expr :stringSplit(exprs, ',')) { expr_vec.emplace_back(expr); } diff --git a/utils/local-engine/tests/benchmark_local_engine.cpp b/utils/local-engine/tests/benchmark_local_engine.cpp index c0d98c3fde7b..26bbd16e347d 100644 --- a/utils/local-engine/tests/benchmark_local_engine.cpp +++ b/utils/local-engine/tests/benchmark_local_engine.cpp @@ -1167,8 +1167,8 @@ static void BM_TestDecompress(benchmark::State& state) //BENCHMARK(BM_SIMDFilter)->Arg(1)->Arg(0)->Unit(benchmark::kMillisecond)->Iterations(40); //BENCHMARK(BM_NormalFilter)->Arg(1)->Arg(0)->Unit(benchmark::kMillisecond)->Iterations(40); //BENCHMARK(BM_TPCH_Q6)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); -BENCHMARK(BM_MERGE_TREE_TPCH_Q6)->Unit(benchmark::kMillisecond)->Iterations(50)->Repetitions(6)->ComputeStatistics("80%", quantile); -//BENCHMARK(BM_MERGE_TREE_TPCH_Q6_NEW)->Unit(benchmark::kMillisecond)->Iterations(100); +BENCHMARK(BM_MERGE_TREE_TPCH_Q6)->Unit(benchmark::kMillisecond)->Iterations(10); +BENCHMARK(BM_MERGE_TREE_TPCH_Q6_NEW)->Unit(benchmark::kMillisecond)->Iterations(10); //BENCHMARK(BM_CHColumnToSparkRowWithString)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10); //BENCHMARK(BM_SparkRowToCHColumn)->Arg(1)->Arg(3)->Arg(30)->Arg(90)->Arg(150)->Unit(benchmark::kMillisecond)->Iterations(10);