From 4af499fb5731c744462dbd2daede0fcc0f1ebf05 Mon Sep 17 00:00:00 2001 From: Jia Date: Fri, 24 Nov 2023 07:43:22 +0000 Subject: [PATCH] fix the compile issue --- cpp/velox/benchmarks/ColumnarToRowBenchmark.cc | 4 +++- cpp/velox/benchmarks/ParquetWriteBenchmark.cc | 4 +++- cpp/velox/benchmarks/common/BenchmarkUtils.h | 6 +++++- cpp/velox/compute/WholeStageResultIterator.cc | 5 +++++ cpp/velox/memory/VeloxColumnarBatch.cc | 8 +++++--- cpp/velox/tests/CMakeLists.txt | 3 ++- cpp/velox/tests/VeloxColumnarBatchSerializerTest.cc | 3 ++- cpp/velox/utils/VeloxArrowUtils.cc | 5 +++-- cpp/velox/utils/VeloxArrowUtils.h | 10 ++++++++++ ep/build-velox/src/build_velox.sh | 1 + 10 files changed, 39 insertions(+), 10 deletions(-) diff --git a/cpp/velox/benchmarks/ColumnarToRowBenchmark.cc b/cpp/velox/benchmarks/ColumnarToRowBenchmark.cc index 0951b08bf7fc..093099c755d2 100644 --- a/cpp/velox/benchmarks/ColumnarToRowBenchmark.cc +++ b/cpp/velox/benchmarks/ColumnarToRowBenchmark.cc @@ -35,6 +35,7 @@ #include "memory/VeloxMemoryManager.h" #include "operators/serializer/VeloxColumnarToRowConverter.h" #include "utils/TestUtils.h" +#include "utils/VeloxArrowUtils.h" #include "utils/macros.h" #include "velox/vector/arrow/Bridge.h" @@ -95,7 +96,8 @@ class GoogleBenchmarkColumnarToRow { ArrowArray arrowArray; ArrowSchema arrowSchema; ASSERT_NOT_OK(arrow::ExportRecordBatch(rb, &arrowArray, &arrowSchema)); - return velox::importFromArrowAsOwner(arrowSchema, arrowArray, gluten::defaultLeafVeloxMemoryPool().get()); + return velox::importFromArrowAsOwner( + arrowSchema, arrowArray, ArrowUtils::getBridgeOptions(), gluten::defaultLeafVeloxMemoryPool().get()); } protected: diff --git a/cpp/velox/benchmarks/ParquetWriteBenchmark.cc b/cpp/velox/benchmarks/ParquetWriteBenchmark.cc index 568b903d69d1..dc3243695037 100644 --- a/cpp/velox/benchmarks/ParquetWriteBenchmark.cc +++ b/cpp/velox/benchmarks/ParquetWriteBenchmark.cc @@ -39,6 +39,7 @@ #include "memory/ColumnarBatch.h" #include 
"memory/VeloxMemoryManager.h" #include "utils/TestUtils.h" +#include "utils/VeloxArrowUtils.h" #include "utils/macros.h" #include "velox/dwio/parquet/writer/Writer.h" #include "velox/vector/arrow/Bridge.h" @@ -101,7 +102,8 @@ class GoogleBenchmarkParquetWrite { ArrowArray arrowArray; ArrowSchema arrowSchema; ASSERT_NOT_OK(arrow::ExportRecordBatch(rb, &arrowArray, &arrowSchema)); - auto vp = velox::importFromArrowAsOwner(arrowSchema, arrowArray, gluten::defaultLeafVeloxMemoryPool().get()); + auto vp = velox::importFromArrowAsOwner( + arrowSchema, arrowArray, gluten::ArrowUtils::getBridgeOptions(), gluten::defaultLeafVeloxMemoryPool().get()); return std::make_shared(std::dynamic_pointer_cast(vp)); } diff --git a/cpp/velox/benchmarks/common/BenchmarkUtils.h b/cpp/velox/benchmarks/common/BenchmarkUtils.h index 16a22cdb1d50..6fb0524159a9 100644 --- a/cpp/velox/benchmarks/common/BenchmarkUtils.h +++ b/cpp/velox/benchmarks/common/BenchmarkUtils.h @@ -30,6 +30,7 @@ #include "memory/VeloxColumnarBatch.h" #include "memory/VeloxMemoryManager.h" #include "shuffle/Options.h" +#include "utils/VeloxArrowUtils.h" #include "utils/exception.h" #include "velox/common/memory/Memory.h" #include "velox/dwio/common/tests/utils/DataFiles.h" @@ -95,7 +96,10 @@ void abortIfFileNotExists(const std::string& filepath); inline std::shared_ptr convertBatch(std::shared_ptr cb) { if (cb->getType() != "velox") { auto vp = facebook::velox::importFromArrowAsOwner( - *cb->exportArrowSchema(), *cb->exportArrowArray(), gluten::defaultLeafVeloxMemoryPool().get()); + *cb->exportArrowSchema(), + *cb->exportArrowArray(), + gluten::ArrowUtils::getBridgeOptions(), + gluten::defaultLeafVeloxMemoryPool().get()); return std::make_shared(std::dynamic_pointer_cast(vp)); } else { return cb; diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index 14d745e63022..8f532f230d90 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ 
b/cpp/velox/compute/WholeStageResultIterator.cc @@ -378,6 +378,9 @@ std::unordered_map WholeStageResultIterator::getQueryC getConfigValue(confMap_, kBloomFilterNumBits, "8388608"); configs[velox::core::QueryConfig::kSparkBloomFilterMaxNumBits] = getConfigValue(confMap_, kBloomFilterMaxNumBits, "4194304"); + + configs[velox::core::QueryConfig::kArrowBridgeTimestampUnit] = "2"; /* string value, like the other entries: assigning int 2 would pick std::string::operator=(char) and store '\x02' */ + + } catch (const std::invalid_argument& err) { std::string errDetails = err.what(); throw std::runtime_error("Invalid conf arg: " + errDetails); @@ -407,6 +410,8 @@ std::shared_ptr WholeStageResultIterator::createConnectorConfig() // The semantics of reading as lower case is opposite with case-sensitive. configs[velox::connector::hive::HiveConfig::kFileColumnNamesReadAsLowerCase] = getConfigValue(confMap_, kCaseSensitive, "false") == "false" ? "true" : "false"; + configs[velox::connector::hive::HiveConfig::kArrowBridgeTimestampUnit] = "2"; /* string value: a bare int 2 would be stored as the single char '\x02' */ + return std::make_shared(configs); } diff --git a/cpp/velox/memory/VeloxColumnarBatch.cc b/cpp/velox/memory/VeloxColumnarBatch.cc index 991a4e9b1a0c..85da7f7c442c 100644 --- a/cpp/velox/memory/VeloxColumnarBatch.cc +++ b/cpp/velox/memory/VeloxColumnarBatch.cc @@ -16,6 +16,7 @@ */ #include "VeloxColumnarBatch.h" #include "compute/VeloxRuntime.h" +#include "utils/VeloxArrowUtils.h" #include "velox/row/UnsafeRowFast.h" #include "velox/type/Type.h" #include "velox/vector/FlatVector.h" @@ -65,14 +66,14 @@ void VeloxColumnarBatch::ensureFlattened() { std::shared_ptr VeloxColumnarBatch::exportArrowSchema() { auto out = std::make_shared(); ensureFlattened(); - velox::exportToArrow(flattened_, *out); + velox::exportToArrow(flattened_, ArrowUtils::getBridgeOptions(), *out); return out; } std::shared_ptr VeloxColumnarBatch::exportArrowArray() { auto out = std::make_shared(); ensureFlattened(); - velox::exportToArrow(flattened_, *out, flattened_->pool()); + velox::exportToArrow(flattened_, ArrowUtils::getBridgeOptions(), *out, flattened_->pool()); return out; } @@ -117,7 
+118,8 @@ std::shared_ptr VeloxColumnarBatch::from( auto compositeVeloxVector = makeRowVector(childNames, childVectors, cb->numRows(), pool); return std::make_shared(compositeVeloxVector); } - auto vp = velox::importFromArrowAsOwner(*cb->exportArrowSchema(), *cb->exportArrowArray(), pool); + auto vp = velox::importFromArrowAsOwner( + *cb->exportArrowSchema(), *cb->exportArrowArray(), ArrowUtils::getBridgeOptions(), pool); return std::make_shared(std::dynamic_pointer_cast(vp)); } diff --git a/cpp/velox/tests/CMakeLists.txt b/cpp/velox/tests/CMakeLists.txt index c2d412a56b0b..0b4b27ee4722 100644 --- a/cpp/velox/tests/CMakeLists.txt +++ b/cpp/velox/tests/CMakeLists.txt @@ -31,7 +31,8 @@ function(add_velox_test TEST_EXEC) message(FATAL_ERROR "No sources specified for test ${TEST_NAME}") endif() add_executable(${TEST_EXEC} ${SOURCES}) - target_include_directories(${TEST_EXEC} PRIVATE ${CMAKE_SOURCE_DIR}/velox ${CMAKE_SOURCE_DIR}/src) + message(STATUS "Adding DuckDB headers to ${TEST_EXEC}: ${VELOX_BUILD_PATH}/_deps/duckdb-src/src/include") + target_include_directories(${TEST_EXEC} PRIVATE ${CMAKE_SOURCE_DIR}/velox ${CMAKE_SOURCE_DIR}/src ${VELOX_BUILD_PATH}/_deps/duckdb-src/src/include) target_link_libraries(${TEST_EXEC} velox GTest::gtest GTest::gtest_main google::glog benchmark::benchmark simdjson) gtest_discover_tests(${TEST_EXEC} DISCOVERY_MODE PRE_TEST) endfunction() diff --git a/cpp/velox/tests/VeloxColumnarBatchSerializerTest.cc b/cpp/velox/tests/VeloxColumnarBatchSerializerTest.cc index 86a93583aa0a..3c6364f636e4 100644 --- a/cpp/velox/tests/VeloxColumnarBatchSerializerTest.cc +++ b/cpp/velox/tests/VeloxColumnarBatchSerializerTest.cc @@ -21,6 +21,7 @@ #include "memory/VeloxColumnarBatch.h" #include "memory/VeloxMemoryManager.h" #include "operators/serializer/VeloxColumnarBatchSerializer.h" +#include "utils/VeloxArrowUtils.h" #include "velox/vector/arrow/Bridge.h" #include "velox/vector/tests/utils/VectorTestBase.h" @@ -52,7 +53,7 @@ TEST_F(VeloxColumnarBatchSerializerTest, serialize) { auto buffer 
= serializer->serializeColumnarBatches({batch}); ArrowSchema cSchema; - exportToArrow(vector, cSchema); + exportToArrow(vector, ArrowUtils::getBridgeOptions(), cSchema); auto deserializer = std::make_shared(arrowPool_.get(), veloxPool_, &cSchema); auto deserialized = deserializer->deserialize(const_cast(buffer->data()), buffer->size()); auto deserializedVector = std::dynamic_pointer_cast(deserialized)->getRowVector(); diff --git a/cpp/velox/utils/VeloxArrowUtils.cc b/cpp/velox/utils/VeloxArrowUtils.cc index db1458d15035..d91cb58bbf2f 100644 --- a/cpp/velox/utils/VeloxArrowUtils.cc +++ b/cpp/velox/utils/VeloxArrowUtils.cc @@ -26,7 +26,7 @@ namespace gluten { using namespace facebook; void toArrowSchema(const velox::TypePtr& rowType, facebook::velox::memory::MemoryPool* pool, struct ArrowSchema* out) { - exportToArrow(velox::BaseVector::create(rowType, 0, pool), *out); + exportToArrow(velox::BaseVector::create(rowType, 0, pool), ArrowUtils::getBridgeOptions(), *out); } std::shared_ptr toArrowSchema(const velox::TypePtr& rowType, facebook::velox::memory::MemoryPool* pool) { @@ -50,7 +50,8 @@ arrow::Result> recordBatch2VeloxColumnarBatch(con ArrowArray arrowArray; ArrowSchema arrowSchema; RETURN_NOT_OK(arrow::ExportRecordBatch(rb, &arrowArray, &arrowSchema)); - auto vp = velox::importFromArrowAsOwner(arrowSchema, arrowArray, gluten::defaultLeafVeloxMemoryPool().get()); + auto vp = velox::importFromArrowAsOwner( + arrowSchema, arrowArray, ArrowUtils::getBridgeOptions(), gluten::defaultLeafVeloxMemoryPool().get()); return std::make_shared(std::dynamic_pointer_cast(vp)); } diff --git a/cpp/velox/utils/VeloxArrowUtils.h b/cpp/velox/utils/VeloxArrowUtils.h index 763ceeb7fa6b..d310877d3dbb 100644 --- a/cpp/velox/utils/VeloxArrowUtils.h +++ b/cpp/velox/utils/VeloxArrowUtils.h @@ -25,9 +25,19 @@ #include "velox/buffer/Buffer.h" #include "velox/common/memory/MemoryPool.h" #include "velox/type/Type.h" +#include "velox/vector/arrow/Bridge.h" namespace gluten { +class ArrowUtils { 
+ public: + static facebook::velox::BridgeOptions getBridgeOptions() { /* Returns a default-constructed BridgeOptions with only timestampUnit overridden. */ + facebook::velox::BridgeOptions options; + options.timestampUnit = static_cast(2); /* NOTE(review): 2 is an unnamed timestamp-unit code; confirm which enumerator it maps to and prefer spelling it by name. */ + return options; + } +}; + void toArrowSchema( const facebook::velox::TypePtr& rowType, facebook::velox::memory::MemoryPool* pool, diff --git a/ep/build-velox/src/build_velox.sh b/ep/build-velox/src/build_velox.sh index b049af1a914d..62871ba5e1d1 100755 --- a/ep/build-velox/src/build_velox.sh +++ b/ep/build-velox/src/build_velox.sh @@ -131,6 +131,7 @@ function compile { echo "COMPILE_OPTION: "$COMPILE_OPTION export simdjson_SOURCE=BUNDLED + export duckdb_SOURCE=BUNDLED if [ $ARCH == 'x86_64' ]; then make $COMPILE_TYPE EXTRA_CMAKE_FLAGS="${COMPILE_OPTION}" elif [[ "$ARCH" == 'arm64' || "$ARCH" == 'aarch64' ]]; then