From 497c5c7146e3c988d0a7d04d43540f5d97b8b894 Mon Sep 17 00:00:00 2001 From: yangchuan Date: Thu, 9 Nov 2023 10:19:25 +0800 Subject: [PATCH 1/5] fix fix fix fix --- cpp/CMakeLists.txt | 8 +- cpp/core/CMakeLists.txt | 1 - cpp/core/benchmarks/CompressionBenchmark.cc | 23 +-- cpp/core/compute/Runtime.h | 3 +- cpp/core/jni/JniCommon.h | 15 +- cpp/core/jni/JniWrapper.cc | 8 +- cpp/core/shuffle/LocalPartitionWriter.cc | 2 - cpp/core/tests/RoundRobinPartitionerTest.cc | 10 +- cpp/core/utils/DebugOut.cc | 28 --- cpp/core/utils/DebugOut.h | 47 ----- cpp/core/utils/ObjectStore.cc | 3 +- cpp/core/utils/Print.h | 187 ------------------ cpp/core/utils/macros.h | 1 + cpp/core/utils/qpl/qpl_job_pool.cc | 4 +- .../benchmarks/ColumnarToRowBenchmark.cc | 12 +- cpp/velox/benchmarks/GenericBenchmark.cc | 31 ++- cpp/velox/benchmarks/ParquetWriteBenchmark.cc | 18 +- cpp/velox/benchmarks/QueryBenchmark.cc | 2 +- cpp/velox/benchmarks/ShuffleSplitBenchmark.cc | 14 +- cpp/velox/benchmarks/common/BenchmarkUtils.cc | 4 +- cpp/velox/benchmarks/common/BenchmarkUtils.h | 2 + .../benchmarks/common/FileReaderIterator.h | 1 - .../benchmarks/common/OrcReaderIterator.h | 10 - .../benchmarks/common/ParquetReaderIterator.h | 11 +- cpp/velox/compute/VeloxPlanConverter.cc | 2 - cpp/velox/compute/WholeStageResultIterator.cc | 4 +- cpp/velox/jni/VeloxJniWrapper.cc | 2 - cpp/velox/udf/examples/MyUDF.cpp | 2 +- 28 files changed, 81 insertions(+), 374 deletions(-) delete mode 100644 cpp/core/utils/DebugOut.cc delete mode 100644 cpp/core/utils/DebugOut.h diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 48eaa4f83a57..176d08b716b8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -74,13 +74,7 @@ include(ResolveDependency) # Compiler flags # -if (${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo") -# keep this original logic - add_definitions(-DGLUTEN_PRINT_DEBUG) - message(STATUS "Add definition GLUTEN_PRINT_DEBUG") -elseif (${CMAKE_BUILD_TYPE} STREQUAL "Debug") - add_definitions(-DGLUTEN_PRINT_DEBUG) - message(STATUS "Add definition GLUTEN_PRINT_DEBUG") +if (${CMAKE_BUILD_TYPE} STREQUAL "Debug") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -ggdb -O0") message(STATUS "CMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}") else () diff --git a/cpp/core/CMakeLists.txt b/cpp/core/CMakeLists.txt index fb7c714a3ef8..b1e97d83f8b0 100644 --- a/cpp/core/CMakeLists.txt +++ b/cpp/core/CMakeLists.txt @@ -209,7 +209,6 @@ set(SPARK_COLUMNAR_PLUGIN_SRCS shuffle/rss/CelebornPartitionWriter.cc shuffle/Utils.cc utils/Compression.cc - utils/DebugOut.cc utils/StringUtil.cc utils/ObjectStore.cc jni/JniError.cc diff --git a/cpp/core/benchmarks/CompressionBenchmark.cc b/cpp/core/benchmarks/CompressionBenchmark.cc index 3e2396818235..1966c8ef1c18 100644 --- a/cpp/core/benchmarks/CompressionBenchmark.cc +++ b/cpp/core/benchmarks/CompressionBenchmark.cc @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -126,7 +127,7 @@ class BenchmarkCompression { } case gluten::kQatZstd: { ipcWriteOptions.codec = createArrowIpcCodec(arrow::Compression::ZSTD, CodecBackend::QAT); - std::cout << "load qatzstd" << std::endl; + LOG(INFO) << "load qatzstd"; break; } #endif @@ -163,7 +164,7 @@ class BenchmarkCompression { state); auto endTime = std::chrono::steady_clock::now(); auto totalTime = (endTime - startTime).count(); - std::cout << "Thread " << state.thread_index() << " took " << (1.0 * totalTime / 1e9) << "s" << std::endl; + LOG(INFO) << "Thread " << state.thread_index() << " took " << (1.0 * totalTime / 1e9) << "s"; state.counters["rowgroups"] = benchmark::Counter(rowGroupIndices_.size(), benchmark::Counter::kAvgThreads, benchmark::Counter::OneK::kIs1000); @@ -301,8 +302,8 @@ class BenchmarkCompressionCacheScanBenchmark final : public BenchmarkCompression } } while (recordBatch); - std::cout << "parquet parse done elapsed time " << elapseRead / 1e6 << " ms " << std::endl; - std::cout << "batches = " << numBatches << " rows = " << numRows << std::endl; + LOG(INFO) << "parquet parse done elapsed time " << elapseRead / 1e6 << " ms "; + LOG(INFO) << "batches = " << numBatches << " rows = " << numRows; std::vector> payloads(batches.size()); std::vector> uncompressedBufferSize(batches.size()); @@ -418,16 +419,16 @@ int main(int argc, char** argv) { } else if (strcmp(argv[i], "--file") == 0) { datafile = argv[i + 1]; } else if (strcmp(argv[i], "--qat-gzip") == 0) { - std::cout << "QAT gzip is used as codec" << std::endl; + LOG(INFO) << "QAT gzip is used as codec"; codec = gluten::kQatGzip; } else if (strcmp(argv[i], "--qat-zstd") == 0) { - std::cout << "QAT zstd is used as codec" << std::endl; + LOG(INFO) << "QAT zstd is used as codec"; codec = gluten::kQatZstd; } else if (strcmp(argv[i], "--qpl-gzip") == 0) { - std::cout << "QPL gzip is used as codec" << std::endl; + LOG(INFO) << "QPL gzip is used as codec"; codec = gluten::kQplGzip; } else if (strcmp(argv[i], "--zstd") == 0) { - std::cout << "CPU zstd is used as codec" << std::endl; + LOG(INFO) << "CPU zstd is used as codec"; codec = gluten::kZstd; } else if (strcmp(argv[i], "--buffer-size") == 0) { compressBufferSize = atol(argv[i + 1]); @@ -435,9 +436,9 @@ int main(int argc, char** argv) { cpuOffset = atol(argv[i + 1]); } } - std::cout << "iterations = " << iterations << std::endl; - std::cout << "threads = " << threads << std::endl; - std::cout << "datafile = " << datafile << std::endl; + LOG(INFO) << "iterations = " << iterations; + LOG(INFO) << "threads = " << threads; + LOG(INFO) << "datafile = " << datafile; gluten::BenchmarkCompressionIterateScanBenchmark bmIterateScan(datafile, compressBufferSize); gluten::BenchmarkCompressionCacheScanBenchmark bmCacheScan(datafile, compressBufferSize); diff --git a/cpp/core/compute/Runtime.h b/cpp/core/compute/Runtime.h index 5d86656037dc..59ba0ce6e0de 100644 --- a/cpp/core/compute/Runtime.h +++ b/cpp/core/compute/Runtime.h @@ -17,6 +17,8 @@ #pragma once +#include + #include "compute/ProtobufUtils.h" #include "compute/ResultIterator.h" #include "memory/ArrowMemoryPool.h" @@ -29,7 +31,6 @@ #include "shuffle/ShuffleReader.h" #include "shuffle/ShuffleWriter.h" #include "substrait/plan.pb.h" -#include "utils/DebugOut.h" #include "utils/ObjectStore.h" namespace gluten { diff --git a/cpp/core/jni/JniCommon.h b/cpp/core/jni/JniCommon.h index 265382aa2d20..468224899cb2 100644 --- a/cpp/core/jni/JniCommon.h +++ b/cpp/core/jni/JniCommon.h @@ -28,7 +28,6 @@ #include "memory/AllocationListener.h" #include "shuffle/rss/RssClient.h" #include "utils/Compression.h" -#include "utils/DebugOut.h" #include "utils/exception.h" static jint jniVersion = JNI_VERSION_1_8; @@ -52,7 +51,7 @@ static inline void checkException(JNIEnv* env) { std::string description = jStringToCString(env, (jstring)env->CallStaticObjectMethod(describerClass, describeMethod, t)); if (env->ExceptionCheck()) { - std::cerr << "Fatal: Uncaught Java exception during calling the Java exception describer method! " << std::endl; + LOG(WARNING) << "Fatal: Uncaught Java exception during calling the Java exception describer method! "; } throw gluten::GlutenException("Error during calling Java code from native code: " + description); } @@ -106,13 +105,11 @@ static inline jmethodID getStaticMethodIdOrError(JNIEnv* env, jclass thisClass, static inline void attachCurrentThreadAsDaemonOrThrow(JavaVM* vm, JNIEnv** out) { int getEnvStat = vm->GetEnv(reinterpret_cast(out), jniVersion); if (getEnvStat == JNI_EDETACHED) { - DEBUG_OUT << "JNIEnv was not attached to current thread." << std::endl; // Reattach current thread to JVM getEnvStat = vm->AttachCurrentThreadAsDaemon(reinterpret_cast(out), NULL); if (getEnvStat != JNI_OK) { throw gluten::GlutenException("Failed to reattach current thread to JVM."); } - DEBUG_OUT << "Succeeded attaching current thread." << std::endl; return; } if (getEnvStat != JNI_OK) { @@ -161,7 +158,7 @@ static inline void backtrace() { auto size = backtrace(array, 1024); char** strings = backtrace_symbols(array, size); for (size_t i = 0; i < size; ++i) { - std::cout << strings[i] << std::endl; + LOG(INFO) << strings[i]; } free(strings); } @@ -229,8 +226,8 @@ class SparkAllocationListener final : public gluten::AllocationListener { ~SparkAllocationListener() override { JNIEnv* env; if (vm_->GetEnv(reinterpret_cast(&env), jniVersion) != JNI_OK) { - std::cerr << "SparkAllocationListener#~SparkAllocationListener(): " - << "JNIEnv was not attached to current thread" << std::endl; + LOG(WARNING) << "SparkAllocationListener#~SparkAllocationListener(): " + << "JNIEnv was not attached to current thread"; return; } env->DeleteGlobalRef(jListenerGlobalRef_); @@ -329,8 +326,8 @@ class CelebornClient : public RssClient { ~CelebornClient() { JNIEnv* env; if (vm_->GetEnv(reinterpret_cast(&env), jniVersion) != JNI_OK) { - std::cerr << "CelebornClient#~CelebornClient(): " - << "JNIEnv was not attached to current thread" << std::endl; + LOG(WARNING) << "CelebornClient#~CelebornClient(): " + << "JNIEnv was not attached to current thread"; return; } env->DeleteGlobalRef(javaCelebornShuffleWriter_); diff --git a/cpp/core/jni/JniWrapper.cc b/cpp/core/jni/JniWrapper.cc index c3edf4a4c8e3..9b6fec83411d 100644 --- a/cpp/core/jni/JniWrapper.cc +++ b/cpp/core/jni/JniWrapper.cc @@ -18,7 +18,6 @@ #include #include -#include #include "compute/ProtobufUtils.h" #include "compute/Runtime.h" #include "config/GlutenConfig.h" @@ -92,11 +91,10 @@ class JavaInputStreamAdaptor final : public arrow::io::InputStream { try { auto status = JavaInputStreamAdaptor::Close(); if (!status.ok()) { - DEBUG_OUT << __func__ << " call JavaInputStreamAdaptor::Close() failed, status:" << status.ToString() - << std::endl; + LOG(WARNING) << __func__ << " call JavaInputStreamAdaptor::Close() failed, status:" << status.ToString(); } } catch (std::exception& e) { - DEBUG_OUT << __func__ << " call JavaInputStreamAdaptor::Close() got exception:" << e.what() << std::endl; + LOG(WARNING) << __func__ << " call JavaInputStreamAdaptor::Close() got exception:" << e.what(); } } @@ -836,7 +834,7 @@ JNIEXPORT jlong JNICALL Java_io_glutenproject_vectorized_ShuffleWriterJniWrapper jobject thread = env->CallStaticObjectMethod(cls, mid); checkException(env); if (thread == NULL) { - std::cerr << "Thread.currentThread() return NULL" << std::endl; + LOG(WARNING) << "Thread.currentThread() return NULL"; } else { jmethodID midGetid = getMethodIdOrError(env, cls, "getId", "()J"); jlong sid = env->CallLongMethod(thread, midGetid); diff --git a/cpp/core/shuffle/LocalPartitionWriter.cc b/cpp/core/shuffle/LocalPartitionWriter.cc index 313be10653b8..91225f7b5211 100644 --- a/cpp/core/shuffle/LocalPartitionWriter.cc +++ b/cpp/core/shuffle/LocalPartitionWriter.cc @@ -19,7 +19,6 @@ #include #include #include "shuffle/Utils.h" -#include "utils/DebugOut.h" #include "utils/StringUtil.h" #include "utils/Timer.h" @@ -129,7 +128,6 @@ class FlushOnSpillEvictHandle final : public LocalPartitionWriter::LocalEvictHan ARROW_ASSIGN_OR_RAISE(auto start, os_->Tell()); RETURN_NOT_OK(arrow::ipc::WriteIpcPayload(*payload, options_, os_.get(), &metadataLength)); ARROW_ASSIGN_OR_RAISE(auto end, os_->Tell()); - DEBUG_OUT << "Spilled partition " << partitionId << " file start: " << start << ", file end: " << end << std::endl; spillInfo_->partitionSpillInfos.push_back({partitionId, end - start}); return arrow::Status::OK(); } diff --git a/cpp/core/tests/RoundRobinPartitionerTest.cc b/cpp/core/tests/RoundRobinPartitionerTest.cc index 9c12b2c7e1ca..03701060be2e 100644 --- a/cpp/core/tests/RoundRobinPartitionerTest.cc +++ b/cpp/core/tests/RoundRobinPartitionerTest.cc @@ -15,6 +15,7 @@ * limitations under the License. */ #include "shuffle/RoundRobinPartitioner.h" +#include #include #include @@ -46,10 +47,11 @@ class RoundRobinPartitionerTest : public ::testing::Test { template void toString(const std::vector& vec, const std::string& name) const { - std::cout << name << " = ["; - std::copy(vec.cbegin(), vec.cend(), std::ostream_iterator(std::cout, ",")); - std::cout << " ]"; - std::cout << std::endl; + std::stringstream ss; + ss << name << " = ["; + std::copy(vec.cbegin(), vec.cend(), std::ostream_iterator(ss, ",")); + ss << " ]"; + LOG(INFO) << ss.str(); } int32_t getPidSelection() const { diff --git a/cpp/core/utils/DebugOut.cc b/cpp/core/utils/DebugOut.cc deleted file mode 100644 index 04ea3308f5b9..000000000000 --- a/cpp/core/utils/DebugOut.cc +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "DebugOut.h" - -namespace gluten { - -#ifndef GLUTEN_PRINT_DEBUG - -FakeOut fakeOut; - -#endif - -} // namespace gluten diff --git a/cpp/core/utils/DebugOut.h b/cpp/core/utils/DebugOut.h deleted file mode 100644 index 6d1b3b117d2e..000000000000 --- a/cpp/core/utils/DebugOut.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#ifdef GLUTEN_PRINT_DEBUG - -#define DEBUG_OUT std::cout - -#else - -namespace gluten { - -struct FakeOut { - template - FakeOut& operator<<(T t) { - return *this; - } - - FakeOut& operator<<(std::ostream& (*endl)(std::ostream& os)) { - return *this; - } -}; - -extern FakeOut fakeOut; - -} // namespace gluten - -#define DEBUG_OUT gluten::fakeOut - -#endif diff --git a/cpp/core/utils/ObjectStore.cc b/cpp/core/utils/ObjectStore.cc index 4a8e7c7ea2f4..5dc9c7843a73 100644 --- a/cpp/core/utils/ObjectStore.cc +++ b/cpp/core/utils/ObjectStore.cc @@ -16,6 +16,7 @@ */ #include "ObjectStore.h" +#include #include gluten::ObjectStore::~ObjectStore() { @@ -24,7 +25,7 @@ gluten::ObjectStore::~ObjectStore() { for (auto itr = aliveObjectHandles_.rbegin(); itr != aliveObjectHandles_.rend(); itr++) { ResourceHandle handle = *itr; if (store_.lookup(handle) == nullptr) { - std::cerr << "Fatal: resource handle " + std::to_string(handle) + " not found in store." << std::endl; + LOG(WARNING) << "Fatal: resource handle " + std::to_string(handle) + " not found in store."; } store_.erase(handle); } diff --git a/cpp/core/utils/Print.h b/cpp/core/utils/Print.h index 0ac64be1ff72..cf55c42ce133 100644 --- a/cpp/core/utils/Print.h +++ b/cpp/core/utils/Print.h @@ -19,7 +19,6 @@ #include #include #include -#include "DebugOut.h" namespace gluten { @@ -27,190 +26,6 @@ namespace gluten { // the token `ToString` means the method of `ToString()` // the token `2String` means the method of `toString()` -#ifdef GLUTEN_PRINT_DEBUG - -template -static inline void print(const T& t) { - std::cout << t; -} - -template -static inline void printLf(const T& t) { - std::cout << t << std::endl; -} - -template -static inline void print(const A& a, const B& b) { - std::cout << a << b; -} - -template -static inline void printLf(const A& a, const B& b) { - std::cout << a << b << std::endl; -} - -template -static inline void printSplit(const A& a, const B& b, const std::string split = ": ") { - std::cout << a << split << b; -} - -template -static inline void printSplitLf(const A& a, const B& b, const std::string split = ": ") { - std::cout << a << split << b << std::endl; -} - -template -static inline void printEq(const A& a, const B& b) { - std::cout << a << " = " << b; -} - -template -static inline void printEqlf(const A& a, const B& b) { - std::cout << a << " = " << b << std::endl; -} - -template -static inline void printVs(const A& a, const B& b) { - std::cout << a << " vs " << b; -} - -template -static inline void printVslf(const A& a, const B& b) { - std::cout << a << " vs " << b << std::endl; -} - -template -static inline void printElement(const E& e, bool first = false) { - if (!first) { - std::cout << ", "; - } - std::cout << e; -} - -template -static inline void printRange(ITERATOR begin, ITERATOR end) { - std::cout << "{ "; - for (; begin != end; ++begin) { - std::cout << *begin << " "; - } - std::cout << "}" << std::endl; -} - -template -static inline void printContainer(const C& c, const std::string& containerName = "") { - if (!containerName.empty()) { - std::cout << containerName << " "; - } - std::cout << "size = " << c.size() << " "; - PrintRange(c.begin(), c.end()); -} - -template -static inline void printAB2String(const A& a, const B& b) { - std::cout << a << " = " << b.toString() << std::endl; -} - -template -static inline void print2String(const T& t, const std::string& prefix = "") { - if (!prefix.empty()) { - std::cout << prefix << ": "; - } - std::cout << t.toString() << std::endl; -} - -template -static inline void printRangeToString(ITERATOR begin, ITERATOR end) { - std::cout << "{ "; - for (; begin != end; ++begin) { - std::cout << begin->ToString() << " "; - } - std::cout << "}"; -} - -template -static inline void printRange2String(ITERATOR begin, ITERATOR end) { - std::cout << "{ "; - for (; begin != end; ++begin) { - std::cout << begin->toString() << " "; - } - std::cout << "}"; -} - -template -static inline void printContainerToString(const C& c, const std::string& containerName = "") { - if (!containerName.empty()) { - std::cout << containerName << " "; - } - std::cout << "size = " << c.size() << std::endl; - PrintRangeToString(c.begin(), c.end()); -} - -template -static inline void printContainer2String(const C& c, const std::string& containerName = "") { - if (!containerName.empty()) { - std::cout << containerName << " "; - } - std::cout << "size = " << c.size() << std::endl; - PrintRange2String(c.begin(), c.end()); -} - -template -static inline void printVectorToString(const C& c, const std::string& containerName = "") { - std::cout << containerName << " = {"; - for (auto& x : c) { - std::cout << " " << x->ToString(); - } - std::cout << " }" << std::endl; -} - -template -static inline void printVector2String(const C& c, const std::string& containerName = "") { - std::cout << containerName << " = {"; - for (auto& x : c) { - std::cout << " " << x->toString(); - } - std::cout << " }" << std::endl; -} - -template -static inline void printVectorMapping(const V& v, const std::string& vectorName = "") { - std::cout << vectorName << "\n{\n"; - for (size_t i = 0; i < v.size(); ++i) { - print("\t"); - PrintSplitLF(i, v[i], " -> "); - } - std::cout << "}" << std::endl; -} - -template -static inline void printVectorRange(const V& v, unsigned int begin, unsigned int end) { - std::cout << "{"; - auto index = begin; - for (; index != end; ++index) { - PrintElement(v[index], index == begin); - } - - std::cout << "}" << std::endl; -} - -#define PRINT(a) PrintSplit(#a, a) -#define PRINTLF(a) PrintSplitLF(#a, a) - -#define PRINT_FUNCTION_NAME() std::cout << __func__ << std::endl; -#define PRINT_FUNCTION_SPLIT_LINE() std::cout << "===== " << __func__ << " ======" << std::endl; - -#define PRINT_CONTAINER(c) PrintContainer(c, #c) - -#define PRINT_CONTAINER_TO_STRING(v) PrintContainerToString(v, #v) -#define PRINT_CONTAINER_2_STRING(v) PrintContainer2String(v, #v) - -#define PRINT_VECTOR_TO_STRING(v) PrintVectorToString(v, #v) -#define PRINT_VECTOR_2_STRING(v) PrintVector2String(v, #v) - -#define PRINT_VECTOR_MAPPING(v) PrintVectorMapping(v, #v) - -#else // GLUTEN_PRINT_DEBUG - template static inline void Print(const T& t) {} @@ -293,6 +108,4 @@ static inline void PrintVectorRange(const V& v, unsigned int from, unsigned int #define PRINT_VECTOR_MAPPING(v) -#endif // GLUTEN_PRINT_DEBUG - } // namespace gluten diff --git a/cpp/core/utils/macros.h b/cpp/core/utils/macros.h index 6fd0f15c0454..fef8ae1e75de 100644 --- a/cpp/core/utils/macros.h +++ b/cpp/core/utils/macros.h @@ -20,6 +20,7 @@ #include #include +#include #include #include "utils/exception.h" diff --git a/cpp/core/utils/qpl/qpl_job_pool.cc b/cpp/core/utils/qpl/qpl_job_pool.cc index f3ba2437799f..375551b5e37e 100644 --- a/cpp/core/utils/qpl/qpl_job_pool.cc +++ b/cpp/core/utils/qpl/qpl_job_pool.cc @@ -37,7 +37,7 @@ QplJobHWPool& QplJobHWPool::GetInstance() { QplJobHWPool::QplJobHWPool() : randomEngine(std::random_device()()), distribution(0, MAX_JOB_NUMBER - 1) { uint64_t initTime = 0; TIME_NANO(initTime, InitJobPool()); - DEBUG_OUT << "Init job pool took " << 1.0 * initTime / 1e6 << "ms" << std::endl; + LOG(INFO) << "Init job pool took " << 1.0 * initTime / 1e6 << "ms"; } QplJobHWPool::~QplJobHWPool() { @@ -96,7 +96,7 @@ qpl_job* QplJobHWPool::AcquireJob(uint32_t& jobId) { } } jobId = MAX_JOB_NUMBER - index; - DEBUG_OUT << "Acquired job index " << index << " after " << retry << " retries." << std::endl; + LOG(INFO) << "Acquired job index " << index << " after " << retry << " retries."; return jobPool[index]; } diff --git a/cpp/velox/benchmarks/ColumnarToRowBenchmark.cc b/cpp/velox/benchmarks/ColumnarToRowBenchmark.cc index 093099c755d2..2f2ff2ce1671 100644 --- a/cpp/velox/benchmarks/ColumnarToRowBenchmark.cc +++ b/cpp/velox/benchmarks/ColumnarToRowBenchmark.cc @@ -131,7 +131,7 @@ class GoogleBenchmarkColumnarToRowCacheScanBenchmark : public GoogleBenchmarkCol localSchema = std::make_shared(*schema_.get()); if (state.thread_index() == 0) - std::cout << localSchema->ToString() << std::endl; + LOG(INFO) << localSchema->ToString(); std::unique_ptr<::parquet::arrow::FileReader> parquetReader; std::shared_ptr recordBatchReader; @@ -150,7 +150,7 @@ class GoogleBenchmarkColumnarToRowCacheScanBenchmark : public GoogleBenchmarkCol } } while (recordBatch); - std::cout << " parquet parse done elapsed time = " << elapseRead / 1000000 << " rows = " << numRows << std::endl; + LOG(INFO) << " parquet parse done elapsed time = " << elapseRead / 1000000 << " rows = " << numRows; // reuse the columnarToRowConverter for batches caused system % increase a lot auto ctxPool = defaultLeafVeloxMemoryPool(); @@ -263,10 +263,10 @@ int main(int argc, char** argv) { cpu = atol(argv[i + 1]); } } - std::cout << "iterations = " << iterations << std::endl; - std::cout << "threads = " << threads << std::endl; - std::cout << "datafile = " << datafile << std::endl; - std::cout << "cpu = " << cpu << std::endl; + LOG(INFO) << "iterations = " << iterations; + LOG(INFO) << "threads = " << threads; + LOG(INFO) << "datafile = " << datafile; + LOG(INFO) << "cpu = " << cpu; gluten::GoogleBenchmarkColumnarToRowCacheScanBenchmark bck(datafile); diff --git a/cpp/velox/benchmarks/GenericBenchmark.cc b/cpp/velox/benchmarks/GenericBenchmark.cc index 24fa5ef7c014..f46012e8cd14 100644 --- a/cpp/velox/benchmarks/GenericBenchmark.cc +++ b/cpp/velox/benchmarks/GenericBenchmark.cc @@ -184,7 +184,7 @@ auto BM_Generic = [](::benchmark::State& state, return; } if (FLAGS_print_result) { - std::cout << maybeBatch.ValueOrDie()->ToString() << std::endl; + LOG(INFO) << maybeBatch.ValueOrDie()->ToString(); } } @@ -204,7 +204,7 @@ auto BM_Generic = [](::benchmark::State& state, const auto& task = rawIter->task_; const auto& planNode = rawIter->veloxPlan_; auto statsStr = facebook::velox::exec::printPlanWithStats(*planNode, task->taskStats(), true); - std::cout << statsStr << std::endl; + LOG(INFO) << statsStr; } Runtime::release(runtime); @@ -239,11 +239,10 @@ int main(int argc, char** argv) { try { if (argc < 2) { - std::cout + LOG(INFO) << "No input args. Usage: " << std::endl - << "./generic_benchmark /absolute-path/to/substrait_json_file /absolute-path/to/data_file_1 /absolute-path/to/data_file_2 ..." - << std::endl; - std::cout << "Running example..." << std::endl; + << "./generic_benchmark /absolute-path/to/substrait_json_file /absolute-path/to/data_file_1 /absolute-path/to/data_file_2 ..."; + LOG(INFO) << "Running example..."; inputFiles.resize(2); substraitJsonFile = getGeneratedFilePath("example.json"); inputFiles[0] = getGeneratedFilePath("example_orders"); @@ -251,16 +250,16 @@ int main(int argc, char** argv) { } else { substraitJsonFile = argv[1]; abortIfFileNotExists(substraitJsonFile); - std::cout << "Using substrait json file: " << std::endl << substraitJsonFile << std::endl; - std::cout << "Using " << argc - 2 << " input data file(s): " << std::endl; + LOG(INFO) << "Using substrait json file: " << std::endl << substraitJsonFile; + LOG(INFO) << "Using " << argc - 2 << " input data file(s): "; for (auto i = 2; i < argc; ++i) { inputFiles.emplace_back(argv[i]); abortIfFileNotExists(inputFiles.back()); - std::cout << inputFiles.back() << std::endl; + LOG(INFO) << inputFiles.back(); } } } catch (const std::exception& e) { - std::cout << "Failed to run benchmark: " << e.what() << std::endl; + LOG(INFO) << "Failed to run benchmark: " << e.what(); ::benchmark::Shutdown(); std::exit(EXIT_FAILURE); } @@ -281,12 +280,12 @@ int main(int argc, char** argv) { } while (0) #if 0 - std::cout << "FLAGS_threads:" << FLAGS_threads << std::endl; - std::cout << "FLAGS_iterations:" << FLAGS_iterations << std::endl; - std::cout << "FLAGS_cpu:" << FLAGS_cpu << std::endl; - std::cout << "FLAGS_print_result:" << FLAGS_print_result << std::endl; - std::cout << "FLAGS_write_file:" << FLAGS_write_file << std::endl; - std::cout << "FLAGS_batch_size:" << FLAGS_batch_size << std::endl; + LOG(INFO) << "FLAGS_threads:" << FLAGS_threads ; + LOG(INFO) << "FLAGS_iterations:" << FLAGS_iterations ; + LOG(INFO) << "FLAGS_cpu:" << FLAGS_cpu ; + LOG(INFO) << "FLAGS_print_result:" << FLAGS_print_result ; + LOG(INFO) << "FLAGS_write_file:" << FLAGS_write_file ; + LOG(INFO) << "FLAGS_batch_size:" << FLAGS_batch_size ; #endif if (FLAGS_skip_input) { diff --git a/cpp/velox/benchmarks/ParquetWriteBenchmark.cc b/cpp/velox/benchmarks/ParquetWriteBenchmark.cc index 08e2126e22d1..7958765fa644 100644 --- a/cpp/velox/benchmarks/ParquetWriteBenchmark.cc +++ b/cpp/velox/benchmarks/ParquetWriteBenchmark.cc @@ -141,7 +141,7 @@ class GoogleBenchmarkArrowParquetWriteCacheScanBenchmark : public GoogleBenchmar localSchema = std::make_shared(*schema_.get()); if (state.thread_index() == 0) - std::cout << localSchema->ToString() << std::endl; + LOG(INFO) << localSchema->ToString(); std::unique_ptr<::parquet::arrow::FileReader> parquetReader; std::shared_ptr recordBatchReader; @@ -160,7 +160,7 @@ class GoogleBenchmarkArrowParquetWriteCacheScanBenchmark : public GoogleBenchmar } } while (recordBatch); - std::cout << " parquet parse done elapsed time = " << elapseRead / 1000000 << " rows = " << numRows << std::endl; + LOG(INFO) << " parquet parse done elapsed time = " << elapseRead / 1000000 << " rows = " << numRows; // reuse the ParquetWriteConverter for batches caused system % increase a lot auto fileName = "arrow_parquet_write.parquet"; @@ -234,7 +234,7 @@ class GoogleBenchmarkVeloxParquetWriteCacheScanBenchmark : public GoogleBenchmar localSchema = std::make_shared(*schema_.get()); if (state.thread_index() == 0) - std::cout << localSchema->ToString() << std::endl; + LOG(INFO) << localSchema->ToString(); std::unique_ptr<::parquet::arrow::FileReader> parquetReader; std::shared_ptr recordBatchReader; @@ -253,7 +253,7 @@ class GoogleBenchmarkVeloxParquetWriteCacheScanBenchmark : public GoogleBenchmar } } while (recordBatch); - std::cout << " parquet parse done elapsed time = " << elapseRead / 1000000 << " rows = " << numRows << std::endl; + LOG(INFO) << " parquet parse done elapsed time = " << elapseRead / 1000000 << " rows = " << numRows; // reuse the ParquetWriteConverter for batches caused system % increase a lot auto fileName = "velox_parquet_write.parquet"; @@ -329,11 +329,11 @@ int main(int argc, char** argv) { output = (argv[i + 1]); } } - std::cout << "iterations = " << iterations << std::endl; - std::cout << "threads = " << threads << std::endl; - std::cout << "datafile = " << datafile << std::endl; - std::cout << "cpu = " << cpu << std::endl; - std::cout << "output = " << output << std::endl; + LOG(INFO) << "iterations = " << iterations; + LOG(INFO) << "threads = " << threads; + LOG(INFO) << "datafile = " << datafile; + LOG(INFO) << "cpu = " << cpu; + LOG(INFO) << "output = " << output; gluten::GoogleBenchmarkVeloxParquetWriteCacheScanBenchmark bck(datafile, output); diff --git a/cpp/velox/benchmarks/QueryBenchmark.cc b/cpp/velox/benchmarks/QueryBenchmark.cc index 7070267036d5..1f3ed3fbdf1c 100644 --- a/cpp/velox/benchmarks/QueryBenchmark.cc +++ b/cpp/velox/benchmarks/QueryBenchmark.cc @@ -101,7 +101,7 @@ auto BM = [](::benchmark::State& state, state.SkipWithError(maybeBatch.status().message().c_str()); return; } - std::cout << maybeBatch.ValueOrDie()->ToString() << std::endl; + LOG(INFO) << maybeBatch.ValueOrDie()->ToString(); } } Runtime::release(runtime); diff --git a/cpp/velox/benchmarks/ShuffleSplitBenchmark.cc b/cpp/velox/benchmarks/ShuffleSplitBenchmark.cc index c4aec9650e1b..a1e5eb906650 100644 --- a/cpp/velox/benchmarks/ShuffleSplitBenchmark.cc +++ b/cpp/velox/benchmarks/ShuffleSplitBenchmark.cc @@ -254,7 +254,7 @@ class BenchmarkShuffleSplitCacheScanBenchmark : public BenchmarkShuffleSplit { localSchema = std::make_shared(fields); if (state.thread_index() == 0) - std::cout << localSchema->ToString() << std::endl; + LOG(INFO) << localSchema->ToString(); auto pool = options.memory_pool; GLUTEN_ASSIGN_OR_THROW( @@ -279,8 +279,8 @@ class BenchmarkShuffleSplitCacheScanBenchmark : public BenchmarkShuffleSplit { numRows += recordBatch->num_rows(); } } while (recordBatch); - std::cout << "parquet parse done elapsed time " << elapseRead / 1000000 << " ms " << std::endl; - std::cout << "batches = " << numBatches << " rows = " << numRows << std::endl; + LOG(INFO) << "parquet parse done elapsed time " << elapseRead / 1000000 << " ms "; + LOG(INFO) << "batches = " << numBatches << " rows = " << numRows; for (auto _ : state) { for_each( @@ -291,8 +291,8 @@ class BenchmarkShuffleSplitCacheScanBenchmark : public BenchmarkShuffleSplit { ARROW_ASSIGN_OR_THROW(cb, recordBatch2VeloxColumnarBatch(*recordBatch)); TIME_NANO_OR_THROW(splitTime, shuffleWriter->split(cb, ShuffleWriter::kMinMemLimit)); }); - // std::cout << " split done memory allocated = " << - // options.memory_pool->bytes_allocated() << std::endl; + // LOG(INFO) << " split done memory allocated = " << + // options.memory_pool->bytes_allocated() ; } TIME_NANO_OR_THROW(splitTime, shuffleWriter->stop()); @@ -315,7 +315,7 @@ class BenchmarkShuffleSplitIterateScanBenchmark : public BenchmarkShuffleSplit { ShuffleWriterOptions options, benchmark::State& state) { if (state.thread_index() == 0) - std::cout << schema_->ToString() << std::endl; + LOG(INFO) << schema_->ToString(); GLUTEN_ASSIGN_OR_THROW( shuffleWriter, @@ -353,7 +353,7 @@ int main(int argc, char** argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); if (FLAGS_file.size() == 0) { - std::cerr << "No input data file. Please specify via argument --file" << std::endl; + LOG(WARNING) << "No input data file. Please specify via argument --file"; } if (FLAGS_partitions == -1) { diff --git a/cpp/velox/benchmarks/common/BenchmarkUtils.cc b/cpp/velox/benchmarks/common/BenchmarkUtils.cc index b0085c54f829..678cd55ece3f 100644 --- a/cpp/velox/benchmarks/common/BenchmarkUtils.cc +++ b/cpp/velox/benchmarks/common/BenchmarkUtils.cc @@ -116,7 +116,7 @@ bool checkPathExists(const std::string& filepath) { void abortIfFileNotExists(const std::string& filepath) { if (!checkPathExists(filepath)) { - std::cerr << "File path does not exist: " << filepath << std::endl; + LOG(WARNING) << "File path does not exist: " << filepath; ::benchmark::Shutdown(); std::exit(EXIT_FAILURE); } @@ -147,7 +147,7 @@ void setCpu(uint32_t cpuindex) { CPU_ZERO(&cs); CPU_SET(cpuindex, &cs); if (sched_setaffinity(0, sizeof(cs), &cs) == -1) { - std::cerr << "Error binding CPU " << std::to_string(cpuindex) << std::endl; + LOG(WARNING) << "Error binding CPU " << std::to_string(cpuindex); exit(EXIT_FAILURE); } } diff --git a/cpp/velox/benchmarks/common/BenchmarkUtils.h b/cpp/velox/benchmarks/common/BenchmarkUtils.h index 5ed974417224..695027188463 100644 --- a/cpp/velox/benchmarks/common/BenchmarkUtils.h +++ b/cpp/velox/benchmarks/common/BenchmarkUtils.h @@ -18,11 +18,13 @@ #pragma once #include +#include #include #include #include #include #include + #include "benchmark/benchmark.h" #include "substrait/SubstraitToVeloxPlan.h" diff --git a/cpp/velox/benchmarks/common/FileReaderIterator.h b/cpp/velox/benchmarks/common/FileReaderIterator.h index c12c90e22706..3fa94b6afba5 100644 --- a/cpp/velox/benchmarks/common/FileReaderIterator.h +++ b/cpp/velox/benchmarks/common/FileReaderIterator.h @@ -24,7 +24,6 @@ #include "compute/ResultIterator.h" #include "memory/ColumnarBatch.h" #include "memory/ColumnarBatchIterator.h" -#include "utils/DebugOut.h" namespace gluten { diff --git a/cpp/velox/benchmarks/common/OrcReaderIterator.h b/cpp/velox/benchmarks/common/OrcReaderIterator.h index 8bc2a50c9ec3..4f68a598645f 100644 --- a/cpp/velox/benchmarks/common/OrcReaderIterator.h +++ b/cpp/velox/benchmarks/common/OrcReaderIterator.h @@ -61,7 +61,6 @@ class OrcStreamReaderIterator final : public OrcReaderIterator { std::shared_ptr next() override { auto startTime = std::chrono::steady_clock::now(); GLUTEN_ASSIGN_OR_THROW(auto batch, recordBatchReader_->Next()); - DEBUG_OUT << "OrcStreamReaderIterator get a batch, num rows: " << (batch ? batch->num_rows() : 0) << std::endl; collectBatchTime_ += std::chrono::duration_cast(std::chrono::steady_clock::now() - startTime).count(); if (batch == nullptr) { @@ -76,16 +75,7 @@ class OrcBufferedReaderIterator final : public OrcReaderIterator { explicit OrcBufferedReaderIterator(const std::string& path) : OrcReaderIterator(path) { createReader(); collectBatches(); - iter_ = batches_.begin(); -#ifdef GLUTEN_PRINT_DEBUG - DEBUG_OUT << "OrcBufferedReaderIterator open file: " << path << std::endl; - DEBUG_OUT << "Number of input batches: " << std::to_string(batches_.size()) << std::endl; - if (iter_ != batches_.cend()) { - DEBUG_OUT << "columns: " << (*iter_)->num_columns() << std::endl; - DEBUG_OUT << "rows: " << (*iter_)->num_rows() << std::endl; - } -#endif } std::shared_ptr next() override { diff --git a/cpp/velox/benchmarks/common/ParquetReaderIterator.h b/cpp/velox/benchmarks/common/ParquetReaderIterator.h index adba79daab10..f80b244a2741 100644 --- a/cpp/velox/benchmarks/common/ParquetReaderIterator.h +++ b/cpp/velox/benchmarks/common/ParquetReaderIterator.h @@ -34,7 +34,7 @@ class ParquetReaderIterator : public FileReaderIterator { fileReader_->GetRecordBatchReader(arrow::internal::Iota(fileReader_->num_row_groups()), &recordBatchReader_)); auto schema = recordBatchReader_->schema(); - std::cout << "schema:\n" << schema->ToString() << std::endl; + LOG(INFO) << "schema:\n" << schema->ToString(); } std::shared_ptr getSchema() override { @@ -50,13 +50,11 @@ class ParquetStreamReaderIterator final : public ParquetReaderIterator { public: explicit ParquetStreamReaderIterator(const std::string& path) : ParquetReaderIterator(path) { createReader(); - DEBUG_OUT << "ParquetStreamReaderIterator open file: " << path << std::endl; } std::shared_ptr next() override { auto startTime = std::chrono::steady_clock::now(); GLUTEN_ASSIGN_OR_THROW(auto batch, recordBatchReader_->Next()); - DEBUG_OUT << "ParquetStreamReaderIterator get a batch, num rows: " << (batch ? batch->num_rows() : 0) << std::endl; collectBatchTime_ += std::chrono::duration_cast(std::chrono::steady_clock::now() - startTime).count(); if (batch == nullptr) { @@ -71,14 +69,7 @@ class ParquetBufferedReaderIterator final : public ParquetReaderIterator { explicit ParquetBufferedReaderIterator(const std::string& path) : ParquetReaderIterator(path) { createReader(); collectBatches(); - iter_ = batches_.begin(); - DEBUG_OUT << "ParquetBufferedReaderIterator open file: " << path << std::endl; - DEBUG_OUT << "Number of input batches: " << std::to_string(batches_.size()) << std::endl; - if (iter_ != batches_.cend()) { - DEBUG_OUT << "columns: " << (*iter_)->num_columns() << std::endl; - DEBUG_OUT << "rows: " << (*iter_)->num_rows() << std::endl; - } } std::shared_ptr next() override { diff --git a/cpp/velox/compute/VeloxPlanConverter.cc b/cpp/velox/compute/VeloxPlanConverter.cc index 4d181e4ffd43..28d387a6144a 100644 --- a/cpp/velox/compute/VeloxPlanConverter.cc +++ b/cpp/velox/compute/VeloxPlanConverter.cc @@ -22,7 +22,6 @@ #include "compute/ResultIterator.h" #include "config/GlutenConfig.h" #include "operators/plannodes/RowVectorStream.h" -#include "utils/DebugOut.h" #include "velox/common/file/FileSystems.h" namespace gluten { @@ -213,7 +212,6 @@ std::shared_ptr VeloxPlanConverter::toVel } } auto veloxPlan = substraitVeloxPlanConverter_.toVeloxPlan(substraitPlan); - DEBUG_OUT << "Plan Node: " << std::endl << veloxPlan->toString(true, true) << std::endl; return veloxPlan; } diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index 7cbf383e4d4c..b3f829fea788 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ b/cpp/velox/compute/WholeStageResultIterator.cc @@ -237,8 +237,8 @@ void WholeStageResultIterator::collectMetrics() { const auto& nodeId = orderedNodeIds_[idx]; if (planStats.find(nodeId) == planStats.end()) { if (omittedNodeIds_.find(nodeId) == omittedNodeIds_.end()) { - DEBUG_OUT << "Not found node id: " << nodeId << std::endl; - DEBUG_OUT << "Plan Node: " << std::endl << veloxPlan_->toString(true, true) << std::endl; + LOG(WARNING) << "Not found node id: " << nodeId; + LOG(WARNING) << "Plan Node: " << std::endl << veloxPlan_->toString(true, true); throw std::runtime_error("Node id cannot be found in plan status."); } // Special handing for Filter over Project case. Filter metrics are diff --git a/cpp/velox/jni/VeloxJniWrapper.cc b/cpp/velox/jni/VeloxJniWrapper.cc index 3cdd4a161369..682b42066e4c 100644 --- a/cpp/velox/jni/VeloxJniWrapper.cc +++ b/cpp/velox/jni/VeloxJniWrapper.cc @@ -49,8 +49,6 @@ jint JNI_OnLoad(JavaVM* vm, void*) { gluten::initVeloxJniFileSystem(env); gluten::initVeloxJniUDF(env); - DEBUG_OUT << "Loaded Velox backend." << std::endl; - return jniVersion; } diff --git a/cpp/velox/udf/examples/MyUDF.cpp b/cpp/velox/udf/examples/MyUDF.cpp index 91ae9f753146..91d9e295d485 100644 --- a/cpp/velox/udf/examples/MyUDF.cpp +++ b/cpp/velox/udf/examples/MyUDF.cpp @@ -92,5 +92,5 @@ DEFINE_REGISTER_UDF { "myudf1", integerSignatures(), std::make_unique>(5)); facebook::velox::exec::registerVectorFunction( "myudf2", bigintSignatures(), std::make_unique>(5)); - std::cout << "registered myudf1, myudf2" << std::endl; + LOG(INFO) << "registered myudf1, myudf2"; } From 9a8684758b0480d6b988ba8cee54fd57fd7dc4bb Mon Sep 17 00:00:00 2001 From: yangchuan Date: Fri, 29 Dec 2023 09:54:47 +0800 Subject: [PATCH 2/5] fix --- cpp/core/jni/JniCommon.h | 2 ++ cpp/core/shuffle/LocalPartitionWriter.cc | 1 + cpp/velox/benchmarks/common/OrcReaderIterator.h | 7 +++++++ cpp/velox/benchmarks/common/ParquetReaderIterator.h | 8 ++++++++ cpp/velox/compute/VeloxPlanConverter.cc | 1 + 5 files changed, 19 insertions(+) diff --git a/cpp/core/jni/JniCommon.h b/cpp/core/jni/JniCommon.h index 468224899cb2..085cb2af13f9 100644 --- a/cpp/core/jni/JniCommon.h +++ b/cpp/core/jni/JniCommon.h @@ -105,11 +105,13 @@ static inline jmethodID getStaticMethodIdOrError(JNIEnv* env, jclass thisClass, static inline void attachCurrentThreadAsDaemonOrThrow(JavaVM* vm, JNIEnv** out) { int getEnvStat = vm->GetEnv(reinterpret_cast(out), jniVersion); if (getEnvStat == JNI_EDETACHED) { + DLOG(INFO) << "JNIEnv was not attached to current thread."; // Reattach current thread to JVM getEnvStat = vm->AttachCurrentThreadAsDaemon(reinterpret_cast(out), NULL); if (getEnvStat != JNI_OK) { throw gluten::GlutenException("Failed to reattach current thread to JVM."); } + DLOG(INFO) << "Succeeded attaching current thread."; return; } if (getEnvStat != JNI_OK) { diff --git a/cpp/core/shuffle/LocalPartitionWriter.cc b/cpp/core/shuffle/LocalPartitionWriter.cc index 91225f7b5211..3e944c0e93f2 100644 --- a/cpp/core/shuffle/LocalPartitionWriter.cc +++ b/cpp/core/shuffle/LocalPartitionWriter.cc @@ -128,6 +128,7 @@ class FlushOnSpillEvictHandle final : public LocalPartitionWriter::LocalEvictHan ARROW_ASSIGN_OR_RAISE(auto start, os_->Tell()); RETURN_NOT_OK(arrow::ipc::WriteIpcPayload(*payload, options_, os_.get(), &metadataLength)); ARROW_ASSIGN_OR_RAISE(auto end, os_->Tell()); + DLOG(INFO) << "Spilled partition " << partitionId << " file start: " << start << ", file end: " << end; spillInfo_->partitionSpillInfos.push_back({partitionId, end - start}); return arrow::Status::OK(); } diff --git a/cpp/velox/benchmarks/common/OrcReaderIterator.h b/cpp/velox/benchmarks/common/OrcReaderIterator.h index 4f68a598645f..f8c9f44b2008 100644 --- a/cpp/velox/benchmarks/common/OrcReaderIterator.h +++ b/cpp/velox/benchmarks/common/OrcReaderIterator.h @@ -61,6 +61,7 @@ class OrcStreamReaderIterator final : public OrcReaderIterator { std::shared_ptr next() override { auto startTime = std::chrono::steady_clock::now(); GLUTEN_ASSIGN_OR_THROW(auto batch, recordBatchReader_->Next()); + DLOG(INFO) << "OrcStreamReaderIterator get a batch, num rows: " << (batch ? batch->num_rows() : 0); collectBatchTime_ += std::chrono::duration_cast(std::chrono::steady_clock::now() - startTime).count(); if (batch == nullptr) { @@ -76,6 +77,12 @@ class OrcBufferedReaderIterator final : public OrcReaderIterator { createReader(); collectBatches(); iter_ = batches_.begin(); + DLOG(INFO) << "OrcBufferedReaderIterator open file: " << path; + DLOG(INFO) << "Number of input batches: " << std::to_string(batches_.size()); + if (iter_ != batches_.cend()) { + DLOG(INFO) << "columns: " << (*iter_)->num_columns(); + DLOG(INFO) << "rows: " << (*iter_)->num_rows(); + } } std::shared_ptr next() override { diff --git a/cpp/velox/benchmarks/common/ParquetReaderIterator.h b/cpp/velox/benchmarks/common/ParquetReaderIterator.h index f80b244a2741..3858adce4bb5 100644 --- a/cpp/velox/benchmarks/common/ParquetReaderIterator.h +++ b/cpp/velox/benchmarks/common/ParquetReaderIterator.h @@ -50,11 +50,13 @@ class ParquetStreamReaderIterator final : public ParquetReaderIterator { public: explicit ParquetStreamReaderIterator(const std::string& path) : ParquetReaderIterator(path) { createReader(); + DLOG(INFO) << "ParquetStreamReaderIterator open file: " << path; } std::shared_ptr next() override { auto startTime = std::chrono::steady_clock::now(); GLUTEN_ASSIGN_OR_THROW(auto batch, recordBatchReader_->Next()); + DLOG(INFO) << "ParquetStreamReaderIterator get a batch, num rows: " << (batch ? batch->num_rows() : 0); collectBatchTime_ += std::chrono::duration_cast(std::chrono::steady_clock::now() - startTime).count(); if (batch == nullptr) { @@ -70,6 +72,12 @@ class ParquetBufferedReaderIterator final : public ParquetReaderIterator { createReader(); collectBatches(); iter_ = batches_.begin(); + DLOG(INFO) << "ParquetBufferedReaderIterator open file: " << path; + DLOG(INFO) << "Number of input batches: " << std::to_string(batches_.size()); + if (iter_ != batches_.cend()) { + DLOG(INFO) << "columns: " << (*iter_)->num_columns(); + DLOG(INFO) << "rows: " << (*iter_)->num_rows(); + } } std::shared_ptr next() override { diff --git a/cpp/velox/compute/VeloxPlanConverter.cc b/cpp/velox/compute/VeloxPlanConverter.cc index 28d387a6144a..cef4cd039ed3 100644 --- a/cpp/velox/compute/VeloxPlanConverter.cc +++ b/cpp/velox/compute/VeloxPlanConverter.cc @@ -212,6 +212,7 @@ std::shared_ptr VeloxPlanConverter::toVel } } auto veloxPlan = substraitVeloxPlanConverter_.toVeloxPlan(substraitPlan); + DLOG(INFO) << "Plan Node: " << std::endl << veloxPlan->toString(true, true); return veloxPlan; } From 788bd3702fd59878c55e83e5041ed3d6e8262ec8 Mon Sep 17 00:00:00 2001 From: yangchuan Date: Tue, 2 Jan 2024 18:22:04 +0800 Subject: [PATCH 3/5] fix comments --- cpp/core/utils/qpl/qpl_job_pool.cc | 4 ++-- cpp/velox/jni/VeloxJniWrapper.cc | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/core/utils/qpl/qpl_job_pool.cc b/cpp/core/utils/qpl/qpl_job_pool.cc index 375551b5e37e..3d10128a437e 100644 --- a/cpp/core/utils/qpl/qpl_job_pool.cc +++ b/cpp/core/utils/qpl/qpl_job_pool.cc @@ -37,7 +37,7 @@ QplJobHWPool& QplJobHWPool::GetInstance() { QplJobHWPool::QplJobHWPool() : randomEngine(std::random_device()()), distribution(0, MAX_JOB_NUMBER - 1) { uint64_t initTime = 0; TIME_NANO(initTime, InitJobPool()); - LOG(INFO) << "Init job pool took " << 1.0 * initTime / 1e6 << "ms"; + DLOG(INFO) << "Init job pool took " << 1.0 * initTime / 1e6 << "ms"; } QplJobHWPool::~QplJobHWPool() { @@ -96,7 +96,7 @@ qpl_job* QplJobHWPool::AcquireJob(uint32_t& jobId) { } } jobId = MAX_JOB_NUMBER - index; - LOG(INFO) << "Acquired job index " << index << " after " << retry << " retries."; + DLOG(INFO) << "Acquired job index " << index << " after " << retry << " retries."; return jobPool[index]; } diff --git a/cpp/velox/jni/VeloxJniWrapper.cc b/cpp/velox/jni/VeloxJniWrapper.cc index 682b42066e4c..981735e81c20 100644 --- a/cpp/velox/jni/VeloxJniWrapper.cc +++ b/cpp/velox/jni/VeloxJniWrapper.cc @@ -49,6 +49,8 @@ jint JNI_OnLoad(JavaVM* vm, void*) { gluten::initVeloxJniFileSystem(env); gluten::initVeloxJniUDF(env); + DLOG(INFO) << "Loaded Velox backend."; + return jniVersion; } From fd21ef03ef2d66a6b91d1d3cf12dbbe8c026f508 Mon Sep 17 00:00:00 2001 From: yangchuan Date: Thu, 4 Jan 2024 18:10:40 +0800 Subject: [PATCH 4/5] fix --- cpp/core/benchmarks/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/core/benchmarks/CMakeLists.txt b/cpp/core/benchmarks/CMakeLists.txt index fce0ed6151e5..6d39501477df 100644 --- a/cpp/core/benchmarks/CMakeLists.txt +++ b/cpp/core/benchmarks/CMakeLists.txt @@ -31,7 +31,7 @@ macro(package_add_gbenchmark TESTNAME) add_executable(${TESTNAME} ${ARGN}) - target_link_libraries(${TESTNAME} benchmark::benchmark gluten ${CMAKE_THREAD_LIBS_INIT}) + target_link_libraries(${TESTNAME} benchmark::benchmark gluten google::glog ${CMAKE_THREAD_LIBS_INIT}) target_include_directories(${TESTNAME} PUBLIC ${source_root_directory}) set_target_properties(${TESTNAME} PROPERTIES FOLDER tests) endmacro() From 2a23be058e84f4e3a745ab43a31581b11d7093d2 Mon Sep 17 00:00:00 2001 From: yangchuan Date: Thu, 4 Jan 2024 19:18:29 +0800 Subject: [PATCH 5/5] fix --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 176d08b716b8..0a3549b7eefb 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -159,7 +159,7 @@ function(ADD_TEST_CASE TEST_NAME) endif() add_executable(${TEST_NAME} ${SOURCES}) - target_link_libraries(${TEST_NAME} gluten GTest::gtest GTest::gtest_main Threads::Threads) + target_link_libraries(${TEST_NAME} gluten google::glog GTest::gtest GTest::gtest_main Threads::Threads) target_include_directories(${TEST_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/core) if(ARG_EXTRA_LINK_LIBS)