diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 48eaa4f83a576..176d08b716b80 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -74,13 +74,7 @@ include(ResolveDependency) # Compiler flags # -if (${CMAKE_BUILD_TYPE} STREQUAL "RelWithDebInfo") -# keep this original logic - add_definitions(-DGLUTEN_PRINT_DEBUG) - message(STATUS "Add definition GLUTEN_PRINT_DEBUG") -elseif (${CMAKE_BUILD_TYPE} STREQUAL "Debug") - add_definitions(-DGLUTEN_PRINT_DEBUG) - message(STATUS "Add definition GLUTEN_PRINT_DEBUG") +if (${CMAKE_BUILD_TYPE} STREQUAL "Debug") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -ggdb -O0") message(STATUS "CMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}") else () diff --git a/cpp/core/CMakeLists.txt b/cpp/core/CMakeLists.txt index fb7c714a3ef89..b1e97d83f8b06 100644 --- a/cpp/core/CMakeLists.txt +++ b/cpp/core/CMakeLists.txt @@ -209,7 +209,6 @@ set(SPARK_COLUMNAR_PLUGIN_SRCS shuffle/rss/CelebornPartitionWriter.cc shuffle/Utils.cc utils/Compression.cc - utils/DebugOut.cc utils/StringUtil.cc utils/ObjectStore.cc jni/JniError.cc diff --git a/cpp/core/benchmarks/CompressionBenchmark.cc b/cpp/core/benchmarks/CompressionBenchmark.cc index 3e2396818235c..1966c8ef1c189 100644 --- a/cpp/core/benchmarks/CompressionBenchmark.cc +++ b/cpp/core/benchmarks/CompressionBenchmark.cc @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -126,7 +127,7 @@ class BenchmarkCompression { } case gluten::kQatZstd: { ipcWriteOptions.codec = createArrowIpcCodec(arrow::Compression::ZSTD, CodecBackend::QAT); - std::cout << "load qatzstd" << std::endl; + LOG(INFO) << "load qatzstd"; break; } #endif @@ -163,7 +164,7 @@ class BenchmarkCompression { state); auto endTime = std::chrono::steady_clock::now(); auto totalTime = (endTime - startTime).count(); - std::cout << "Thread " << state.thread_index() << " took " << (1.0 * totalTime / 1e9) << "s" << std::endl; + LOG(INFO) << "Thread " << state.thread_index() << " took " << (1.0 * totalTime / 1e9) << "s"; state.counters["rowgroups"] = benchmark::Counter(rowGroupIndices_.size(), benchmark::Counter::kAvgThreads, benchmark::Counter::OneK::kIs1000); @@ -301,8 +302,8 @@ class BenchmarkCompressionCacheScanBenchmark final : public BenchmarkCompression } } while (recordBatch); - std::cout << "parquet parse done elapsed time " << elapseRead / 1e6 << " ms " << std::endl; - std::cout << "batches = " << numBatches << " rows = " << numRows << std::endl; + LOG(INFO) << "parquet parse done elapsed time " << elapseRead / 1e6 << " ms "; + LOG(INFO) << "batches = " << numBatches << " rows = " << numRows; std::vector> payloads(batches.size()); std::vector> uncompressedBufferSize(batches.size()); @@ -418,16 +419,16 @@ int main(int argc, char** argv) { } else if (strcmp(argv[i], "--file") == 0) { datafile = argv[i + 1]; } else if (strcmp(argv[i], "--qat-gzip") == 0) { - std::cout << "QAT gzip is used as codec" << std::endl; + LOG(INFO) << "QAT gzip is used as codec"; codec = gluten::kQatGzip; } else if (strcmp(argv[i], "--qat-zstd") == 0) { - std::cout << "QAT zstd is used as codec" << std::endl; + LOG(INFO) << "QAT zstd is used as codec"; codec = gluten::kQatZstd; } else if (strcmp(argv[i], "--qpl-gzip") == 0) { - std::cout << "QPL gzip is used as codec" << std::endl; + LOG(INFO) << "QPL gzip is used as codec"; codec = gluten::kQplGzip; } else if (strcmp(argv[i], "--zstd") == 0) { - std::cout << "CPU zstd is used as codec" << std::endl; + LOG(INFO) << "CPU zstd is used as codec"; codec = gluten::kZstd; } else if (strcmp(argv[i], "--buffer-size") == 0) { compressBufferSize = atol(argv[i + 1]); @@ -435,9 +436,9 @@ int main(int argc, char** argv) { cpuOffset = atol(argv[i + 1]); } } - std::cout << "iterations = " << iterations << std::endl; - std::cout << "threads = " << threads << std::endl; - std::cout << "datafile = " << datafile << std::endl; + LOG(INFO) << "iterations = " << iterations; + LOG(INFO) << "threads = " << threads; + LOG(INFO) << "datafile = " << datafile; gluten::BenchmarkCompressionIterateScanBenchmark bmIterateScan(datafile, compressBufferSize); gluten::BenchmarkCompressionCacheScanBenchmark bmCacheScan(datafile, compressBufferSize); diff --git a/cpp/core/compute/Runtime.h b/cpp/core/compute/Runtime.h index f188e24203d1d..6729e28c007cc 100644 --- a/cpp/core/compute/Runtime.h +++ b/cpp/core/compute/Runtime.h @@ -17,6 +17,8 @@ #pragma once +#include + #include "compute/ProtobufUtils.h" #include "compute/ResultIterator.h" #include "memory/ArrowMemoryPool.h" @@ -29,7 +31,6 @@ #include "shuffle/ShuffleReader.h" #include "shuffle/ShuffleWriter.h" #include "substrait/plan.pb.h" -#include "utils/DebugOut.h" #include "utils/ObjectStore.h" namespace gluten { diff --git a/cpp/core/jni/JniCommon.h b/cpp/core/jni/JniCommon.h index 265382aa2d201..468224899cb27 100644 --- a/cpp/core/jni/JniCommon.h +++ b/cpp/core/jni/JniCommon.h @@ -28,7 +28,6 @@ #include "memory/AllocationListener.h" #include "shuffle/rss/RssClient.h" #include "utils/Compression.h" -#include "utils/DebugOut.h" #include "utils/exception.h" static jint jniVersion = JNI_VERSION_1_8; @@ -52,7 +51,7 @@ static inline void checkException(JNIEnv* env) { std::string description = jStringToCString(env, (jstring)env->CallStaticObjectMethod(describerClass, describeMethod, t)); if (env->ExceptionCheck()) { - std::cerr << "Fatal: Uncaught Java exception during calling the Java exception describer method! " << std::endl; + LOG(WARNING) << "Fatal: Uncaught Java exception during calling the Java exception describer method! "; } throw gluten::GlutenException("Error during calling Java code from native code: " + description); } @@ -106,13 +105,11 @@ static inline jmethodID getStaticMethodIdOrError(JNIEnv* env, jclass thisClass, static inline void attachCurrentThreadAsDaemonOrThrow(JavaVM* vm, JNIEnv** out) { int getEnvStat = vm->GetEnv(reinterpret_cast(out), jniVersion); if (getEnvStat == JNI_EDETACHED) { - DEBUG_OUT << "JNIEnv was not attached to current thread." << std::endl; // Reattach current thread to JVM getEnvStat = vm->AttachCurrentThreadAsDaemon(reinterpret_cast(out), NULL); if (getEnvStat != JNI_OK) { throw gluten::GlutenException("Failed to reattach current thread to JVM."); } - DEBUG_OUT << "Succeeded attaching current thread." << std::endl; return; } if (getEnvStat != JNI_OK) { @@ -161,7 +158,7 @@ static inline void backtrace() { auto size = backtrace(array, 1024); char** strings = backtrace_symbols(array, size); for (size_t i = 0; i < size; ++i) { - std::cout << strings[i] << std::endl; + LOG(INFO) << strings[i]; } free(strings); } @@ -229,8 +226,8 @@ class SparkAllocationListener final : public gluten::AllocationListener { ~SparkAllocationListener() override { JNIEnv* env; if (vm_->GetEnv(reinterpret_cast(&env), jniVersion) != JNI_OK) { - std::cerr << "SparkAllocationListener#~SparkAllocationListener(): " - << "JNIEnv was not attached to current thread" << std::endl; + LOG(WARNING) << "SparkAllocationListener#~SparkAllocationListener(): " + << "JNIEnv was not attached to current thread"; return; } env->DeleteGlobalRef(jListenerGlobalRef_); @@ -329,8 +326,8 @@ class CelebornClient : public RssClient { ~CelebornClient() { JNIEnv* env; if (vm_->GetEnv(reinterpret_cast(&env), jniVersion) != JNI_OK) { - std::cerr << "CelebornClient#~CelebornClient(): " - << "JNIEnv was not attached to current thread" << std::endl; + LOG(WARNING) << "CelebornClient#~CelebornClient(): " + << "JNIEnv was not attached to current thread"; return; } env->DeleteGlobalRef(javaCelebornShuffleWriter_); diff --git a/cpp/core/jni/JniWrapper.cc b/cpp/core/jni/JniWrapper.cc index 3974ec8363cb9..a192833a03c93 100644 --- a/cpp/core/jni/JniWrapper.cc +++ b/cpp/core/jni/JniWrapper.cc @@ -18,7 +18,6 @@ #include #include -#include #include "compute/ProtobufUtils.h" #include "compute/Runtime.h" #include "config/GlutenConfig.h" @@ -92,11 +91,10 @@ class JavaInputStreamAdaptor final : public arrow::io::InputStream { try { auto status = JavaInputStreamAdaptor::Close(); if (!status.ok()) { - DEBUG_OUT << __func__ << " call JavaInputStreamAdaptor::Close() failed, status:" << status.ToString() - << std::endl; + LOG(WARNING) << __func__ << " call JavaInputStreamAdaptor::Close() failed, status:" << status.ToString(); } } catch (std::exception& e) { - DEBUG_OUT << __func__ << " call JavaInputStreamAdaptor::Close() got exception:" << e.what() << std::endl; + LOG(WARNING) << __func__ << " call JavaInputStreamAdaptor::Close() got exception:" << e.what(); } } @@ -820,7 +818,7 @@ JNIEXPORT jlong JNICALL Java_io_glutenproject_vectorized_ShuffleWriterJniWrapper jobject thread = env->CallStaticObjectMethod(cls, mid); checkException(env); if (thread == NULL) { - std::cerr << "Thread.currentThread() return NULL" << std::endl; + LOG(WARNING) << "Thread.currentThread() return NULL"; } else { jmethodID midGetid = getMethodIdOrError(env, cls, "getId", "()J"); jlong sid = env->CallLongMethod(thread, midGetid); diff --git a/cpp/core/shuffle/LocalPartitionWriter.cc b/cpp/core/shuffle/LocalPartitionWriter.cc index 313be10653b8c..91225f7b5211a 100644 --- a/cpp/core/shuffle/LocalPartitionWriter.cc +++ b/cpp/core/shuffle/LocalPartitionWriter.cc @@ -19,7 +19,6 @@ #include #include #include "shuffle/Utils.h" -#include "utils/DebugOut.h" #include "utils/StringUtil.h" #include "utils/Timer.h" @@ -129,7 +128,6 @@ class FlushOnSpillEvictHandle final : public LocalPartitionWriter::LocalEvictHan ARROW_ASSIGN_OR_RAISE(auto start, os_->Tell()); RETURN_NOT_OK(arrow::ipc::WriteIpcPayload(*payload, options_, os_.get(), &metadataLength)); ARROW_ASSIGN_OR_RAISE(auto end, os_->Tell()); - DEBUG_OUT << "Spilled partition " << partitionId << " file start: " << start << ", file end: " << end << std::endl; spillInfo_->partitionSpillInfos.push_back({partitionId, end - start}); return arrow::Status::OK(); } diff --git a/cpp/core/tests/RoundRobinPartitionerTest.cc b/cpp/core/tests/RoundRobinPartitionerTest.cc index 9c12b2c7e1ca5..03701060be2e5 100644 --- a/cpp/core/tests/RoundRobinPartitionerTest.cc +++ b/cpp/core/tests/RoundRobinPartitionerTest.cc @@ -15,6 +15,7 @@ * limitations under the License. */ #include "shuffle/RoundRobinPartitioner.h" +#include #include #include @@ -46,10 +47,11 @@ class RoundRobinPartitionerTest : public ::testing::Test { template void toString(const std::vector& vec, const std::string& name) const { - std::cout << name << " = ["; - std::copy(vec.cbegin(), vec.cend(), std::ostream_iterator(std::cout, ",")); - std::cout << " ]"; - std::cout << std::endl; + std::stringstream ss; + ss << name << " = ["; + std::copy(vec.cbegin(), vec.cend(), std::ostream_iterator(ss, ",")); + ss << " ]"; + LOG(INFO) << ss.str(); } int32_t getPidSelection() const { diff --git a/cpp/core/utils/DebugOut.cc b/cpp/core/utils/DebugOut.cc deleted file mode 100644 index 04ea3308f5b9c..0000000000000 --- a/cpp/core/utils/DebugOut.cc +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "DebugOut.h" - -namespace gluten { - -#ifndef GLUTEN_PRINT_DEBUG - -FakeOut fakeOut; - -#endif - -} // namespace gluten diff --git a/cpp/core/utils/DebugOut.h b/cpp/core/utils/DebugOut.h deleted file mode 100644 index 6d1b3b117d2e0..0000000000000 --- a/cpp/core/utils/DebugOut.h +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include - -#ifdef GLUTEN_PRINT_DEBUG - -#define DEBUG_OUT std::cout - -#else - -namespace gluten { - -struct FakeOut { - template - FakeOut& operator<<(T t) { - return *this; - } - - FakeOut& operator<<(std::ostream& (*endl)(std::ostream& os)) { - return *this; - } -}; - -extern FakeOut fakeOut; - -} // namespace gluten - -#define DEBUG_OUT gluten::fakeOut - -#endif diff --git a/cpp/core/utils/ObjectStore.cc b/cpp/core/utils/ObjectStore.cc index 4a8e7c7ea2f44..5dc9c7843a73a 100644 --- a/cpp/core/utils/ObjectStore.cc +++ b/cpp/core/utils/ObjectStore.cc @@ -16,6 +16,7 @@ */ #include "ObjectStore.h" +#include #include gluten::ObjectStore::~ObjectStore() { @@ -24,7 +25,7 @@ gluten::ObjectStore::~ObjectStore() { for (auto itr = aliveObjectHandles_.rbegin(); itr != aliveObjectHandles_.rend(); itr++) { ResourceHandle handle = *itr; if (store_.lookup(handle) == nullptr) { - std::cerr << "Fatal: resource handle " + std::to_string(handle) + " not found in store." << std::endl; + LOG(WARNING) << "Fatal: resource handle " + std::to_string(handle) + " not found in store."; } store_.erase(handle); } diff --git a/cpp/core/utils/Print.h b/cpp/core/utils/Print.h index 0ac64be1ff72d..cf55c42ce1332 100644 --- a/cpp/core/utils/Print.h +++ b/cpp/core/utils/Print.h @@ -19,7 +19,6 @@ #include #include #include -#include "DebugOut.h" namespace gluten { @@ -27,190 +26,6 @@ namespace gluten { // the token `ToString` means the method of `ToString()` // the token `2String` means the method of `toString()` -#ifdef GLUTEN_PRINT_DEBUG - -template -static inline void print(const T& t) { - std::cout << t; -} - -template -static inline void printLf(const T& t) { - std::cout << t << std::endl; -} - -template -static inline void print(const A& a, const B& b) { - std::cout << a << b; -} - -template -static inline void printLf(const A& a, const B& b) { - std::cout << a << b << std::endl; -} - -template -static inline void printSplit(const A& a, const B& b, const std::string split = ": ") { - std::cout << a << split << b; -} - -template -static inline void printSplitLf(const A& a, const B& b, const std::string split = ": ") { - std::cout << a << split << b << std::endl; -} - -template -static inline void printEq(const A& a, const B& b) { - std::cout << a << " = " << b; -} - -template -static inline void printEqlf(const A& a, const B& b) { - std::cout << a << " = " << b << std::endl; -} - -template -static inline void printVs(const A& a, const B& b) { - std::cout << a << " vs " << b; -} - -template -static inline void printVslf(const A& a, const B& b) { - std::cout << a << " vs " << b << std::endl; -} - -template -static inline void printElement(const E& e, bool first = false) { - if (!first) { - std::cout << ", "; - } - std::cout << e; -} - -template -static inline void printRange(ITERATOR begin, ITERATOR end) { - std::cout << "{ "; - for (; begin != end; ++begin) { - std::cout << *begin << " "; - } - std::cout << "}" << std::endl; -} - -template -static inline void printContainer(const C& c, const std::string& containerName = "") { - if (!containerName.empty()) { - std::cout << containerName << " "; - } - std::cout << "size = " << c.size() << " "; - PrintRange(c.begin(), c.end()); -} - -template -static inline void printAB2String(const A& a, const B& b) { - std::cout << a << " = " << b.toString() << std::endl; -} - -template -static inline void print2String(const T& t, const std::string& prefix = "") { - if (!prefix.empty()) { - std::cout << prefix << ": "; - } - std::cout << t.toString() << std::endl; -} - -template -static inline void printRangeToString(ITERATOR begin, ITERATOR end) { - std::cout << "{ "; - for (; begin != end; ++begin) { - std::cout << begin->ToString() << " "; - } - std::cout << "}"; -} - -template -static inline void printRange2String(ITERATOR begin, ITERATOR end) { - std::cout << "{ "; - for (; begin != end; ++begin) { - std::cout << begin->toString() << " "; - } - std::cout << "}"; -} - -template -static inline void printContainerToString(const C& c, const std::string& containerName = "") { - if (!containerName.empty()) { - std::cout << containerName << " "; - } - std::cout << "size = " << c.size() << std::endl; - PrintRangeToString(c.begin(), c.end()); -} - -template -static inline void printContainer2String(const C& c, const std::string& containerName = "") { - if (!containerName.empty()) { - std::cout << containerName << " "; - } - std::cout << "size = " << c.size() << std::endl; - PrintRange2String(c.begin(), c.end()); -} - -template -static inline void printVectorToString(const C& c, const std::string& containerName = "") { - std::cout << containerName << " = {"; - for (auto& x : c) { - std::cout << " " << x->ToString(); - } - std::cout << " }" << std::endl; -} - -template -static inline void printVector2String(const C& c, const std::string& containerName = "") { - std::cout << containerName << " = {"; - for (auto& x : c) { - std::cout << " " << x->toString(); - } - std::cout << " }" << std::endl; -} - -template -static inline void printVectorMapping(const V& v, const std::string& vectorName = "") { - std::cout << vectorName << "\n{\n"; - for (size_t i = 0; i < v.size(); ++i) { - print("\t"); - PrintSplitLF(i, v[i], " -> "); - } - std::cout << "}" << std::endl; -} - -template -static inline void printVectorRange(const V& v, unsigned int begin, unsigned int end) { - std::cout << "{"; - auto index = begin; - for (; index != end; ++index) { - PrintElement(v[index], index == begin); - } - - std::cout << "}" << std::endl; -} - -#define PRINT(a) PrintSplit(#a, a) -#define PRINTLF(a) PrintSplitLF(#a, a) - -#define PRINT_FUNCTION_NAME() std::cout << __func__ << std::endl; -#define PRINT_FUNCTION_SPLIT_LINE() std::cout << "===== " << __func__ << " ======" << std::endl; - -#define PRINT_CONTAINER(c) PrintContainer(c, #c) - -#define PRINT_CONTAINER_TO_STRING(v) PrintContainerToString(v, #v) -#define PRINT_CONTAINER_2_STRING(v) PrintContainer2String(v, #v) - -#define PRINT_VECTOR_TO_STRING(v) PrintVectorToString(v, #v) -#define PRINT_VECTOR_2_STRING(v) PrintVector2String(v, #v) - -#define PRINT_VECTOR_MAPPING(v) PrintVectorMapping(v, #v) - -#else // GLUTEN_PRINT_DEBUG - template static inline void Print(const T& t) {} @@ -293,6 +108,4 @@ static inline void PrintVectorRange(const V& v, unsigned int from, unsigned int #define PRINT_VECTOR_MAPPING(v) -#endif // GLUTEN_PRINT_DEBUG - } // namespace gluten diff --git a/cpp/core/utils/macros.h b/cpp/core/utils/macros.h index 6fd0f15c04542..fef8ae1e75de9 100644 --- a/cpp/core/utils/macros.h +++ b/cpp/core/utils/macros.h @@ -20,6 +20,7 @@ #include #include +#include #include #include "utils/exception.h" diff --git a/cpp/core/utils/qpl/qpl_job_pool.cc b/cpp/core/utils/qpl/qpl_job_pool.cc index f3ba2437799f8..375551b5e37ec 100644 --- a/cpp/core/utils/qpl/qpl_job_pool.cc +++ b/cpp/core/utils/qpl/qpl_job_pool.cc @@ -37,7 +37,7 @@ QplJobHWPool& QplJobHWPool::GetInstance() { QplJobHWPool::QplJobHWPool() : randomEngine(std::random_device()()), distribution(0, MAX_JOB_NUMBER - 1) { uint64_t initTime = 0; TIME_NANO(initTime, InitJobPool()); - DEBUG_OUT << "Init job pool took " << 1.0 * initTime / 1e6 << "ms" << std::endl; + LOG(INFO) << "Init job pool took " << 1.0 * initTime / 1e6 << "ms"; } QplJobHWPool::~QplJobHWPool() { @@ -96,7 +96,7 @@ qpl_job* QplJobHWPool::AcquireJob(uint32_t& jobId) { } } jobId = MAX_JOB_NUMBER - index; - DEBUG_OUT << "Acquired job index " << index << " after " << retry << " retries." << std::endl; + LOG(INFO) << "Acquired job index " << index << " after " << retry << " retries."; return jobPool[index]; } diff --git a/cpp/velox/benchmarks/ColumnarToRowBenchmark.cc b/cpp/velox/benchmarks/ColumnarToRowBenchmark.cc index 093099c755d2a..2f2ff2ce16718 100644 --- a/cpp/velox/benchmarks/ColumnarToRowBenchmark.cc +++ b/cpp/velox/benchmarks/ColumnarToRowBenchmark.cc @@ -131,7 +131,7 @@ class GoogleBenchmarkColumnarToRowCacheScanBenchmark : public GoogleBenchmarkCol localSchema = std::make_shared(*schema_.get()); if (state.thread_index() == 0) - std::cout << localSchema->ToString() << std::endl; + LOG(INFO) << localSchema->ToString(); std::unique_ptr<::parquet::arrow::FileReader> parquetReader; std::shared_ptr recordBatchReader; @@ -150,7 +150,7 @@ class GoogleBenchmarkColumnarToRowCacheScanBenchmark : public GoogleBenchmarkCol } } while (recordBatch); - std::cout << " parquet parse done elapsed time = " << elapseRead / 1000000 << " rows = " << numRows << std::endl; + LOG(INFO) << " parquet parse done elapsed time = " << elapseRead / 1000000 << " rows = " << numRows; // reuse the columnarToRowConverter for batches caused system % increase a lot auto ctxPool = defaultLeafVeloxMemoryPool(); @@ -263,10 +263,10 @@ int main(int argc, char** argv) { cpu = atol(argv[i + 1]); } } - std::cout << "iterations = " << iterations << std::endl; - std::cout << "threads = " << threads << std::endl; - std::cout << "datafile = " << datafile << std::endl; - std::cout << "cpu = " << cpu << std::endl; + LOG(INFO) << "iterations = " << iterations; + LOG(INFO) << "threads = " << threads; + LOG(INFO) << "datafile = " << datafile; + LOG(INFO) << "cpu = " << cpu; gluten::GoogleBenchmarkColumnarToRowCacheScanBenchmark bck(datafile); diff --git a/cpp/velox/benchmarks/GenericBenchmark.cc b/cpp/velox/benchmarks/GenericBenchmark.cc index 24fa5ef7c014c..f46012e8cd149 100644 --- a/cpp/velox/benchmarks/GenericBenchmark.cc +++ b/cpp/velox/benchmarks/GenericBenchmark.cc @@ -184,7 +184,7 @@ auto BM_Generic = [](::benchmark::State& state, return; } if (FLAGS_print_result) { - std::cout << maybeBatch.ValueOrDie()->ToString() << std::endl; + LOG(INFO) << maybeBatch.ValueOrDie()->ToString(); } } @@ -204,7 +204,7 @@ auto BM_Generic = [](::benchmark::State& state, const auto& task = rawIter->task_; const auto& planNode = rawIter->veloxPlan_; auto statsStr = facebook::velox::exec::printPlanWithStats(*planNode, task->taskStats(), true); - std::cout << statsStr << std::endl; + LOG(INFO) << statsStr; } Runtime::release(runtime); @@ -239,11 +239,10 @@ int main(int argc, char** argv) { try { if (argc < 2) { - std::cout + LOG(INFO) << "No input args. Usage: " << std::endl - << "./generic_benchmark /absolute-path/to/substrait_json_file /absolute-path/to/data_file_1 /absolute-path/to/data_file_2 ..." - << std::endl; - std::cout << "Running example..." << std::endl; + << "./generic_benchmark /absolute-path/to/substrait_json_file /absolute-path/to/data_file_1 /absolute-path/to/data_file_2 ..."; + LOG(INFO) << "Running example..."; inputFiles.resize(2); substraitJsonFile = getGeneratedFilePath("example.json"); inputFiles[0] = getGeneratedFilePath("example_orders"); @@ -251,16 +250,16 @@ int main(int argc, char** argv) { } else { substraitJsonFile = argv[1]; abortIfFileNotExists(substraitJsonFile); - std::cout << "Using substrait json file: " << std::endl << substraitJsonFile << std::endl; - std::cout << "Using " << argc - 2 << " input data file(s): " << std::endl; + LOG(INFO) << "Using substrait json file: " << std::endl << substraitJsonFile; + LOG(INFO) << "Using " << argc - 2 << " input data file(s): "; for (auto i = 2; i < argc; ++i) { inputFiles.emplace_back(argv[i]); abortIfFileNotExists(inputFiles.back()); - std::cout << inputFiles.back() << std::endl; + LOG(INFO) << inputFiles.back(); } } } catch (const std::exception& e) { - std::cout << "Failed to run benchmark: " << e.what() << std::endl; + LOG(INFO) << "Failed to run benchmark: " << e.what(); ::benchmark::Shutdown(); std::exit(EXIT_FAILURE); } @@ -281,12 +280,12 @@ int main(int argc, char** argv) { } while (0) #if 0 - std::cout << "FLAGS_threads:" << FLAGS_threads << std::endl; - std::cout << "FLAGS_iterations:" << FLAGS_iterations << std::endl; - std::cout << "FLAGS_cpu:" << FLAGS_cpu << std::endl; - std::cout << "FLAGS_print_result:" << FLAGS_print_result << std::endl; - std::cout << "FLAGS_write_file:" << FLAGS_write_file << std::endl; - std::cout << "FLAGS_batch_size:" << FLAGS_batch_size << std::endl; + LOG(INFO) << "FLAGS_threads:" << FLAGS_threads ; + LOG(INFO) << "FLAGS_iterations:" << FLAGS_iterations ; + LOG(INFO) << "FLAGS_cpu:" << FLAGS_cpu ; + LOG(INFO) << "FLAGS_print_result:" << FLAGS_print_result ; + LOG(INFO) << "FLAGS_write_file:" << FLAGS_write_file ; + LOG(INFO) << "FLAGS_batch_size:" << FLAGS_batch_size ; #endif if (FLAGS_skip_input) { diff --git a/cpp/velox/benchmarks/ParquetWriteBenchmark.cc b/cpp/velox/benchmarks/ParquetWriteBenchmark.cc index dc32436950376..9645152b3a519 100644 --- a/cpp/velox/benchmarks/ParquetWriteBenchmark.cc +++ b/cpp/velox/benchmarks/ParquetWriteBenchmark.cc @@ -141,7 +141,7 @@ class GoogleBenchmarkArrowParquetWriteCacheScanBenchmark : public GoogleBenchmar localSchema = std::make_shared(*schema_.get()); if (state.thread_index() == 0) - std::cout << localSchema->ToString() << std::endl; + LOG(INFO) << localSchema->ToString(); std::unique_ptr<::parquet::arrow::FileReader> parquetReader; std::shared_ptr recordBatchReader; @@ -160,7 +160,7 @@ class GoogleBenchmarkArrowParquetWriteCacheScanBenchmark : public GoogleBenchmar } } while (recordBatch); - std::cout << " parquet parse done elapsed time = " << elapseRead / 1000000 << " rows = " << numRows << std::endl; + LOG(INFO) << " parquet parse done elapsed time = " << elapseRead / 1000000 << " rows = " << numRows; // reuse the ParquetWriteConverter for batches caused system % increase a lot auto fileName = "arrow_parquet_write.parquet"; @@ -234,7 +234,7 @@ class GoogleBenchmarkVeloxParquetWriteCacheScanBenchmark : public GoogleBenchmar localSchema = std::make_shared(*schema_.get()); if (state.thread_index() == 0) - std::cout << localSchema->ToString() << std::endl; + LOG(INFO) << localSchema->ToString(); std::unique_ptr<::parquet::arrow::FileReader> parquetReader; std::shared_ptr recordBatchReader; @@ -253,7 +253,7 @@ class GoogleBenchmarkVeloxParquetWriteCacheScanBenchmark : public GoogleBenchmar } } while (recordBatch); - std::cout << " parquet parse done elapsed time = " << elapseRead / 1000000 << " rows = " << numRows << std::endl; + LOG(INFO) << " parquet parse done elapsed time = " << elapseRead / 1000000 << " rows = " << numRows; // reuse the ParquetWriteConverter for batches caused system % increase a lot auto fileName = "velox_parquet_write.parquet"; @@ -328,11 +328,11 @@ int main(int argc, char** argv) { output = (argv[i + 1]); } } - std::cout << "iterations = " << iterations << std::endl; - std::cout << "threads = " << threads << std::endl; - std::cout << "datafile = " << datafile << std::endl; - std::cout << "cpu = " << cpu << std::endl; - std::cout << "output = " << output << std::endl; + LOG(INFO) << "iterations = " << iterations; + LOG(INFO) << "threads = " << threads; + LOG(INFO) << "datafile = " << datafile; + LOG(INFO) << "cpu = " << cpu; + LOG(INFO) << "output = " << output; gluten::GoogleBenchmarkVeloxParquetWriteCacheScanBenchmark bck(datafile, output); diff --git a/cpp/velox/benchmarks/QueryBenchmark.cc b/cpp/velox/benchmarks/QueryBenchmark.cc index 7070267036d5f..1f3ed3fbdf1c1 100644 --- a/cpp/velox/benchmarks/QueryBenchmark.cc +++ b/cpp/velox/benchmarks/QueryBenchmark.cc @@ -101,7 +101,7 @@ auto BM = [](::benchmark::State& state, state.SkipWithError(maybeBatch.status().message().c_str()); return; } - std::cout << maybeBatch.ValueOrDie()->ToString() << std::endl; + LOG(INFO) << maybeBatch.ValueOrDie()->ToString(); } } Runtime::release(runtime); diff --git a/cpp/velox/benchmarks/ShuffleSplitBenchmark.cc b/cpp/velox/benchmarks/ShuffleSplitBenchmark.cc index c4aec9650e1b7..a1e5eb9066502 100644 --- a/cpp/velox/benchmarks/ShuffleSplitBenchmark.cc +++ b/cpp/velox/benchmarks/ShuffleSplitBenchmark.cc @@ -254,7 +254,7 @@ class BenchmarkShuffleSplitCacheScanBenchmark : public BenchmarkShuffleSplit { localSchema = std::make_shared(fields); if (state.thread_index() == 0) - std::cout << localSchema->ToString() << std::endl; + LOG(INFO) << localSchema->ToString(); auto pool = options.memory_pool; GLUTEN_ASSIGN_OR_THROW( @@ -279,8 +279,8 @@ class BenchmarkShuffleSplitCacheScanBenchmark : public BenchmarkShuffleSplit { numRows += recordBatch->num_rows(); } } while (recordBatch); - std::cout << "parquet parse done elapsed time " << elapseRead / 1000000 << " ms " << std::endl; - std::cout << "batches = " << numBatches << " rows = " << numRows << std::endl; + LOG(INFO) << "parquet parse done elapsed time " << elapseRead / 1000000 << " ms "; + LOG(INFO) << "batches = " << numBatches << " rows = " << numRows; for (auto _ : state) { for_each( @@ -291,8 +291,8 @@ class BenchmarkShuffleSplitCacheScanBenchmark : public BenchmarkShuffleSplit { ARROW_ASSIGN_OR_THROW(cb, recordBatch2VeloxColumnarBatch(*recordBatch)); TIME_NANO_OR_THROW(splitTime, shuffleWriter->split(cb, ShuffleWriter::kMinMemLimit)); }); - // std::cout << " split done memory allocated = " << - // options.memory_pool->bytes_allocated() << std::endl; + // LOG(INFO) << " split done memory allocated = " << + // options.memory_pool->bytes_allocated() ; } TIME_NANO_OR_THROW(splitTime, shuffleWriter->stop()); @@ -315,7 +315,7 @@ class BenchmarkShuffleSplitIterateScanBenchmark : public BenchmarkShuffleSplit { ShuffleWriterOptions options, benchmark::State& state) { if (state.thread_index() == 0) - std::cout << schema_->ToString() << std::endl; + LOG(INFO) << schema_->ToString(); GLUTEN_ASSIGN_OR_THROW( shuffleWriter, @@ -353,7 +353,7 @@ int main(int argc, char** argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); if (FLAGS_file.size() == 0) { - std::cerr << "No input data file. Please specify via argument --file" << std::endl; + LOG(WARNING) << "No input data file. Please specify via argument --file"; } if (FLAGS_partitions == -1) { diff --git a/cpp/velox/benchmarks/common/BenchmarkUtils.cc b/cpp/velox/benchmarks/common/BenchmarkUtils.cc index b0085c54f829d..678cd55ece3fa 100644 --- a/cpp/velox/benchmarks/common/BenchmarkUtils.cc +++ b/cpp/velox/benchmarks/common/BenchmarkUtils.cc @@ -116,7 +116,7 @@ bool checkPathExists(const std::string& filepath) { void abortIfFileNotExists(const std::string& filepath) { if (!checkPathExists(filepath)) { - std::cerr << "File path does not exist: " << filepath << std::endl; + LOG(WARNING) << "File path does not exist: " << filepath; ::benchmark::Shutdown(); std::exit(EXIT_FAILURE); } @@ -147,7 +147,7 @@ void setCpu(uint32_t cpuindex) { CPU_ZERO(&cs); CPU_SET(cpuindex, &cs); if (sched_setaffinity(0, sizeof(cs), &cs) == -1) { - std::cerr << "Error binding CPU " << std::to_string(cpuindex) << std::endl; + LOG(WARNING) << "Error binding CPU " << std::to_string(cpuindex); exit(EXIT_FAILURE); } } diff --git a/cpp/velox/benchmarks/common/BenchmarkUtils.h b/cpp/velox/benchmarks/common/BenchmarkUtils.h index 5ed9744172241..695027188463d 100644 --- a/cpp/velox/benchmarks/common/BenchmarkUtils.h +++ b/cpp/velox/benchmarks/common/BenchmarkUtils.h @@ -18,11 +18,13 @@ #pragma once #include +#include #include #include #include #include #include + #include "benchmark/benchmark.h" #include "substrait/SubstraitToVeloxPlan.h" diff --git a/cpp/velox/benchmarks/common/FileReaderIterator.h b/cpp/velox/benchmarks/common/FileReaderIterator.h index c12c90e22706a..3fa94b6afba52 100644 --- a/cpp/velox/benchmarks/common/FileReaderIterator.h +++ b/cpp/velox/benchmarks/common/FileReaderIterator.h @@ -24,7 +24,6 @@ #include "compute/ResultIterator.h" #include "memory/ColumnarBatch.h" #include "memory/ColumnarBatchIterator.h" -#include "utils/DebugOut.h" namespace gluten { diff --git a/cpp/velox/benchmarks/common/OrcReaderIterator.h b/cpp/velox/benchmarks/common/OrcReaderIterator.h index 8bc2a50c9ec36..4f68a598645fb 100644 --- a/cpp/velox/benchmarks/common/OrcReaderIterator.h +++ b/cpp/velox/benchmarks/common/OrcReaderIterator.h @@ -61,7 +61,6 @@ class OrcStreamReaderIterator final : public OrcReaderIterator { std::shared_ptr next() override { auto startTime = std::chrono::steady_clock::now(); GLUTEN_ASSIGN_OR_THROW(auto batch, recordBatchReader_->Next()); - DEBUG_OUT << "OrcStreamReaderIterator get a batch, num rows: " << (batch ? batch->num_rows() : 0) << std::endl; collectBatchTime_ += std::chrono::duration_cast(std::chrono::steady_clock::now() - startTime).count(); if (batch == nullptr) { @@ -76,16 +75,7 @@ class OrcBufferedReaderIterator final : public OrcReaderIterator { explicit OrcBufferedReaderIterator(const std::string& path) : OrcReaderIterator(path) { createReader(); collectBatches(); - iter_ = batches_.begin(); -#ifdef GLUTEN_PRINT_DEBUG - DEBUG_OUT << "OrcBufferedReaderIterator open file: " << path << std::endl; - DEBUG_OUT << "Number of input batches: " << std::to_string(batches_.size()) << std::endl; - if (iter_ != batches_.cend()) { - DEBUG_OUT << "columns: " << (*iter_)->num_columns() << std::endl; - DEBUG_OUT << "rows: " << (*iter_)->num_rows() << std::endl; - } -#endif } std::shared_ptr next() override { diff --git a/cpp/velox/benchmarks/common/ParquetReaderIterator.h b/cpp/velox/benchmarks/common/ParquetReaderIterator.h index adba79daab106..f80b244a27419 100644 --- a/cpp/velox/benchmarks/common/ParquetReaderIterator.h +++ b/cpp/velox/benchmarks/common/ParquetReaderIterator.h @@ -34,7 +34,7 @@ class ParquetReaderIterator : public FileReaderIterator { fileReader_->GetRecordBatchReader(arrow::internal::Iota(fileReader_->num_row_groups()), &recordBatchReader_)); auto schema = recordBatchReader_->schema(); - std::cout << "schema:\n" << schema->ToString() << std::endl; + LOG(INFO) << "schema:\n" << schema->ToString(); } std::shared_ptr getSchema() override { @@ -50,13 +50,11 @@ class ParquetStreamReaderIterator final : public ParquetReaderIterator { public: explicit ParquetStreamReaderIterator(const std::string& path) : ParquetReaderIterator(path) { createReader(); - DEBUG_OUT << "ParquetStreamReaderIterator open file: " << path << std::endl; } std::shared_ptr next() override { auto startTime = std::chrono::steady_clock::now(); GLUTEN_ASSIGN_OR_THROW(auto batch, recordBatchReader_->Next()); - DEBUG_OUT << "ParquetStreamReaderIterator get a batch, num rows: " << (batch ? batch->num_rows() : 0) << std::endl; collectBatchTime_ += std::chrono::duration_cast(std::chrono::steady_clock::now() - startTime).count(); if (batch == nullptr) { @@ -71,14 +69,7 @@ class ParquetBufferedReaderIterator final : public ParquetReaderIterator { explicit ParquetBufferedReaderIterator(const std::string& path) : ParquetReaderIterator(path) { createReader(); collectBatches(); - iter_ = batches_.begin(); - DEBUG_OUT << "ParquetBufferedReaderIterator open file: " << path << std::endl; - DEBUG_OUT << "Number of input batches: " << std::to_string(batches_.size()) << std::endl; - if (iter_ != batches_.cend()) { - DEBUG_OUT << "columns: " << (*iter_)->num_columns() << std::endl; - DEBUG_OUT << "rows: " << (*iter_)->num_rows() << std::endl; - } } std::shared_ptr next() override { diff --git a/cpp/velox/compute/VeloxPlanConverter.cc b/cpp/velox/compute/VeloxPlanConverter.cc index 45b2927a4ff10..6a89a21e93f00 100644 --- a/cpp/velox/compute/VeloxPlanConverter.cc +++ b/cpp/velox/compute/VeloxPlanConverter.cc @@ -22,7 +22,6 @@ #include "compute/ResultIterator.h" #include "config/GlutenConfig.h" #include "operators/plannodes/RowVectorStream.h" -#include "utils/DebugOut.h" #include "velox/common/file/FileSystems.h" namespace gluten { @@ -212,7 +211,6 @@ std::shared_ptr VeloxPlanConverter::toVel } } auto veloxPlan = substraitVeloxPlanConverter_.toVeloxPlan(substraitPlan); - DEBUG_OUT << "Plan Node: " << std::endl << veloxPlan->toString(true, true) << std::endl; return veloxPlan; } diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index 7cbf383e4d4c7..b3f829fea7883 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ b/cpp/velox/compute/WholeStageResultIterator.cc @@ -237,8 +237,8 @@ void WholeStageResultIterator::collectMetrics() { const auto& nodeId = orderedNodeIds_[idx]; if (planStats.find(nodeId) == planStats.end()) { if (omittedNodeIds_.find(nodeId) == omittedNodeIds_.end()) { - DEBUG_OUT << "Not found node id: " << nodeId << std::endl; - DEBUG_OUT << "Plan Node: " << std::endl << veloxPlan_->toString(true, true) << std::endl; + LOG(WARNING) << "Not found node id: " << nodeId; + LOG(WARNING) << "Plan Node: " << std::endl << veloxPlan_->toString(true, true); throw std::runtime_error("Node id cannot be found in plan status."); } // Special handing for Filter over Project case. Filter metrics are diff --git a/cpp/velox/jni/VeloxJniWrapper.cc b/cpp/velox/jni/VeloxJniWrapper.cc index 3cdd4a1613699..682b42066e4ca 100644 --- a/cpp/velox/jni/VeloxJniWrapper.cc +++ b/cpp/velox/jni/VeloxJniWrapper.cc @@ -49,8 +49,6 @@ jint JNI_OnLoad(JavaVM* vm, void*) { gluten::initVeloxJniFileSystem(env); gluten::initVeloxJniUDF(env); - DEBUG_OUT << "Loaded Velox backend." << std::endl; - return jniVersion; } diff --git a/cpp/velox/udf/examples/MyUDF.cpp b/cpp/velox/udf/examples/MyUDF.cpp index 91ae9f753146c..91d9e295d485d 100644 --- a/cpp/velox/udf/examples/MyUDF.cpp +++ b/cpp/velox/udf/examples/MyUDF.cpp @@ -92,5 +92,5 @@ DEFINE_REGISTER_UDF { "myudf1", integerSignatures(), std::make_unique>(5)); facebook::velox::exec::registerVectorFunction( "myudf2", bigintSignatures(), std::make_unique>(5)); - std::cout << "registered myudf1, myudf2" << std::endl; + LOG(INFO) << "registered myudf1, myudf2"; }