diff --git a/cpp/CMake/BuildGTest.cmake b/cpp/CMake/BuildGTest.cmake index 7318b7bd7081..fff99455c419 100644 --- a/cpp/CMake/BuildGTest.cmake +++ b/cpp/CMake/BuildGTest.cmake @@ -14,7 +14,7 @@ message(STATUS "Building gtest from source") FetchContent_Declare( gtest URL ${GLUTEN_GTEST_SOURCE_URL} - URL_HASH "SHA256=${GLUTEN_GTEST_BUILD_SHA256_CHECKSUM}" + URL_HASH "${GLUTEN_GTEST_BUILD_SHA256_CHECKSUM}" ) FetchContent_MakeAvailable(gtest) diff --git a/cpp/velox/benchmarks/CMakeLists.txt b/cpp/velox/benchmarks/CMakeLists.txt index e350e1f8fa88..05278e1bb6b1 100644 --- a/cpp/velox/benchmarks/CMakeLists.txt +++ b/cpp/velox/benchmarks/CMakeLists.txt @@ -35,6 +35,8 @@ add_velox_benchmark(columnar_to_row_benchmark ColumnarToRowBenchmark.cc) add_velox_benchmark(parquet_write_benchmark ParquetWriteBenchmark.cc) +add_velox_benchmark(plan_validator_util PlanValidatorUtil.cc) + add_velox_benchmark(shuffle_split_benchmark ShuffleSplitBenchmark.cc) if(ENABLE_ORC) diff --git a/cpp/velox/benchmarks/PlanValidatorUtil.cc b/cpp/velox/benchmarks/PlanValidatorUtil.cc new file mode 100644 index 000000000000..3afb2ee00265 --- /dev/null +++ b/cpp/velox/benchmarks/PlanValidatorUtil.cc @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cstdint>
+#include <fstream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+#include "benchmarks/common/BenchmarkUtils.h"
+#include "compute/VeloxBackend.h"
+#include "compute/VeloxRuntime.h"
+#include "memory/VeloxMemoryManager.h"
+#include "substrait/SubstraitToVeloxPlanValidator.h"
+
+using namespace gluten;
+
+/// Command-line helper for debugging Substrait plan validation on the native side.
+///
+/// Set spark.gluten.sql.debug=true to get the validation plan, dump it into a JSON file,
+/// then pass that file's path as the single argument to replay validation natively.
+///
+/// Returns -1 on wrong usage or when the plan file cannot be opened; otherwise 0, with the
+/// validation outcome reported through LOG.
+int main(int argc, char** argv) {
+  if (argc != 2) {
+    LOG(WARNING) << "PlanValidatorUtil usage: \n"
+                 << "./plan_validator_util /substrait_json_plan";
+    return -1;
+  }
+  const std::string planPath = argv[1];
+
+  // Fail early with a clear message instead of handing an empty document to the JSON parser.
+  std::ifstream msgJson(planPath);
+  if (!msgJson.is_open()) {
+    LOG(ERROR) << "Failed to open substrait plan file: " << planPath;
+    return -1;
+  }
+  std::stringstream buffer;
+  buffer << msgJson.rdbuf();
+  const std::string msgData = buffer.str();
+  auto plan = substraitFromJsonToPb("Plan", msgData);
+
+  core::QueryCtx queryCtx;
+  auto pool = defaultLeafVeloxMemoryPool().get();
+  core::ExecCtx execCtx(pool, &queryCtx);
+
+  ::substrait::Plan subPlan;
+  // NOTE(review): assumes parseProtobuf takes a const uint8_t* buffer — confirm against its declaration.
+  parseProtobuf(reinterpret_cast<const uint8_t*>(plan.data()), plan.size(), &subPlan);
+
+  SubstraitToVeloxPlanValidator planValidator(pool, &execCtx);
+  try {
+    if (!planValidator.validate(subPlan)) {
+      // Print every message the validator recorded for the rejected plan.
+      const auto& reason = planValidator.getValidateLog();
+      for (const auto& msg : reason) {
+        LOG(INFO) << msg;
+      }
+    } else {
+      LOG(INFO) << planPath << " is valid.";
+    }
+  } catch (const std::invalid_argument& e) {
+    LOG(INFO) << "Failed to validate substrait plan because " << e.what();
+  }
+
+  return 0;
+}
diff --git a/cpp/velox/jni/VeloxJniWrapper.cc b/cpp/velox/jni/VeloxJniWrapper.cc index 2865ff42627c..3cdd4a161369 100644 --- a/cpp/velox/jni/VeloxJniWrapper.cc +++ b/cpp/velox/jni/VeloxJniWrapper.cc @@ -19,6 +19,7 @@ #include #include + #include #include "JniUdf.h" #include "compute/VeloxBackend.h" @@ -27,6 +28,7 @@ #include "jni/JniFileSystem.h" #include "memory/VeloxMemoryManager.h" #include "substrait/SubstraitToVeloxPlanValidator.h" +#include 
"utils/ConfigExtractor.h" #include @@ -84,11 +86,22 @@ JNIEXPORT void JNICALL Java_io_glutenproject_udf_UdfJniWrapper_nativeLoadUdfLibr JNIEXPORT jobject JNICALL Java_io_glutenproject_vectorized_PlanEvaluatorJniWrapper_nativeValidateWithFailureReason( // NOLINT JNIEnv* env, - jobject, + jobject wrapper, jbyteArray planArray) { JNI_METHOD_START + auto ctx = gluten::getRuntime(env, wrapper); auto planData = reinterpret_cast(env->GetByteArrayElements(planArray, 0)); auto planSize = env->GetArrayLength(planArray); + if (gluten::debugModeEnabled(ctx->getConfMap())) { + try { + auto jsonPlan = gluten::substraitFromPbToJson("Plan", planData, planSize); + LOG(INFO) << std::string(50, '#') << " received substrait::Plan: for validation"; + LOG(INFO) << jsonPlan; + } catch (const std::exception& e) { + LOG(WARNING) << "Error converting Substrait plan for validation to JSON: " << e.what(); + } + } + ::substrait::Plan subPlan; gluten::parseProtobuf(planData, planSize, &subPlan); diff --git a/docs/developers/HowTo.md b/docs/developers/HowTo.md index e4bab4a26883..27ede7fe0415 100644 --- a/docs/developers/HowTo.md +++ b/docs/developers/HowTo.md @@ -91,10 +91,17 @@ gdb generic_benchmark 6. get more detail information about benchmarks from [MicroBenchmarks](./MicroBenchmarks.md) -## 2 How to debug Java/Scala +## 2 How to debug the plan validation process +Gluten validates the generated plan before executing it. Validation usually happens on the native side, so we provide a utility to help debug the validation process there. + +1. Run the query with conf `spark.gluten.sql.debug=true`. The generated plan is printed to stderr in JSON format; save it to a file, for example `plan.json`. +2. Compile the cpp part with `--build_benchmarks=ON`, then find the `plan_validator_util` executable in `gluten_home/cpp/build/velox/benchmarks/`. +3. 
Run or debug with `./plan_validator_util /plan.json` + +## 3 How to debug Java/Scala wait to add -## 3 How to debug with core-dump +## 4 How to debug with core-dump wait to complete ``` cd the_directory_of_core_file_generated