Skip to content

Commit

Permalink
[VL] Add plan validation util for debugging validate process (#3972)
Browse files Browse the repository at this point in the history
  • Loading branch information
Yohahaha authored Dec 13, 2023
1 parent 8edcb72 commit 0f5d9e1
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 4 deletions.
2 changes: 1 addition & 1 deletion cpp/CMake/BuildGTest.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ message(STATUS "Building gtest from source")
FetchContent_Declare(
gtest
URL ${GLUTEN_GTEST_SOURCE_URL}
URL_HASH "SHA256=${GLUTEN_GTEST_BUILD_SHA256_CHECKSUM}"
URL_HASH "${GLUTEN_GTEST_BUILD_SHA256_CHECKSUM}"
)

FetchContent_MakeAvailable(gtest)
2 changes: 2 additions & 0 deletions cpp/velox/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ add_velox_benchmark(columnar_to_row_benchmark ColumnarToRowBenchmark.cc)

add_velox_benchmark(parquet_write_benchmark ParquetWriteBenchmark.cc)

add_velox_benchmark(plan_validator_util PlanValidatorUtil.cc)

add_velox_benchmark(shuffle_split_benchmark ShuffleSplitBenchmark.cc)

if(ENABLE_ORC)
Expand Down
64 changes: 64 additions & 0 deletions cpp/velox/benchmarks/PlanValidatorUtil.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <fstream>
#include <sstream>
#include <stdexcept>
#include <string>

#include "benchmarks/common/BenchmarkUtils.h"
#include "compute/VeloxBackend.h"
#include "compute/VeloxRuntime.h"
#include "memory/VeloxMemoryManager.h"
#include "substrait/SubstraitToVeloxPlanValidator.h"

using namespace gluten;

/// Set spark.gluten.sql.debug=true to get validation plan and dump it into a json file,
/// then use this util debug validate process easilly in native side.
int main(int argc, char** argv) {
if (argc != 2) {
LOG(WARNING) << "PlanValidatorUtil usage: \n"
<< "./plan_validator_util <path>/substrait_json_plan";
return -1;
}
std::string planPath = argv[1];

std::ifstream msgJson(planPath);
std::stringstream buffer;
buffer << msgJson.rdbuf();
std::string msgData = buffer.str();
auto plan = substraitFromJsonToPb("Plan", msgData);

core::QueryCtx queryCtx;
auto pool = defaultLeafVeloxMemoryPool().get();
core::ExecCtx execCtx(pool, &queryCtx);

::substrait::Plan subPlan;
parseProtobuf(reinterpret_cast<uint8_t*>(plan.data()), plan.size(), &subPlan);

SubstraitToVeloxPlanValidator planValidator(pool, &execCtx);
try {
if (!planValidator.validate(subPlan)) {
auto reason = planValidator.getValidateLog();
for (auto& msg : reason) {
LOG(INFO) << msg;
}
} else {
LOG(INFO) << planPath << " is valid.";
}
} catch (std::invalid_argument& e) {
LOG(INFO) << "Failed to validate substrait plan because " << e.what();
}

return 0;
}
15 changes: 14 additions & 1 deletion cpp/velox/jni/VeloxJniWrapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <glog/logging.h>
#include <jni/JniCommon.h>

#include <exception>
#include "JniUdf.h"
#include "compute/VeloxBackend.h"
Expand All @@ -27,6 +28,7 @@
#include "jni/JniFileSystem.h"
#include "memory/VeloxMemoryManager.h"
#include "substrait/SubstraitToVeloxPlanValidator.h"
#include "utils/ConfigExtractor.h"

#include <iostream>

Expand Down Expand Up @@ -84,11 +86,22 @@ JNIEXPORT void JNICALL Java_io_glutenproject_udf_UdfJniWrapper_nativeLoadUdfLibr
JNIEXPORT jobject JNICALL
Java_io_glutenproject_vectorized_PlanEvaluatorJniWrapper_nativeValidateWithFailureReason( // NOLINT
JNIEnv* env,
jobject,
jobject wrapper,
jbyteArray planArray) {
JNI_METHOD_START
auto ctx = gluten::getRuntime(env, wrapper);
auto planData = reinterpret_cast<const uint8_t*>(env->GetByteArrayElements(planArray, 0));
auto planSize = env->GetArrayLength(planArray);
if (gluten::debugModeEnabled(ctx->getConfMap())) {
try {
auto jsonPlan = gluten::substraitFromPbToJson("Plan", planData, planSize);
LOG(INFO) << std::string(50, '#') << " received substrait::Plan: for validation";
LOG(INFO) << jsonPlan;
} catch (const std::exception& e) {
LOG(WARNING) << "Error converting Substrait plan for validation to JSON: " << e.what();
}
}

::substrait::Plan subPlan;
gluten::parseProtobuf(planData, planSize, &subPlan);

Expand Down
11 changes: 9 additions & 2 deletions docs/developers/HowTo.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,17 @@ gdb generic_benchmark

6. get more detail information about benchmarks from [MicroBenchmarks](./MicroBenchmarks.md)

## 2 How to debug Java/Scala
## 2 How to debug plan validation process
Gluten validates the generated plan before executing it. Validation usually happens on the native side, so we provide a utility to help debug the validation process there.

1. Run the query with conf `spark.gluten.sql.debug=true`. The generated plan will be printed to stderr in JSON format; save it to a file, for example `plan.json`.
2. Compile the cpp part with `--build_benchmarks=ON`, then look for the `plan_validator_util` executable in `gluten_home/cpp/build/velox/benchmarks/`.
3. Run or debug with `./plan_validator_util <path>/plan.json`

## 3 How to debug Java/Scala
wait to add

## 3 How to debug with core-dump
## 4 How to debug with core-dump
wait to complete
```
cd the_directory_of_core_file_generated
Expand Down

0 comments on commit 0f5d9e1

Please sign in to comment.