From a038e9332a7b06f7e9e31892d0544c715b78a1c8 Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Fri, 16 Aug 2024 13:23:58 +0800 Subject: [PATCH] [VL] Fix warning when spark.gluten.sql.columnarToRowMemoryThreshold is not set (#6866) --- .../apache/gluten/execution/VeloxTPCHSuite.scala | 2 +- cpp/core/config/GlutenConfig.h | 1 - cpp/core/jni/JniWrapper.cc | 15 ++------------- .../scala/org/apache/gluten/GlutenConfig.scala | 11 +++++------ 4 files changed, 8 insertions(+), 21 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxTPCHSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxTPCHSuite.scala index 22f96bbbc4c2..0e94c242c1db 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxTPCHSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxTPCHSuite.scala @@ -255,7 +255,7 @@ class VeloxTPCHDistinctSpillSuite extends VeloxTPCHTableSupport { super.sparkConf .set("spark.memory.offHeap.size", "50m") .set("spark.gluten.memory.overAcquiredMemoryRatio", "0.9") // to trigger distinct spill early - .set(GlutenConfig.GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD_KEY, "8k") + .set(GlutenConfig.GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD.key, "8k") } test("distinct spill") { diff --git a/cpp/core/config/GlutenConfig.h b/cpp/core/config/GlutenConfig.h index e4f5a884b920..057d85930d2a 100644 --- a/cpp/core/config/GlutenConfig.h +++ b/cpp/core/config/GlutenConfig.h @@ -57,7 +57,6 @@ const std::string kGzipWindowSize4k = "4096"; const std::string kParquetCompressionCodec = "spark.sql.parquet.compression.codec"; const std::string kColumnarToRowMemoryThreshold = "spark.gluten.sql.columnarToRowMemoryThreshold"; -const std::string kColumnarToRowMemoryDefaultThreshold = "67108864"; // 64MB const std::string kUGIUserName = "spark.gluten.ugi.username"; const std::string kUGITokens = "spark.gluten.ugi.tokens"; diff --git a/cpp/core/jni/JniWrapper.cc b/cpp/core/jni/JniWrapper.cc index 5c2752f18ae7..4be5e9142818 100644 --- a/cpp/core/jni/JniWrapper.cc +++ b/cpp/core/jni/JniWrapper.cc @@ -534,19 +534,8 @@ Java_org_apache_gluten_vectorized_NativeColumnarToRowJniWrapper_nativeColumnarTo auto& conf = ctx->getConfMap(); int64_t column2RowMemThreshold; auto it = conf.find(kColumnarToRowMemoryThreshold); - bool confIsLegal = - ((it == conf.end()) ? false : std::all_of(it->second.begin(), it->second.end(), [](unsigned char c) { - return std::isdigit(c); - })); - if (confIsLegal) { - column2RowMemThreshold = std::stoll(it->second); - } else { - LOG(INFO) - << "Because the spark.gluten.sql.columnarToRowMemoryThreshold configuration item is invalid, the kColumnarToRowMemoryDefaultThreshold default value is used, which is " - << kColumnarToRowMemoryDefaultThreshold << " byte"; - column2RowMemThreshold = std::stoll(kColumnarToRowMemoryDefaultThreshold); - } - + GLUTEN_CHECK(!(it == conf.end()), "Required key not found in runtime config: " + kColumnarToRowMemoryThreshold); + column2RowMemThreshold = std::stoll(it->second); // Convert the native batch to Spark unsafe row. return ctx->saveObject(ctx->createColumnar2RowConverter(column2RowMemThreshold)); JNI_METHOD_END(kInvalidObjectHandle) diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala index b1ef4be5cf63..0146c3604f25 100644 --- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala +++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala @@ -587,9 +587,6 @@ object GlutenConfig { val GLUTEN_SHUFFLE_WRITER_MERGE_THRESHOLD = "spark.gluten.sql.columnar.shuffle.merge.threshold" - // Columnar to row memory threshold. - val GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD_KEY = "spark.gluten.sql.columnarToRowMemoryThreshold" - // Controls whether to load DLL from jars. User can get dependent native libs packed into a jar // by executing dev/package.sh. Then, with that jar configured, Gluten can load the native libs // at runtime. This config is just for velox backend. And it is NOT applicable to the situation @@ -654,7 +651,6 @@ object GlutenConfig { GLUTEN_SAVE_DIR, GLUTEN_TASK_OFFHEAP_SIZE_IN_BYTES_KEY, GLUTEN_MAX_BATCH_SIZE_KEY, - GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD_KEY, GLUTEN_SHUFFLE_WRITER_BUFFER_SIZE, SQLConf.SESSION_LOCAL_TIMEZONE.key, GLUTEN_DEFAULT_SESSION_TIMEZONE_KEY, @@ -690,7 +686,10 @@ object GlutenConfig { (SQLConf.IGNORE_MISSING_FILES.key, SQLConf.IGNORE_MISSING_FILES.defaultValueString), ( COLUMNAR_MEMORY_BACKTRACE_ALLOCATION.key, - COLUMNAR_MEMORY_BACKTRACE_ALLOCATION.defaultValueString) + COLUMNAR_MEMORY_BACKTRACE_ALLOCATION.defaultValueString), + ( + GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD.key, + GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD.defaultValue.get.toString) ) keyWithDefault.forEach(e => nativeConfMap.put(e._1, conf.getOrElse(e._1, e._2))) @@ -1123,7 +1122,7 @@ object GlutenConfig { .createWithDefault(4096) val GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD = - buildConf(GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD_KEY) + buildConf("spark.gluten.sql.columnarToRowMemoryThreshold") .internal() .bytesConf(ByteUnit.BYTE) .createWithDefaultString("64MB")