diff --git a/cpp/core/config/GlutenConfig.h b/cpp/core/config/GlutenConfig.h
index e918edf80b9ad..d8abe9f2bf671 100644
--- a/cpp/core/config/GlutenConfig.h
+++ b/cpp/core/config/GlutenConfig.h
@@ -57,6 +57,7 @@ const std::string kGzipWindowSize4k = "4096";
 const std::string kParquetCompressionCodec = "spark.sql.parquet.compression.codec";
 
 const std::string kColumnToRowMemoryThreshold = "spark.gluten.sql.columnToRowMemoryThreshold";
+const std::string kColumnToRowMemoryDefaultThreshold = "67108864"; // 64MB
 
 const std::string kUGIUserName = "spark.gluten.ugi.username";
 const std::string kUGITokens = "spark.gluten.ugi.tokens";
diff --git a/cpp/core/jni/JniWrapper.cc b/cpp/core/jni/JniWrapper.cc
index b7e5379071bc2..ac7da73266eb0 100644
--- a/cpp/core/jni/JniWrapper.cc
+++ b/cpp/core/jni/JniWrapper.cc
@@ -506,11 +506,14 @@ Java_org_apache_gluten_vectorized_NativeColumnarToRowJniWrapper_nativeColumnarTo
   auto columnarToRowConverter = ctx->objectStore()->retrieve(c2rHandle);
   auto cb = ctx->objectStore()->retrieve(batchHandle);
 
-  int64_t column2RowMemThreshold = 256 * 1024 * 1024;
+  // Start from the 64MB default and override it only when the key is present and holds a
+  // non-empty, all-digit value. The iterator is dereferenced strictly after the end() check.
+  int64_t column2RowMemThreshold = std::stoll(kColumnToRowMemoryDefaultThreshold);
   if (auto it = conf.find(kColumnToRowMemoryThreshold); it != conf.end()) {
-    if (std::all_of(it->second.begin(), it->second.end(), [](unsigned char c) { return std::isdigit(c); })) {
+    if (!it->second.empty() &&
+        std::all_of(it->second.begin(), it->second.end(), [](unsigned char c) { return std::isdigit(c); })) {
       column2RowMemThreshold = std::stoll(it->second);
     }
   }
 
   columnarToRowConverter->convert(cb, rowId, column2RowMemThreshold);
diff --git a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
index 9c85d145620fc..c592a350a0866 100644
--- a/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
+++ b/shims/common/src/main/scala/org/apache/gluten/GlutenConfig.scala
@@ -1087,8 +1087,8 @@ object GlutenConfig {
   val GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD =
     buildConf(GLUTEN_COLUMNAR_TO_ROW_MEM_THRESHOLD_KEY)
       .internal()
-      .longConf
-      .createWithDefault(256 * 1024 * 1024)
+      .bytesConf(ByteUnit.BYTE)
+      .createWithDefaultString("64MB")
 
   // if not set, use COLUMNAR_MAX_BATCH_SIZE instead
   val SHUFFLE_WRITER_BUFFER_SIZE =