From ecebb2994c4756d1082fa3957ff6cae4d89ec818 Mon Sep 17 00:00:00 2001
From: "Ma, Rong"
Date: Tue, 4 Jun 2024 13:09:54 +0800
Subject: [PATCH] fix compression

---
 cpp/core/jni/JniWrapper.cc               | 18 ++++++++++--------
 cpp/velox/benchmarks/GenericBenchmark.cc |  2 ++
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/cpp/core/jni/JniWrapper.cc b/cpp/core/jni/JniWrapper.cc
index f5a6c4bd70d04..c54e6672d39a1 100644
--- a/cpp/core/jni/JniWrapper.cc
+++ b/cpp/core/jni/JniWrapper.cc
@@ -844,6 +844,7 @@ JNIEXPORT jlong JNICALL Java_org_apache_gluten_vectorized_ShuffleWriterJniWrappe
     throw gluten::GlutenException(std::string("Short partitioning name can't be null"));
   }
 
+  // Build ShuffleWriterOptions.
   auto shuffleWriterOptions = ShuffleWriterOptions{
       .bufferSize = bufferSize,
       .bufferReallocThreshold = reallocThreshold,
@@ -851,7 +852,15 @@ JNIEXPORT jlong JNICALL Java_org_apache_gluten_vectorized_ShuffleWriterJniWrappe
       .taskAttemptId = (int64_t)taskAttemptId,
       .startPartitionId = startPartitionId,
   };
+  auto shuffleWriterTypeC = env->GetStringUTFChars(shuffleWriterTypeJstr, JNI_FALSE);
+  auto shuffleWriterType = std::string(shuffleWriterTypeC);
+  env->ReleaseStringUTFChars(shuffleWriterTypeJstr, shuffleWriterTypeC);
+
+  if (shuffleWriterType == "sort") {
+    shuffleWriterOptions.shuffleWriterType = kSortShuffle;
+  }
 
+  // Build PartitionWriterOptions.
   auto partitionWriterOptions = PartitionWriterOptions{
       .mergeBufferSize = mergeBufferSize,
       .mergeThreshold = mergeThreshold,
@@ -867,20 +876,13 @@ JNIEXPORT jlong JNICALL Java_org_apache_gluten_vectorized_ShuffleWriterJniWrappe
     partitionWriterOptions.codecBackend = getCodecBackend(env, codecBackendJstr);
     partitionWriterOptions.compressionMode = getCompressionMode(env, compressionModeJstr);
   }
+
   std::unique_ptr<PartitionWriter> partitionWriter;
 
   auto partitionWriterTypeC = env->GetStringUTFChars(partitionWriterTypeJstr, JNI_FALSE);
   auto partitionWriterType = std::string(partitionWriterTypeC);
   env->ReleaseStringUTFChars(partitionWriterTypeJstr, partitionWriterTypeC);
 
-  auto shuffleWriterTypeC = env->GetStringUTFChars(shuffleWriterTypeJstr, JNI_FALSE);
-  auto shuffleWriterType = std::string(shuffleWriterTypeC);
-  env->ReleaseStringUTFChars(shuffleWriterTypeJstr, shuffleWriterTypeC);
-
-  if (shuffleWriterType == "sort") {
-    shuffleWriterOptions.shuffleWriterType = kSortShuffle;
-  }
-
   if (partitionWriterType == "local") {
     if (dataFileJstr == NULL) {
       throw gluten::GlutenException(std::string("Shuffle DataFile can't be null"));
diff --git a/cpp/velox/benchmarks/GenericBenchmark.cc b/cpp/velox/benchmarks/GenericBenchmark.cc
index fdcba571cc499..381403f8fa28d 100644
--- a/cpp/velox/benchmarks/GenericBenchmark.cc
+++ b/cpp/velox/benchmarks/GenericBenchmark.cc
@@ -106,9 +106,11 @@ std::shared_ptr<VeloxShuffleWriter> createShuffleWriter(
   if (FLAGS_compression == "lz4") {
     partitionWriterOptions.codecBackend = CodecBackend::NONE;
     partitionWriterOptions.compressionType = arrow::Compression::LZ4_FRAME;
+    partitionWriterOptions.compressionTypeStr = "lz4";
   } else if (FLAGS_compression == "zstd") {
     partitionWriterOptions.codecBackend = CodecBackend::NONE;
     partitionWriterOptions.compressionType = arrow::Compression::ZSTD;
+    partitionWriterOptions.compressionTypeStr = "zstd";
   } else if (FLAGS_compression == "qat_gzip") {
     partitionWriterOptions.codecBackend = CodecBackend::QAT;
     partitionWriterOptions.compressionType = arrow::Compression::GZIP;