From 0309dbb87cf25f2102c1efa17faf721e49ebd1fc Mon Sep 17 00:00:00 2001
From: Chengcheng Jin
Date: Mon, 15 Jul 2024 16:48:17 +0000
Subject: [PATCH] [VL] Add helper function ColumnarBatches.toString

---
 cpp/core/jni/JniWrapper.cc                          | 16 ++++++++++++++++
 cpp/core/memory/ColumnarBatch.h                     |  5 +++++
 cpp/velox/memory/VeloxColumnarBatch.h               | 16 ++++++++++++++++
 .../columnarbatch/ColumnarBatchJniWrapper.java      |  6 ++++++
 .../gluten/columnarbatch/ColumnarBatches.java       | 10 ++++++++++
 5 files changed, 53 insertions(+)

diff --git a/cpp/core/jni/JniWrapper.cc b/cpp/core/jni/JniWrapper.cc
index ab90e28495b71..73a556a4cd050 100644
--- a/cpp/core/jni/JniWrapper.cc
+++ b/cpp/core/jni/JniWrapper.cc
@@ -732,6 +732,22 @@ JNIEXPORT jlong JNICALL Java_org_apache_gluten_columnarbatch_ColumnarBatchJniWra
   JNI_METHOD_END(kInvalidObjectHandle)
 }
 
+// Looks up the batch registered under `handle` and returns its toString(start, length)
+// output as a Java string. A negative `length` is rejected by the GLUTEN_CHECK below;
+// `start` is forwarded to the batch unchanged.
+JNIEXPORT jstring JNICALL Java_org_apache_gluten_columnarbatch_ColumnarBatchJniWrapper_toString( // NOLINT
+    JNIEnv* env,
+    jobject wrapper,
+    jlong handle,
+    jint start,
+    jint length) {
+  JNI_METHOD_START
+  GLUTEN_CHECK(length >= 0, "ColumnarBatch toString length should be greater or equal than 0");
+  auto batch = ObjectStore::retrieve<ColumnarBatch>(handle);
+  return env->NewStringUTF(batch->toString(start, length).c_str());
+  JNI_METHOD_END(nullptr)
+}
+
 JNIEXPORT void JNICALL Java_org_apache_gluten_columnarbatch_ColumnarBatchJniWrapper_close( // NOLINT
     JNIEnv* env,
     jobject wrapper,
diff --git a/cpp/core/memory/ColumnarBatch.h b/cpp/core/memory/ColumnarBatch.h
index fd8189aa6a20e..55da2dac6a53a 100644
--- a/cpp/core/memory/ColumnarBatch.h
+++ b/cpp/core/memory/ColumnarBatch.h
@@ -52,6 +52,11 @@ class ColumnarBatch {
   // Serializes one single row to byte array that can be accessed as Spark-compatible unsafe row.
   virtual std::vector<char> toUnsafeRow(int32_t rowId) const;
 
+  // Returns a textual rendering of `length` rows of this batch, starting at row `start`.
+  // NOTE(review): pure virtual, so every concrete ColumnarBatch must override it - confirm
+  // that all subclasses in the tree do.
+  virtual std::string toString(int32_t start, int32_t length) const = 0;
+
   friend std::ostream& operator<<(std::ostream& os, const ColumnarBatch& columnarBatch);
 
  private:
diff --git a/cpp/velox/memory/VeloxColumnarBatch.h b/cpp/velox/memory/VeloxColumnarBatch.h
index 6c79f2772d2dd..c82a008f99770 100644
--- a/cpp/velox/memory/VeloxColumnarBatch.h
+++ b/cpp/velox/memory/VeloxColumnarBatch.h
@@ -42,6 +42,22 @@ class VeloxColumnarBatch final : public ColumnarBatch {
   std::shared_ptr<ArrowSchema> exportArrowSchema() override;
   std::shared_ptr<ArrowArray> exportArrowArray() override;
   std::vector<char> toUnsafeRow(int32_t rowId) const override;
+
+  // Renders rows [start, start + length) of the underlying row vector as text.
+  // The end row is computed in 64 bits and clamped to [0, size] so that a large
+  // `length` (e.g. INT32_MAX with start > 0) cannot overflow int32_t.
+  std::string toString(int32_t start, int32_t length) const override {
+    const auto rowVector = getRowVector();
+    int64_t end = static_cast<int64_t>(start) + length;
+    const int64_t numRows = rowVector->size();
+    if (end > numRows) {
+      end = numRows;
+    }
+    if (end < 0) {
+      end = 0;
+    }
+    return rowVector->toString(start, static_cast<int32_t>(end));
+  }
   std::shared_ptr<ColumnarBatch> select(facebook::velox::memory::MemoryPool* pool, std::vector<int32_t> columnIndices);
   facebook::velox::RowVectorPtr getRowVector() const;
   facebook::velox::RowVectorPtr getFlattenedRowVector();
diff --git a/gluten-data/src/main/java/org/apache/gluten/columnarbatch/ColumnarBatchJniWrapper.java b/gluten-data/src/main/java/org/apache/gluten/columnarbatch/ColumnarBatchJniWrapper.java
index e71e9d7bee1b5..3cbcd7663f71d 100644
--- a/gluten-data/src/main/java/org/apache/gluten/columnarbatch/ColumnarBatchJniWrapper.java
+++ b/gluten-data/src/main/java/org/apache/gluten/columnarbatch/ColumnarBatchJniWrapper.java
@@ -48,6 +48,12 @@ public static ColumnarBatchJniWrapper create(Runtime runtime) {
 
   public native long select(long batch, int[] columnIndices);
 
+  /**
+   * Returns a string rendering of {@code length} rows, starting at row {@code start}, of the
+   * native batch identified by {@code handle}.
+   */
+  public native String toString(long handle, int start, int length);
+
   public native void close(long batch);
 
   @Override
diff --git a/gluten-data/src/main/java/org/apache/gluten/columnarbatch/ColumnarBatches.java b/gluten-data/src/main/java/org/apache/gluten/columnarbatch/ColumnarBatches.java
index cb68e032dc5ba..275df41cd55ca 100644
--- a/gluten-data/src/main/java/org/apache/gluten/columnarbatch/ColumnarBatches.java
+++ b/gluten-data/src/main/java/org/apache/gluten/columnarbatch/ColumnarBatches.java
@@ -380,4 +380,14 @@ public static void release(ColumnarBatch b) {
   public static long getNativeHandle(ColumnarBatch batch) {
     return getIndicatorVector(batch).handle();
   }
+
+  /**
+   * Renders {@code length} rows of {@code batch}, starting at row {@code start}, as a string by
+   * passing the batch's native handle to {@link ColumnarBatchJniWrapper#toString}.
+   */
+  public static String toString(ColumnarBatch batch, int start, int length) {
+    return ColumnarBatchJniWrapper
+        .create(Runtimes.contextInstance("ColumnarBatches#toString"))
+        .toString(getNativeHandle(batch), start, length);
+  }
 }