Skip to content

Commit

Permalink
[VL] Add a benchmark to track on iterator facility's performance (apa…
Browse files Browse the repository at this point in the history
  • Loading branch information
zhztheplayer authored and Deepa8 committed Jun 26, 2024
1 parent 0ebb64e commit bca7ef2
Show file tree
Hide file tree
Showing 16 changed files with 438 additions and 253 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import org.apache.gluten.substrait.plan.PlanNode
import org.apache.gluten.substrait.rel.{LocalFilesBuilder, LocalFilesNode, SplitInfo}
import org.apache.gluten.substrait.rel.LocalFilesNode.ReadFileFormat
import org.apache.gluten.utils._
import org.apache.gluten.utils.iterator.Iterators
import org.apache.gluten.vectorized._

import org.apache.spark.{SparkConf, TaskContext}
Expand All @@ -36,7 +37,7 @@ import org.apache.spark.sql.catalyst.util.{DateFormatter, TimestampFormatter}
import org.apache.spark.sql.connector.read.InputPartition
import org.apache.spark.sql.execution.datasources.{FilePartition, PartitionedFile}
import org.apache.spark.sql.execution.metric.SQLMetric
import org.apache.spark.sql.types.{BinaryType, DateType, Decimal, DecimalType, StructType, TimestampType}
import org.apache.spark.sql.types._
import org.apache.spark.sql.utils.OASPackageBridge.InputMetricsWrapper
import org.apache.spark.sql.vectorized.ColumnarBatch
import org.apache.spark.util.ExecutorManager
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ import org.apache.gluten.exception.SchemaMismatchException
import org.apache.gluten.execution.RowToVeloxColumnarExec
import org.apache.gluten.memory.arrow.alloc.ArrowBufferAllocators
import org.apache.gluten.memory.arrow.pool.ArrowNativeMemoryPool
import org.apache.gluten.utils.{ArrowUtil, Iterators}
import org.apache.gluten.utils.ArrowUtil
import org.apache.gluten.utils.iterator.Iterators
import org.apache.gluten.vectorized.ArrowWritableColumnVector

import org.apache.spark.TaskContext
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ import org.apache.gluten.exception.GlutenException
import org.apache.gluten.exec.Runtimes
import org.apache.gluten.memory.arrow.alloc.ArrowBufferAllocators
import org.apache.gluten.memory.nmm.NativeMemoryManagers
import org.apache.gluten.utils.{ArrowAbiUtil, Iterators}
import org.apache.gluten.utils.ArrowAbiUtil
import org.apache.gluten.utils.iterator.Iterators
import org.apache.gluten.vectorized._

import org.apache.spark.broadcast.Broadcast
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
package org.apache.gluten.execution

import org.apache.gluten.extension.GlutenPlan
import org.apache.gluten.utils.{Iterators, VeloxBatchAppender}
import org.apache.gluten.utils.VeloxBatchAppender
import org.apache.gluten.utils.iterator.Iterators

import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
*/
package org.apache.gluten.execution

import org.apache.gluten.utils.Iterators
import org.apache.gluten.utils.iterator.Iterators

import org.apache.spark.{broadcast, SparkContext}
import org.apache.spark.sql.execution.joins.BuildSideRelation
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import org.apache.gluten.columnarbatch.ColumnarBatches
import org.apache.gluten.exception.GlutenNotSupportException
import org.apache.gluten.extension.ValidationResult
import org.apache.gluten.memory.nmm.NativeMemoryManagers
import org.apache.gluten.utils.Iterators
import org.apache.gluten.utils.iterator.Iterators
import org.apache.gluten.vectorized.NativeColumnarToRowJniWrapper

import org.apache.spark.broadcast.Broadcast
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ import org.apache.gluten.columnarbatch.ColumnarBatches
import org.apache.gluten.exception.GlutenException
import org.apache.gluten.extension.GlutenPlan
import org.apache.gluten.memory.arrow.alloc.ArrowBufferAllocators
import org.apache.gluten.utils.{Iterators, PullOutProjectHelper}
import org.apache.gluten.utils.PullOutProjectHelper
import org.apache.gluten.utils.iterator.Iterators
import org.apache.gluten.vectorized.ArrowWritableColumnVector

import org.apache.spark.{ContextAwareIterator, SparkEnv, TaskContext}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ import org.apache.gluten.exec.Runtimes
import org.apache.gluten.execution.{RowToVeloxColumnarExec, VeloxColumnarToRowExec}
import org.apache.gluten.memory.arrow.alloc.ArrowBufferAllocators
import org.apache.gluten.memory.nmm.NativeMemoryManagers
import org.apache.gluten.utils.{ArrowAbiUtil, Iterators}
import org.apache.gluten.utils.ArrowAbiUtil
import org.apache.gluten.utils.iterator.Iterators
import org.apache.gluten.vectorized.ColumnarBatchSerializerJniWrapper

import org.apache.spark.internal.Logging
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
package org.apache.spark.sql.execution.datasources

import org.apache.gluten.datasource.DatasourceJniWrapper
import org.apache.gluten.utils.Iterators
import org.apache.gluten.utils.iterator.Iterators
import org.apache.gluten.vectorized.ColumnarBatchInIterator

import org.apache.spark.TaskContext
Expand Down
228 changes: 0 additions & 228 deletions gluten-core/src/main/scala/org/apache/gluten/utils/Iterators.scala

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.gluten.utils.iterator

import org.apache.gluten.utils.iterator.IteratorsV1.WrapperBuilderV1

import org.apache.spark.TaskContext

/**
 * Utility class to provide iterator wrappers for non-trivial use cases. E.g. iterators that manage
 * payload's lifecycle.
 *
 * A wrapper is configured through a `WrapperBuilder` obtained from one of the `wrap` methods and
 * materialized with `create()`. The concrete builder implementation is selected by a `Version`.
 */
object Iterators {

  /** Selects which builder implementation `wrap` instantiates. */
  sealed trait Version
  case object V1 extends Version

  /** Version used by the single-argument `wrap` when the caller does not pick one explicitly. */
  private val DEFAULT_VERSION: Version = V1

  /**
   * Builder for a wrapped iterator over elements of type `A`. Every configuration method returns a
   * `WrapperBuilder[A]` so that calls can be chained; `create()` produces the resulting iterator.
   *
   * NOTE(review): the exact semantics of each option are defined by the version-specific
   * implementation, which is not visible here. The per-method notes below are inferred from the
   * signatures only and should be confirmed against that implementation.
   */
  trait WrapperBuilder[A] {

    /** Registers a callback that receives an element of type `A` (presumably to release it). */
    def recyclePayload(closeCallback: (A) => Unit): WrapperBuilder[A]

    /** Registers a by-name completion action (presumably run once the iterator is finished). */
    def recycleIterator(completionCallback: => Unit): WrapperBuilder[A]

    /** Registers a callback that receives a `Long` (presumably the iterator's lifetime in ms). */
    def collectLifeMillis(onCollected: Long => Unit): WrapperBuilder[A]

    /** Registers a callback that receives a `Long` (presumably time spent reading, in ms). */
    def collectReadMillis(onAdded: Long => Unit): WrapperBuilder[A]

    /** Associates the wrapper with the given Spark task context. */
    def asInterruptible(context: TaskContext): WrapperBuilder[A]

    /** Enables invocation-flow protection; see the implementation for what is guarded. */
    def protectInvocationFlow(): WrapperBuilder[A]

    /** Builds the wrapped iterator from the options configured so far. */
    def create(): Iterator[A]
  }

  /**
   * Starts building a wrapper around `in` using the default implementation version.
   *
   * @param in
   *   the iterator to wrap
   * @return
   *   a builder for configuring and creating the wrapped iterator
   */
  def wrap[A](in: Iterator[A]): WrapperBuilder[A] = {
    // Go through DEFAULT_VERSION rather than hard-coding V1, so the default has a single source
    // of truth.
    wrap(DEFAULT_VERSION, in)
  }

  /**
   * Starts building a wrapper around `in` using the implementation identified by `version`.
   *
   * @param version
   *   which builder implementation to instantiate
   * @param in
   *   the iterator to wrap
   * @return
   *   a builder for configuring and creating the wrapped iterator
   */
  def wrap[A](version: Version, in: Iterator[A]): WrapperBuilder[A] = {
    // Version is sealed, so the compiler checks this match for exhaustiveness; no catch-all case.
    version match {
      case V1 =>
        new WrapperBuilderV1[A](in)
    }
  }
}
Loading

0 comments on commit bca7ef2

Please sign in to comment.