From 51e67932886b7417b8ad8a94d30b131791b57b98 Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Wed, 4 Sep 2024 09:28:58 +0800 Subject: [PATCH] [GLUTEN-7031] Move iterator wrappers to gluten-core (#7095) --- .../backendsapi/velox/VeloxIteratorApi.scala | 2 +- .../gluten/datasource/ArrowCSVFileFormat.scala | 2 +- .../gluten/execution/RowToVeloxColumnarExec.scala | 2 +- .../execution/VeloxBroadcastBuildSideRDD.scala | 2 +- .../gluten/execution/VeloxColumnarToRowExec.scala | 2 +- .../gluten/execution/VeloxResizeBatchesExec.scala | 2 +- .../api/python/ColumnarArrowEvalPythonExec.scala | 2 +- .../execution/ColumnarCachedBatchSerializer.scala | 2 +- .../org/apache/gluten}/iterator/Iterators.scala | 6 +++--- .../org/apache/gluten}/iterator/IteratorsV1.scala | 6 +++--- .../org/apache/spark/task}/SparkTaskUtil.scala | 2 +- .../memtarget/spark/TreeMemoryConsumerTest.java | 0 .../apache/gluten}/iterator/IteratorSuite.scala | 4 ++-- .../apache/gluten/task}/TaskResourceSuite.scala | 5 ++--- .../spark}/iterator/IteratorBenchmark.scala | 6 +++--- .../sql/execution/ColumnarBuildSideRelation.scala | 2 +- .../spark/sql/execution/utils/ExecUtil.scala | 2 +- gluten-substrait/pom.xml | 15 --------------- .../gluten/planner/plan/GlutenPlanModel.scala | 3 +-- 19 files changed, 25 insertions(+), 42 deletions(-) rename {gluten-substrait/src/main/scala/org/apache/gluten/utils => gluten-core/src/main/scala/org/apache/gluten}/iterator/Iterators.scala (94%) rename {gluten-substrait/src/main/scala/org/apache/gluten/utils => gluten-core/src/main/scala/org/apache/gluten}/iterator/IteratorsV1.scala (98%) rename {gluten-substrait/src/main/scala/org/apache/spark/util => gluten-core/src/main/scala/org/apache/spark/task}/SparkTaskUtil.scala (97%) rename {gluten-substrait => gluten-core}/src/test/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumerTest.java (100%) rename {gluten-substrait/src/test/scala/org/apache/gluten/utils => gluten-core/src/test/scala/org/apache/gluten}/iterator/IteratorSuite.scala (97%) rename {gluten-substrait/src/test/scala/org/apache/gluten/utils => gluten-core/src/test/scala/org/apache/gluten/task}/TaskResourceSuite.scala (95%) rename {gluten-substrait/src/test/scala/org/apache/spark/utils => gluten-core/src/test/scala/org/apache/spark}/iterator/IteratorBenchmark.scala (96%) diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala index 7cf02f28d025..6063e8bf0ab9 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxIteratorApi.scala @@ -19,13 +19,13 @@ package org.apache.gluten.backendsapi.velox import org.apache.gluten.GlutenNumaBindingInfo import org.apache.gluten.backendsapi.IteratorApi import org.apache.gluten.execution._ +import org.apache.gluten.iterator.Iterators import org.apache.gluten.metrics.IMetrics import org.apache.gluten.sql.shims.SparkShimLoader import org.apache.gluten.substrait.plan.PlanNode import org.apache.gluten.substrait.rel.{LocalFilesBuilder, LocalFilesNode, SplitInfo} import org.apache.gluten.substrait.rel.LocalFilesNode.ReadFileFormat import org.apache.gluten.utils._ -import org.apache.gluten.utils.iterator.Iterators import org.apache.gluten.vectorized._ import org.apache.spark.{SparkConf, TaskContext} diff --git a/backends-velox/src/main/scala/org/apache/gluten/datasource/ArrowCSVFileFormat.scala b/backends-velox/src/main/scala/org/apache/gluten/datasource/ArrowCSVFileFormat.scala index 5629811f4d22..f42b921ab684 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/datasource/ArrowCSVFileFormat.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/datasource/ArrowCSVFileFormat.scala @@ -19,10 +19,10 @@ package org.apache.gluten.datasource import org.apache.gluten.columnarbatch.ColumnarBatches import org.apache.gluten.exception.SchemaMismatchException import org.apache.gluten.execution.RowToVeloxColumnarExec +import org.apache.gluten.iterator.Iterators import org.apache.gluten.memory.arrow.alloc.ArrowBufferAllocators import org.apache.gluten.memory.arrow.pool.ArrowNativeMemoryPool import org.apache.gluten.utils.ArrowUtil -import org.apache.gluten.utils.iterator.Iterators import org.apache.gluten.vectorized.ArrowWritableColumnVector import org.apache.spark.TaskContext diff --git a/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala b/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala index 542c325f095e..a853778484b1 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/execution/RowToVeloxColumnarExec.scala @@ -18,10 +18,10 @@ package org.apache.gluten.execution import org.apache.gluten.GlutenConfig import org.apache.gluten.columnarbatch.ColumnarBatches +import org.apache.gluten.iterator.Iterators import org.apache.gluten.memory.arrow.alloc.ArrowBufferAllocators import org.apache.gluten.runtime.Runtimes import org.apache.gluten.utils.ArrowAbiUtil -import org.apache.gluten.utils.iterator.Iterators import org.apache.gluten.vectorized._ import org.apache.spark.broadcast.Broadcast diff --git a/backends-velox/src/main/scala/org/apache/gluten/execution/VeloxBroadcastBuildSideRDD.scala b/backends-velox/src/main/scala/org/apache/gluten/execution/VeloxBroadcastBuildSideRDD.scala index fe3c0b7e3938..0163178e59f4 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/execution/VeloxBroadcastBuildSideRDD.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/execution/VeloxBroadcastBuildSideRDD.scala @@ -16,7 +16,7 @@ */ package org.apache.gluten.execution -import org.apache.gluten.utils.iterator.Iterators +import org.apache.gluten.iterator.Iterators import org.apache.spark.{broadcast, SparkContext} import org.apache.spark.sql.execution.joins.BuildSideRelation diff --git a/backends-velox/src/main/scala/org/apache/gluten/execution/VeloxColumnarToRowExec.scala b/backends-velox/src/main/scala/org/apache/gluten/execution/VeloxColumnarToRowExec.scala index 4bd553b01235..ec6531b717a2 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/execution/VeloxColumnarToRowExec.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/execution/VeloxColumnarToRowExec.scala @@ -19,8 +19,8 @@ package org.apache.gluten.execution import org.apache.gluten.columnarbatch.ColumnarBatches import org.apache.gluten.exception.GlutenNotSupportException import org.apache.gluten.extension.ValidationResult +import org.apache.gluten.iterator.Iterators import org.apache.gluten.runtime.Runtimes -import org.apache.gluten.utils.iterator.Iterators import org.apache.gluten.vectorized.NativeColumnarToRowJniWrapper import org.apache.spark.broadcast.Broadcast diff --git a/backends-velox/src/main/scala/org/apache/gluten/execution/VeloxResizeBatchesExec.scala b/backends-velox/src/main/scala/org/apache/gluten/execution/VeloxResizeBatchesExec.scala index d05e68f857db..ec62a33bdd32 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/execution/VeloxResizeBatchesExec.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/execution/VeloxResizeBatchesExec.scala @@ -17,8 +17,8 @@ package org.apache.gluten.execution import org.apache.gluten.extension.GlutenPlan +import org.apache.gluten.iterator.Iterators import org.apache.gluten.utils.VeloxBatchResizer -import org.apache.gluten.utils.iterator.Iterators import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow diff --git a/backends-velox/src/main/scala/org/apache/spark/api/python/ColumnarArrowEvalPythonExec.scala b/backends-velox/src/main/scala/org/apache/spark/api/python/ColumnarArrowEvalPythonExec.scala index 0e01c9d5d82f..19f286056e5b 100644 --- a/backends-velox/src/main/scala/org/apache/spark/api/python/ColumnarArrowEvalPythonExec.scala +++ b/backends-velox/src/main/scala/org/apache/spark/api/python/ColumnarArrowEvalPythonExec.scala @@ -19,9 +19,9 @@ package org.apache.spark.api.python import org.apache.gluten.columnarbatch.ColumnarBatches import org.apache.gluten.exception.GlutenException import org.apache.gluten.extension.GlutenPlan +import org.apache.gluten.iterator.Iterators import org.apache.gluten.memory.arrow.alloc.ArrowBufferAllocators import org.apache.gluten.utils.PullOutProjectHelper -import org.apache.gluten.utils.iterator.Iterators import org.apache.gluten.vectorized.ArrowWritableColumnVector import org.apache.spark.{ContextAwareIterator, SparkEnv, TaskContext} diff --git a/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala b/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala index 7f4235fdf107..d6e7aa5b2244 100644 --- a/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala +++ b/backends-velox/src/main/scala/org/apache/spark/sql/execution/ColumnarCachedBatchSerializer.scala @@ -20,10 +20,10 @@ import org.apache.gluten.GlutenConfig import org.apache.gluten.backendsapi.BackendsApiManager import org.apache.gluten.columnarbatch.ColumnarBatches import org.apache.gluten.execution.{RowToVeloxColumnarExec, VeloxColumnarToRowExec} +import org.apache.gluten.iterator.Iterators import org.apache.gluten.memory.arrow.alloc.ArrowBufferAllocators import org.apache.gluten.runtime.Runtimes import org.apache.gluten.utils.ArrowAbiUtil -import org.apache.gluten.utils.iterator.Iterators import org.apache.gluten.vectorized.ColumnarBatchSerializerJniWrapper import org.apache.spark.internal.Logging diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/utils/iterator/Iterators.scala b/gluten-core/src/main/scala/org/apache/gluten/iterator/Iterators.scala similarity index 94% rename from gluten-substrait/src/main/scala/org/apache/gluten/utils/iterator/Iterators.scala rename to gluten-core/src/main/scala/org/apache/gluten/iterator/Iterators.scala index eedfa66cfeaf..2de1c7b4ed80 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/utils/iterator/Iterators.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/iterator/Iterators.scala @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.gluten.utils.iterator - -import org.apache.gluten.utils.iterator.IteratorsV1.WrapperBuilderV1 +package org.apache.gluten.iterator import org.apache.spark.TaskContext +import IteratorsV1.WrapperBuilderV1 + /** * Utility class to provide iterator wrappers for non-trivial use cases. E.g. iterators that manage * payload's lifecycle. diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/utils/iterator/IteratorsV1.scala b/gluten-core/src/main/scala/org/apache/gluten/iterator/IteratorsV1.scala similarity index 98% rename from gluten-substrait/src/main/scala/org/apache/gluten/utils/iterator/IteratorsV1.scala rename to gluten-core/src/main/scala/org/apache/gluten/iterator/IteratorsV1.scala index 1090c6944ae5..120d4cb2b026 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/utils/iterator/IteratorsV1.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/iterator/IteratorsV1.scala @@ -14,13 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.gluten.utils.iterator - -import org.apache.gluten.utils.iterator.Iterators.WrapperBuilder +package org.apache.gluten.iterator import org.apache.spark.{InterruptibleIterator, TaskContext} import org.apache.spark.task.TaskResources +import Iterators.WrapperBuilder + import java.util.concurrent.TimeUnit import java.util.concurrent.atomic.AtomicBoolean diff --git a/gluten-substrait/src/main/scala/org/apache/spark/util/SparkTaskUtil.scala b/gluten-core/src/main/scala/org/apache/spark/task/SparkTaskUtil.scala similarity index 97% rename from gluten-substrait/src/main/scala/org/apache/spark/util/SparkTaskUtil.scala rename to gluten-core/src/main/scala/org/apache/spark/task/SparkTaskUtil.scala index 21ef7bb0ac9a..ee6d357d5524 100644 --- a/gluten-substrait/src/main/scala/org/apache/spark/util/SparkTaskUtil.scala +++ b/gluten-core/src/main/scala/org/apache/spark/task/SparkTaskUtil.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.util +package org.apache.spark.task import org.apache.spark.TaskContext import org.apache.spark.memory.TaskMemoryManager diff --git a/gluten-substrait/src/test/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumerTest.java b/gluten-core/src/test/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumerTest.java similarity index 100% rename from gluten-substrait/src/test/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumerTest.java rename to gluten-core/src/test/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumerTest.java diff --git a/gluten-substrait/src/test/scala/org/apache/gluten/utils/iterator/IteratorSuite.scala b/gluten-core/src/test/scala/org/apache/gluten/iterator/IteratorSuite.scala similarity index 97% rename from gluten-substrait/src/test/scala/org/apache/gluten/utils/iterator/IteratorSuite.scala rename to gluten-core/src/test/scala/org/apache/gluten/iterator/IteratorSuite.scala index 9a70f235a339..b6b0bf2543ea 100644 --- a/gluten-substrait/src/test/scala/org/apache/gluten/utils/iterator/IteratorSuite.scala +++ b/gluten-core/src/test/scala/org/apache/gluten/iterator/IteratorSuite.scala @@ -14,9 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.gluten.utils.iterator +package org.apache.gluten.iterator -import org.apache.gluten.utils.iterator.Iterators.{V1, WrapperBuilder} +import org.apache.gluten.iterator.Iterators.{V1, WrapperBuilder} import org.apache.spark.task.TaskResources diff --git a/gluten-substrait/src/test/scala/org/apache/gluten/utils/TaskResourceSuite.scala b/gluten-core/src/test/scala/org/apache/gluten/task/TaskResourceSuite.scala similarity index 95% rename from gluten-substrait/src/test/scala/org/apache/gluten/utils/TaskResourceSuite.scala rename to gluten-core/src/test/scala/org/apache/gluten/task/TaskResourceSuite.scala index 47ffc8812d1c..026b717621be 100644 --- a/gluten-substrait/src/test/scala/org/apache/gluten/utils/TaskResourceSuite.scala +++ b/gluten-core/src/test/scala/org/apache/gluten/task/TaskResourceSuite.scala @@ -14,13 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.gluten.utils +package org.apache.gluten.task import org.apache.spark.memory.{MemoryConsumer, MemoryMode} import org.apache.spark.sql.catalyst.plans.SQLHelper import org.apache.spark.sql.internal.SQLConf -import org.apache.spark.task.{TaskResource, TaskResources} -import org.apache.spark.util.SparkTaskUtil +import org.apache.spark.task.{SparkTaskUtil, TaskResource, TaskResources} import org.scalatest.funsuite.AnyFunSuite diff --git a/gluten-substrait/src/test/scala/org/apache/spark/utils/iterator/IteratorBenchmark.scala b/gluten-core/src/test/scala/org/apache/spark/iterator/IteratorBenchmark.scala similarity index 96% rename from gluten-substrait/src/test/scala/org/apache/spark/utils/iterator/IteratorBenchmark.scala rename to gluten-core/src/test/scala/org/apache/spark/iterator/IteratorBenchmark.scala index 31f4848f3863..047deebfac1e 100644 --- a/gluten-substrait/src/test/scala/org/apache/spark/utils/iterator/IteratorBenchmark.scala +++ b/gluten-core/src/test/scala/org/apache/spark/iterator/IteratorBenchmark.scala @@ -14,10 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.utils.iterator +package org.apache.spark.iterator -import org.apache.gluten.utils.iterator.Iterators -import org.apache.gluten.utils.iterator.Iterators.V1 +import org.apache.gluten.iterator.Iterators +import org.apache.gluten.iterator.Iterators.V1 import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} import org.apache.spark.task.TaskResources diff --git a/gluten-data/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala b/gluten-data/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala index 09805cc2f324..5b34104a3f29 100644 --- a/gluten-data/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala +++ b/gluten-data/src/main/scala/org/apache/spark/sql/execution/ColumnarBuildSideRelation.scala @@ -17,11 +17,11 @@ package org.apache.spark.sql.execution import org.apache.gluten.columnarbatch.ColumnarBatches +import org.apache.gluten.iterator.Iterators import org.apache.gluten.memory.arrow.alloc.ArrowBufferAllocators import org.apache.gluten.runtime.Runtimes import org.apache.gluten.sql.shims.SparkShimLoader import org.apache.gluten.utils.ArrowAbiUtil -import org.apache.gluten.utils.iterator.Iterators import org.apache.gluten.vectorized.{ColumnarBatchSerializerJniWrapper, NativeColumnarToRowJniWrapper} import org.apache.spark.sql.catalyst.InternalRow diff --git a/gluten-data/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala b/gluten-data/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala index 65b06214e73b..9e06881427bd 100644 --- a/gluten-data/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala +++ b/gluten-data/src/main/scala/org/apache/spark/sql/execution/utils/ExecUtil.scala @@ -17,9 +17,9 @@ package org.apache.spark.sql.execution.utils import org.apache.gluten.columnarbatch.ColumnarBatches +import org.apache.gluten.iterator.Iterators import org.apache.gluten.memory.arrow.alloc.ArrowBufferAllocators import org.apache.gluten.runtime.Runtimes -import org.apache.gluten.utils.iterator.Iterators import org.apache.gluten.vectorized.{ArrowWritableColumnVector, NativeColumnarToRowInfo, NativeColumnarToRowJniWrapper, NativePartitioning} import org.apache.spark.{Partitioner, RangePartitioner, ShuffleDependency} diff --git a/gluten-substrait/pom.xml b/gluten-substrait/pom.xml index 9ace971f9d7e..77bb9f3c33e2 100644 --- a/gluten-substrait/pom.xml +++ b/gluten-substrait/pom.xml @@ -296,21 +296,6 @@ true - - compile-gluten-proto - generate-sources - - compile - test-compile - - - - com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier} - - src/main/resources/org/apache/gluten/proto - false - - diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/planner/plan/GlutenPlanModel.scala b/gluten-substrait/src/main/scala/org/apache/gluten/planner/plan/GlutenPlanModel.scala index 5df4f572c398..0b3adbbe4981 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/planner/plan/GlutenPlanModel.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/planner/plan/GlutenPlanModel.scala @@ -28,8 +28,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.Attribute import org.apache.spark.sql.execution.{ColumnarToRowExec, LeafExecNode, SparkPlan} -import org.apache.spark.task.TaskResources -import org.apache.spark.util.SparkTaskUtil +import org.apache.spark.task.{SparkTaskUtil, TaskResources} import java.util.{Objects, Properties}