From 82b48b38d10217ea4921248d3e4ffc5fc48877b7 Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Tue, 17 Dec 2024 09:37:51 +0800 Subject: [PATCH] [GLUTEN-7911][CORE] Flip dependency direction for gluten-hudi (#8240) Closes #7911 --- backends-velox/pom.xml | 23 +++++++++++++++++++ .../org/apache/execution/VeloxHudiSuite.scala | 21 +++++++++++++++++ .../execution/VeloxTPCHHudiSuite.scala | 8 +++---- .../memtarget/spark/TreeMemoryConsumer.java | 3 ++- .../org/apache/spark/task/TaskResources.scala | 11 ++++++--- gluten-hudi/pom.xml | 13 ----------- .../{VeloxHudiSuite.scala => HudiSuite.scala} | 2 +- .../gluten/utils/BackendTestSettings.scala | 3 ++- .../AbstractFileSourceScanExec.scala | 4 +++- .../AbstractFileSourceScanExec.scala | 4 +++- .../AbstractFileSourceScanExec.scala | 4 +++- .../AbstractFileSourceScanExec.scala | 4 +++- 12 files changed, 73 insertions(+), 27 deletions(-) create mode 100644 backends-velox/src-hudi/test/scala/org/apache/execution/VeloxHudiSuite.scala rename {gluten-hudi/src-hudi/test/scala/org/apache/gluten => backends-velox/src-hudi/test/scala/org/apache}/execution/VeloxTPCHHudiSuite.scala (91%) rename gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/{VeloxHudiSuite.scala => HudiSuite.scala} (98%) diff --git a/backends-velox/pom.xml b/backends-velox/pom.xml index ed0bf20616f7..9349c3c0923c 100755 --- a/backends-velox/pom.xml +++ b/backends-velox/pom.xml @@ -111,6 +111,29 @@ + + hudi + + + org.apache.gluten + gluten-hudi + ${project.version} + + + org.apache.gluten + gluten-hudi + ${project.version} + test-jar + test + + + org.apache.hudi + hudi-spark${sparkbundle.version}-bundle_${scala.binary.version} + ${hudi.version} + provided + + + diff --git a/backends-velox/src-hudi/test/scala/org/apache/execution/VeloxHudiSuite.scala b/backends-velox/src-hudi/test/scala/org/apache/execution/VeloxHudiSuite.scala new file mode 100644 index 000000000000..00498f87411a --- /dev/null +++ b/backends-velox/src-hudi/test/scala/org/apache/execution/VeloxHudiSuite.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.execution + +import org.apache.gluten.execution.HudiSuite + +class VeloxHudiSuite extends HudiSuite {} diff --git a/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxTPCHHudiSuite.scala b/backends-velox/src-hudi/test/scala/org/apache/execution/VeloxTPCHHudiSuite.scala similarity index 91% rename from gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxTPCHHudiSuite.scala rename to backends-velox/src-hudi/test/scala/org/apache/execution/VeloxTPCHHudiSuite.scala index a4e10269c286..cdb3b2918080 100644 --- a/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxTPCHHudiSuite.scala +++ b/backends-velox/src-hudi/test/scala/org/apache/execution/VeloxTPCHHudiSuite.scala @@ -14,16 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.gluten.execution +package org.apache.execution +import org.apache.gluten.execution.VeloxTPCHSuite import org.apache.spark.SparkConf import java.io.File class VeloxTPCHHudiSuite extends VeloxTPCHSuite { - - protected val tpchBasePath: String = new File( - "../backends-velox/src/test/resources").getAbsolutePath + protected val tpchBasePath: String = + getClass.getResource("/").getPath + "../../../src/test/resources" override protected val resourcePath: String = new File(tpchBasePath, "tpch-data-parquet").getCanonicalPath diff --git a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumer.java b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumer.java index 44c725798c75..1289a01c349e 100644 --- a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumer.java +++ b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumer.java @@ -138,7 +138,8 @@ public Map children() { @Override public TreeMemoryTarget parent() { // we are root - throw new IllegalStateException("Unreachable code"); + throw new IllegalStateException( + "Unreachable code org.apache.gluten.memory.memtarget.spark.TreeMemoryConsumer.parent"); } @Override diff --git a/gluten-core/src/main/scala/org/apache/spark/task/TaskResources.scala b/gluten-core/src/main/scala/org/apache/spark/task/TaskResources.scala index b061aa332c74..df5917125b64 100644 --- a/gluten-core/src/main/scala/org/apache/spark/task/TaskResources.scala +++ b/gluten-core/src/main/scala/org/apache/spark/task/TaskResources.scala @@ -298,9 +298,14 @@ class TaskResourceRegistry extends Logging { o1: util.Map.Entry[Int, util.LinkedHashSet[TaskResource]], o2: util.Map.Entry[Int, util.LinkedHashSet[TaskResource]]) => { val diff = o2.getKey - o1.getKey // descending by priority - if (diff > 0) 1 - else if (diff < 0) -1 - else throw new IllegalStateException("Unreachable code") + if (diff > 0) { + 1 + } else if (diff < 0) { + -1 + } else { + throw new IllegalStateException( + "Unreachable code from org.apache.spark.task.TaskResourceRegistry.releaseAll") + } } ) table.forEach { diff --git a/gluten-hudi/pom.xml b/gluten-hudi/pom.xml index 7900182f853a..5865f1f6ece8 100755 --- a/gluten-hudi/pom.xml +++ b/gluten-hudi/pom.xml @@ -46,19 +46,6 @@ test-jar test - - org.apache.gluten - backends-velox - ${project.version} - test - - - org.apache.gluten - backends-velox - ${project.version} - test-jar - test - org.apache.spark spark-core_${scala.binary.version} diff --git a/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxHudiSuite.scala b/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/HudiSuite.scala similarity index 98% rename from gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxHudiSuite.scala rename to gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/HudiSuite.scala index b760ec556535..97633fa064cc 100644 --- a/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxHudiSuite.scala +++ b/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/HudiSuite.scala @@ -19,7 +19,7 @@ package org.apache.gluten.execution import org.apache.spark.SparkConf import org.apache.spark.sql.Row -class VeloxHudiSuite extends WholeStageTransformerSuite { +abstract class HudiSuite extends WholeStageTransformerSuite { protected val rootPath: String = getClass.getResource("/").getPath override protected val resourcePath: String = "/tpch-data-parquet" diff --git a/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala b/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala index dce8ac83710c..51e8174da7fb 100644 --- a/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala +++ b/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala @@ -80,7 +80,8 @@ abstract class BackendTestSettings { return !isExcluded } - throw new IllegalStateException("Unreachable code") + throw new IllegalStateException( + "Unreachable code from org.apache.gluten.utils.BackendTestSettings.shouldRun") } final protected class SuiteSettings { diff --git a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala index a3bd5079b016..fcdd3c3c8b4b 100644 --- a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala +++ b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala @@ -73,7 +73,9 @@ abstract class AbstractFileSourceScanExec( override def supportsColumnar: Boolean = { // The value should be defined in GlutenPlan. - throw new UnsupportedOperationException("Unreachable code") + throw new UnsupportedOperationException( + "Unreachable code from org.apache.spark.sql.execution.AbstractFileSourceScanExec" + + ".supportsColumnar") } private lazy val needsUnsafeRowConversion: Boolean = { diff --git a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala index c885f0cf44b3..01df5ba62167 100644 --- a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala +++ b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala @@ -77,7 +77,9 @@ abstract class AbstractFileSourceScanExec( override def supportsColumnar: Boolean = { // The value should be defined in GlutenPlan. - throw new UnsupportedOperationException("Unreachable code") + throw new UnsupportedOperationException( + "Unreachable code from org.apache.spark.sql.execution.AbstractFileSourceScanExec" + + ".supportsColumnar") } private lazy val needsUnsafeRowConversion: Boolean = { diff --git a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala index 53ea6f543a95..15e54ddb71f2 100644 --- a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala +++ b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala @@ -69,7 +69,9 @@ abstract class AbstractFileSourceScanExec( override def supportsColumnar: Boolean = { // The value should be defined in GlutenPlan. - throw new UnsupportedOperationException("Unreachable code") + throw new UnsupportedOperationException( + "Unreachable code from org.apache.spark.sql.execution.AbstractFileSourceScanExec" + + ".supportsColumnar") } private lazy val needsUnsafeRowConversion: Boolean = { diff --git a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala index c8dbcc2fed4f..a83c763c4566 100644 --- a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala +++ b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala @@ -69,7 +69,9 @@ abstract class AbstractFileSourceScanExec( override def supportsColumnar: Boolean = { // The value should be defined in GlutenPlan. - throw new UnsupportedOperationException("Unreachable code") + throw new UnsupportedOperationException( + "Unreachable code from org.apache.spark.sql.execution.AbstractFileSourceScanExec" + + ".supportsColumnar") } private lazy val needsUnsafeRowConversion: Boolean = {