diff --git a/backends-clickhouse/src-iceberg/main/resources/META-INF/gluten-components/org.apache.gluten.component.CHIcebergComponent b/backends-clickhouse/src-iceberg/main/resources/META-INF/gluten-components/org.apache.gluten.component.CHIcebergComponent
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/backends-clickhouse/src-iceberg/main/resources/META-INF/services/org.apache.gluten.component.Component b/backends-clickhouse/src-iceberg/main/resources/META-INF/services/org.apache.gluten.component.Component
deleted file mode 100644
index a13f6fa739e8..000000000000
--- a/backends-clickhouse/src-iceberg/main/resources/META-INF/services/org.apache.gluten.component.Component
+++ /dev/null
@@ -1 +0,0 @@
-org.apache.gluten.component.CHIcebergComponent
diff --git a/backends-clickhouse/src/main/resources/META-INF/gluten-components/org.apache.gluten.backendsapi.clickhouse.CHBackend b/backends-clickhouse/src/main/resources/META-INF/gluten-components/org.apache.gluten.backendsapi.clickhouse.CHBackend
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/backends-clickhouse/src/main/resources/META-INF/services/org.apache.gluten.backend.Backend b/backends-clickhouse/src/main/resources/META-INF/services/org.apache.gluten.backend.Backend
deleted file mode 100644
index bcd3cb1c03a0..000000000000
--- a/backends-clickhouse/src/main/resources/META-INF/services/org.apache.gluten.backend.Backend
+++ /dev/null
@@ -1 +0,0 @@
-org.apache.gluten.backendsapi.clickhouse.CHBackend
diff --git a/backends-velox/src-iceberg/main/resources/META-INF/gluten-components/org.apache.gluten.component.VeloxIcebergComponent b/backends-velox/src-iceberg/main/resources/META-INF/gluten-components/org.apache.gluten.component.VeloxIcebergComponent
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/backends-velox/src-iceberg/main/resources/META-INF/services/org.apache.gluten.component.Component b/backends-velox/src-iceberg/main/resources/META-INF/services/org.apache.gluten.component.Component
deleted file mode 100644
index e9e844c6bb47..000000000000
--- a/backends-velox/src-iceberg/main/resources/META-INF/services/org.apache.gluten.component.Component
+++ /dev/null
@@ -1 +0,0 @@
-org.apache.gluten.component.VeloxIcebergComponent
diff --git a/backends-velox/src/main/resources/META-INF/gluten-components/org.apache.gluten.backendsapi.velox.VeloxBackend b/backends-velox/src/main/resources/META-INF/gluten-components/org.apache.gluten.backendsapi.velox.VeloxBackend
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/backends-velox/src/main/resources/META-INF/services/org.apache.gluten.backend.Backend b/backends-velox/src/main/resources/META-INF/services/org.apache.gluten.backend.Backend
deleted file mode 100644
index 7cc9b395911f..000000000000
--- a/backends-velox/src/main/resources/META-INF/services/org.apache.gluten.backend.Backend
+++ /dev/null
@@ -1 +0,0 @@
-org.apache.gluten.backendsapi.velox.VeloxBackend
diff --git a/gluten-core/src/main/java/org/apache/gluten/utils/ResourceUtil.java b/gluten-core/src/main/java/org/apache/gluten/utils/ResourceUtil.java
new file mode 100644
index 000000000000..692a91af2667
--- /dev/null
+++ b/gluten-core/src/main/java/org/apache/gluten/utils/ResourceUtil.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.utils;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Enumeration;
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipException;
+import java.util.zip.ZipFile;
+
+/**
+ * Utility for locating classpath resources whose relative path matches a regular expression.
+ *
+ * <p>Code is copied from an external source and then modified for Gluten's use. NOTE(review):
+ * the hyperlink to the original source is missing from this comment and should be restored.
+ */
+public class ResourceUtil {
+
+  private static final Logger LOG = LoggerFactory.getLogger(ResourceUtil.class);
+
+  /**
+   * Get a collection of resource paths by the input RegEx pattern.
+   *
+   * <p>Each element of the {@code java.class.path} system property is scanned. A directory is
+   * walked recursively, any other existing file is opened as a ZIP/JAR archive, and an element
+   * that does not exist is skipped.
+   *
+   * @param pattern The pattern to match. It has to match the whole relative path of a resource.
+   * @return An unmodifiable list of relative resource paths in the order they are found.
+   * @throws RuntimeException If a classpath file cannot be opened or read as a ZIP/JAR archive.
+   */
+  public static List<String> getResources(final Pattern pattern) {
+    final List<String> buffer = new ArrayList<>();
+    final String classPath = System.getProperty("java.class.path");
+    final String[] classPathElements = classPath.split(File.pathSeparator);
+    for (final String element : classPathElements) {
+      getResources(element, pattern, buffer);
+    }
+    return Collections.unmodifiableList(buffer);
+  }
+
+  /** Dispatches one classpath element to the directory scan or to the archive scan. */
+  private static void getResources(
+      final String element, final Pattern pattern, final List<String> buffer) {
+    final File file = new File(element);
+    if (!file.exists()) {
+      LOG.info("Skip non-existing classpath: {}", element);
+      return;
+    }
+    if (file.isDirectory()) {
+      getResourcesFromDirectory(file, file, pattern, buffer);
+    } else {
+      getResourcesFromJarFile(file, pattern, buffer);
+    }
+  }
+
+  /**
+   * Collects the names of all non-directory entries of a ZIP/JAR archive that match the pattern.
+   */
+  private static void getResourcesFromJarFile(
+      final File file, final Pattern pattern, final List<String> buffer) {
+    // try-with-resources closes the archive even when the scan below fails half-way.
+    try (ZipFile zf = new ZipFile(file)) {
+      final Enumeration<? extends ZipEntry> entries = zf.entries();
+      while (entries.hasMoreElements()) {
+        final ZipEntry ze = entries.nextElement();
+        if (ze.isDirectory()) {
+          // Keep parity with the directory scan, which only reports regular files.
+          continue;
+        }
+        final String fileName = ze.getName();
+        if (pattern.matcher(fileName).matches()) {
+          buffer.add(fileName);
+        }
+      }
+    } catch (final ZipException e) {
+      throw new RuntimeException("Invalid ZIP/JAR archive in classpath: " + file, e);
+    } catch (final IOException e) {
+      throw new RuntimeException("Failed to read classpath archive: " + file, e);
+    }
+  }
+
+  /**
+   * Recursively collects the files below {@code directory} whose path relative to {@code root}
+   * matches the pattern.
+   */
+  private static void getResourcesFromDirectory(
+      final File root, final File directory, final Pattern pattern, final List<String> buffer) {
+    final File[] fileList = directory.listFiles();
+    if (fileList == null) {
+      // File#listFiles returns null when the directory cannot be listed (e.g. an I/O error).
+      LOG.warn("Skip unreadable classpath directory: {}", directory);
+      return;
+    }
+    for (final File file : fileList) {
+      if (file.isDirectory()) {
+        getResourcesFromDirectory(root, file, pattern, buffer);
+      } else {
+        final String relative = root.toURI().relativize(file.toURI()).getPath();
+        if (pattern.matcher(relative).matches()) {
+          buffer.add(relative);
+        }
+      }
+    }
+  }
+}
diff --git a/gluten-core/src/main/scala/org/apache/gluten/component/Discovery.scala b/gluten-core/src/main/scala/org/apache/gluten/component/Discovery.scala
new file mode 100644
index 000000000000..2b8f060a69f7
--- /dev/null
+++ b/gluten-core/src/main/scala/org/apache/gluten/component/Discovery.scala
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.gluten.component
+
+import org.apache.gluten.exception.GlutenException
+import org.apache.gluten.utils.ResourceUtil
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.util.SparkReflectionUtil
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+import scala.util.matching.Regex
+
+
+
+
+// format: off
+/**
+ * Gluten's global discovery to find all [[Component]] definitions in the classpath.
+ *
+ * We don't use [[java.util.ServiceLoader]] since it requires all the service files to share the
+ * same file name, which is the class name of [[Component]]. This makes it easy for the service
+ * files to overwrite each other during a Maven build. For example, see the code of
+ * `DefaultMavenFileFilter` used by Maven's `maven-resources-plugin`.
+ *
+ * Instead, Gluten defines its own way to register components. For example, placing the following
+ * component files in the resource folder:
+ *
+ *   META-INF
+ *   \- gluten-components
+ *      |- org.apache.gluten.component.AComponent
+ *      \- org.apache.gluten.backend.BBackend
+ *
+ * will cause the registration of component `AComponent` and backend `BBackend`.
+ *
+ * The content of a component file is not read, so it doesn't matter at the moment.
+ */
+// format: on
+private object Discovery extends Logging {
+  private val container: String = "META-INF/gluten-components"
+  // Captures everything after the container folder; the capture is used as a class name.
+  private val componentFilePattern: Regex = s"^$container/(.+)$$".r
+
+  /**
+   * Scans the classpath for component files and creates one instance per distinct class name.
+   *
+   * A class name that is found more than once is instantiated only for its first occurrence;
+   * later occurrences are logged and ignored.
+   *
+   * @return the instantiated components, in the order their component files were found.
+   * @throws GlutenException if a class named by a component file cannot be found.
+   */
+  def discoverAll(): Seq[Component] = {
+    logInfo("Start discovering components in the current classpath... ")
+    val prev = System.currentTimeMillis()
+    val allFiles = ResourceUtil.getResources(componentFilePattern.pattern).asScala
+    val duration = System.currentTimeMillis() - prev
+    logInfo(s"Discovered component files: ${allFiles.mkString(", ")}. Duration: $duration ms.")
+    val deDup = mutable.Set[String]()
+    allFiles.flatMap {
+      case componentFilePattern(className) =>
+        // Set#add returns false when the class name was already seen.
+        if (!deDup.add(className)) {
+          logWarning(s"Found duplicated component class $className in the classpath, ignoring.")
+          None
+        } else {
+          Some(instantiate(className))
+        }
+      case _ => None
+    }.toSeq
+  }
+
+  /** Loads the class by name and invokes its no-arg constructor. */
+  private def instantiate(className: String): Component = {
+    val clazz =
+      try {
+        SparkReflectionUtil.classForName(className)
+      } catch {
+        case e: ClassNotFoundException =>
+          throw new GlutenException(s"Component class not found: $className", e)
+      }
+    clazz.getDeclaredConstructor().newInstance().asInstanceOf[Component]
+  }
+}
diff --git a/gluten-core/src/main/scala/org/apache/gluten/component/package.scala b/gluten-core/src/main/scala/org/apache/gluten/component/package.scala
index f74b96729418..032a32d04121 100644
--- a/gluten-core/src/main/scala/org/apache/gluten/component/package.scala
+++ b/gluten-core/src/main/scala/org/apache/gluten/component/package.scala
@@ -16,15 +16,10 @@
*/
package org.apache.gluten
-import org.apache.gluten.backend.Backend
-
import org.apache.spark.internal.Logging
-import java.util.ServiceLoader
import java.util.concurrent.atomic.AtomicBoolean
-import scala.collection.JavaConverters._
-
package object component extends Logging {
private val allComponentsLoaded: AtomicBoolean = new AtomicBoolean(false)
@@ -34,9 +29,7 @@ package object component extends Logging {
}
// Load all components in classpath.
- val discoveredBackends = ServiceLoader.load(classOf[Backend]).asScala
- val discoveredComponents = ServiceLoader.load(classOf[Component]).asScala
- val all = discoveredBackends ++ discoveredComponents
+ val all = Discovery.discoverAll()
// Register all components.
all.foreach(_.ensureRegistered())
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendsApiManager.scala b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendsApiManager.scala
index 3b4e97afb361..4b6f674905af 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendsApiManager.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/BackendsApiManager.scala
@@ -21,17 +21,17 @@ import org.apache.gluten.component.Component
object BackendsApiManager {
private lazy val backend: SubstraitBackend = initializeInternal()
- /** Initialize all backends api. */
+ /**
+ * Initialize all backends apis.
+ *
+ * Picks the components returned by `Component.sorted()` that are instances of
+ * [[SubstraitBackend]], asserts that exactly one of them is loaded, and returns it.
+ */
private def initializeInternal(): SubstraitBackend = {
val loadedSubstraitBackends = Component.sorted().filter(_.isInstanceOf[SubstraitBackend])
- assert(loadedSubstraitBackends.size == 1, "More than one Substrait backends are loaded")
+ assert(
+ loadedSubstraitBackends.size == 1,
+ s"Zero or more than one Substrait backends are loaded: " +
+ s"${loadedSubstraitBackends.map(_.name()).mkString(", ")}")
loadedSubstraitBackends.head.asInstanceOf[SubstraitBackend]
}
- /**
- * Automatically detect the backend api.
- * @return
- */
+ /**
+ * Automatically detect the backend api.
+ *
+ * @return the value of `getBackendName` (presumably the name of the detected backend; its
+ * definition is outside this hunk, so confirm there).
+ */
def initialize(): String = {
getBackendName
}
diff --git a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java
index 08c55d78a67a..b369fffd740c 100644
--- a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java
+++ b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/BaseMixin.java
@@ -22,6 +22,7 @@
import org.apache.gluten.integration.ds.TpcdsSuite;
import org.apache.gluten.integration.h.TpchSuite;
import org.apache.log4j.Level;
+import org.apache.log4j.LogManager;
import org.apache.spark.SparkConf;
import picocli.CommandLine;
import scala.Predef;
@@ -120,6 +121,8 @@ public Integer runActions(Action[] actions) {
throw new IllegalArgumentException("Log level not found: " + logLevel);
}
+ LogManager.getRootLogger().setLevel(level);
+
scala.collection.immutable.Map extraSparkConfScala =
JavaConverters.mapAsScalaMapConverter(
mergeMapSafe(extraSparkConf, runModeEnumeration.extraSparkConf())).asScala().toMap(