- private static final Map&lt;Long, Factory&gt; FACTORIES = new ConcurrentHashMap&lt;&gt;();
+ private static final ReferenceMap FACTORIES = new ReferenceMap();
private TreeMemoryConsumers() {}
- private static Factory createOrGetFactory(long perTaskCapacity) {
- return FACTORIES.computeIfAbsent(perTaskCapacity, Factory::new);
- }
-
- /**
- * A hub to provide memory target instances whose shared size (in the same task) is limited to X,
- * X = executor memory / task slots.
- *
- * Using this to prevent OOMs if the delegated memory target could possibly hold large memory
- * blocks that are not spillable.
- *
- *
- * See GLUTEN-3030
- */
- public static Factory isolated() {
- return createOrGetFactory(GlutenConfig.getConf().conservativeTaskOffHeapMemorySize());
- }
-
- /**
- * This works as a legacy Spark memory consumer which grants as much as possible of memory
- * capacity to each task.
- */
- public static Factory shared() {
- return createOrGetFactory(TreeMemoryTarget.CAPACITY_UNLIMITED);
+ @SuppressWarnings("unchecked")
+ public static Factory factory(TaskMemoryManager tmm) {
+ synchronized (FACTORIES) {
+ return (Factory) FACTORIES.computeIfAbsent(tmm, m -> new Factory((TaskMemoryManager) m));
+ }
}
public static class Factory {
- private final ReferenceMap map = new ReferenceMap(ReferenceMap.WEAK, ReferenceMap.WEAK);
- private final long perTaskCapacity;
+ private final TreeMemoryConsumer sparkConsumer;
+ private final Map&lt;Long, TreeMemoryTarget&gt; roots = new ConcurrentHashMap&lt;&gt;();
+
+ private Factory(TaskMemoryManager tmm) {
+ this.sparkConsumer = new TreeMemoryConsumer(tmm);
+ }
- private Factory(long perTaskCapacity) {
- this.perTaskCapacity = perTaskCapacity;
+ private TreeMemoryTarget ofCapacity(long capacity) {
+ return roots.computeIfAbsent(
+ capacity,
+ cap ->
+ sparkConsumer.newChild(
+ String.format("Capacity[%s]", Utils.bytesToString(cap)),
+ cap,
+ Spillers.NOOP,
+ Collections.emptyMap()));
}
- @SuppressWarnings("unchecked")
- private TreeMemoryTarget getSharedAccount(TaskMemoryManager tmm) {
- synchronized (map) {
- return (TreeMemoryTarget)
- map.computeIfAbsent(
- tmm,
- m -> {
- TreeMemoryTarget tmc = new TreeMemoryConsumer((TaskMemoryManager) m);
- return tmc.newChild(
- "root", perTaskCapacity, Spillers.NOOP, Collections.emptyMap());
- });
- }
+ /**
+ * This works as a legacy Spark memory consumer, which grants as much memory capacity as possible
+ * to each task.
+ */
+ public TreeMemoryTarget legacyRoot() {
+ return ofCapacity(TreeMemoryTarget.CAPACITY_UNLIMITED);
}
- public TreeMemoryTarget newConsumer(
- TaskMemoryManager tmm,
- String name,
- Spiller spiller,
- Map virtualChildren) {
- final TreeMemoryTarget account = getSharedAccount(tmm);
- return account.newChild(
- name, TreeMemoryConsumer.CAPACITY_UNLIMITED, spiller, virtualChildren);
+ /**
+ * A hub that provides memory target instances whose total shared size (within the same task) is
+ * limited to X, where X = executor memory / task slots.
+ *
+ * Use this to prevent OOMs when the delegated memory target may hold large memory blocks that are
+ * not spillable.
+ *
+ * See GLUTEN-3030.
+ */
+ public TreeMemoryTarget isolatedRoot() {
+ return ofCapacity(GlutenConfig.getConf().conservativeTaskOffHeapMemorySize());
}
}
}
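Note on the reworked API above: TreeMemoryConsumers now caches one Factory (and thus one Spark-facing TreeMemoryConsumer) per TaskMemoryManager, and the per-task capacity cap moves onto the roots returned by legacyRoot() and isolatedRoot(). A minimal usage sketch in Scala follows; it is not part of the patch. The import paths, object/method names and byte figures are assumptions, while factory, legacyRoot, isolatedRoot, newChild, borrow, repay, Spillers.NOOP and CAPACITY_UNLIMITED come from the patch and its tests.

import java.util.Collections

import org.apache.gluten.memory.memtarget.{Spillers, TreeMemoryTarget}
import org.apache.gluten.memory.memtarget.spark.TreeMemoryConsumers

import org.apache.spark.TaskContext

object TreeMemoryConsumersUsage {
  // Must run inside a Spark task: the factory is cached per TaskMemoryManager.
  def example(): Long = {
    val factory = TreeMemoryConsumers.factory(TaskContext.get().taskMemoryManager())

    // Unlimited root: behaves like a legacy Spark memory consumer.
    val legacy: TreeMemoryTarget = factory
      .legacyRoot()
      .newChild("example", TreeMemoryTarget.CAPACITY_UNLIMITED, Spillers.NOOP, Collections.emptyMap())

    // Capped root: memory shared under it within the same task is limited (see isolatedRoot()'s Javadoc).
    val isolated: TreeMemoryTarget = factory
      .isolatedRoot()
      .newChild("example", TreeMemoryTarget.CAPACITY_UNLIMITED, Spillers.NOOP, Collections.emptyMap())

    val granted = legacy.borrow(64L * 1024 * 1024) // may grant less than requested
    legacy.repay(granted)
    isolated.borrow(1024L)
  }
}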
diff --git a/gluten-core/src/main/scala/org/apache/spark/memory/SparkMemoryUtil.scala b/gluten-core/src/main/scala/org/apache/spark/memory/SparkMemoryUtil.scala
index d221fafce418..338854cf086c 100644
--- a/gluten-core/src/main/scala/org/apache/spark/memory/SparkMemoryUtil.scala
+++ b/gluten-core/src/main/scala/org/apache/spark/memory/SparkMemoryUtil.scala
@@ -111,7 +111,7 @@ object SparkMemoryUtil {
collectFromTaskMemoryManager(treeMemoryConsumer.getTaskMemoryManager)
}
- override def visit(node: TreeMemoryTargets.Node): String = {
+ override def visit(node: TreeMemoryConsumer.Node): String = {
node.parent().accept(this) // walk up to find the one bound with task memory manager
}
@@ -131,6 +131,10 @@ object SparkMemoryUtil {
dynamicOffHeapSizingMemoryTarget: DynamicOffHeapSizingMemoryTarget): String = {
dynamicOffHeapSizingMemoryTarget.delegated().accept(this)
}
+
+ override def visit(retryOnOomMemoryTarget: RetryOnOomMemoryTarget): String = {
+ retryOnOomMemoryTarget.target().accept(this)
+ }
})
}
diff --git a/gluten-core/src/main/scala/org/apache/spark/task/TaskResources.scala b/gluten-core/src/main/scala/org/apache/spark/task/TaskResources.scala
index b061aa332c74..2f609b026db3 100644
--- a/gluten-core/src/main/scala/org/apache/spark/task/TaskResources.scala
+++ b/gluten-core/src/main/scala/org/apache/spark/task/TaskResources.scala
@@ -16,6 +16,7 @@
*/
package org.apache.spark.task
+import org.apache.gluten.GlutenConfig
import org.apache.gluten.task.TaskListener
import org.apache.spark.{TaskContext, TaskFailedReason, TaskKilledException, UnknownReason}
@@ -65,8 +66,8 @@ object TaskResources extends TaskListener with Logging {
properties.put(key, value)
case _ =>
}
- properties.setIfMissing("spark.memory.offHeap.enabled", "true")
- properties.setIfMissing("spark.memory.offHeap.size", "1TB")
+ properties.setIfMissing(GlutenConfig.SPARK_OFFHEAP_ENABLED, "true")
+ properties.setIfMissing(GlutenConfig.SPARK_OFFHEAP_SIZE_KEY, "1TB")
TaskContext.setTaskContext(newUnsafeTaskContext(properties))
}
@@ -298,9 +299,14 @@ class TaskResourceRegistry extends Logging {
o1: util.Map.Entry[Int, util.LinkedHashSet[TaskResource]],
o2: util.Map.Entry[Int, util.LinkedHashSet[TaskResource]]) => {
val diff = o2.getKey - o1.getKey // descending by priority
- if (diff > 0) 1
- else if (diff < 0) -1
- else throw new IllegalStateException("Unreachable code")
+ if (diff > 0) {
+ 1
+ } else if (diff < 0) {
+ -1
+ } else {
+ throw new IllegalStateException(
+ "Unreachable code from org.apache.spark.task.TaskResourceRegistry.releaseAll")
+ }
}
)
table.forEach {
diff --git a/gluten-core/src/test/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumerTest.java b/gluten-core/src/test/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumerTest.java
index befe449186e7..6cb38fe8d5d3 100644
--- a/gluten-core/src/test/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumerTest.java
+++ b/gluten-core/src/test/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumerTest.java
@@ -49,13 +49,16 @@ public void setUp() throws Exception {
public void testIsolated() {
test(
() -> {
- final TreeMemoryConsumers.Factory factory = TreeMemoryConsumers.isolated();
+ final TreeMemoryConsumers.Factory factory =
+ TreeMemoryConsumers.factory(TaskContext.get().taskMemoryManager());
final TreeMemoryTarget consumer =
- factory.newConsumer(
- TaskContext.get().taskMemoryManager(),
- "FOO",
- Spillers.NOOP,
- Collections.emptyMap());
+ factory
+ .isolatedRoot()
+ .newChild(
+ "FOO",
+ TreeMemoryTarget.CAPACITY_UNLIMITED,
+ Spillers.NOOP,
+ Collections.emptyMap());
Assert.assertEquals(20, consumer.borrow(20));
Assert.assertEquals(70, consumer.borrow(70));
Assert.assertEquals(10, consumer.borrow(20));
@@ -64,16 +67,19 @@ public void testIsolated() {
}
@Test
- public void testShared() {
+ public void testLegacy() {
test(
() -> {
- final TreeMemoryConsumers.Factory factory = TreeMemoryConsumers.shared();
+ final TreeMemoryConsumers.Factory factory =
+ TreeMemoryConsumers.factory(TaskContext.get().taskMemoryManager());
final TreeMemoryTarget consumer =
- factory.newConsumer(
- TaskContext.get().taskMemoryManager(),
- "FOO",
- Spillers.NOOP,
- Collections.emptyMap());
+ factory
+ .legacyRoot()
+ .newChild(
+ "FOO",
+ TreeMemoryTarget.CAPACITY_UNLIMITED,
+ Spillers.NOOP,
+ Collections.emptyMap());
Assert.assertEquals(20, consumer.borrow(20));
Assert.assertEquals(70, consumer.borrow(70));
Assert.assertEquals(20, consumer.borrow(20));
@@ -82,22 +88,24 @@ public void testShared() {
}
@Test
- public void testIsolatedAndShared() {
+ public void testIsolatedAndLegacy() {
test(
() -> {
- final TreeMemoryTarget shared =
- TreeMemoryConsumers.shared()
- .newConsumer(
- TaskContext.get().taskMemoryManager(),
+ final TreeMemoryTarget legacy =
+ TreeMemoryConsumers.factory(TaskContext.get().taskMemoryManager())
+ .legacyRoot()
+ .newChild(
"FOO",
+ TreeMemoryTarget.CAPACITY_UNLIMITED,
Spillers.NOOP,
Collections.emptyMap());
- Assert.assertEquals(110, shared.borrow(110));
+ Assert.assertEquals(110, legacy.borrow(110));
final TreeMemoryTarget isolated =
- TreeMemoryConsumers.isolated()
- .newConsumer(
- TaskContext.get().taskMemoryManager(),
+ TreeMemoryConsumers.factory(TaskContext.get().taskMemoryManager())
+ .isolatedRoot()
+ .newChild(
"FOO",
+ TreeMemoryTarget.CAPACITY_UNLIMITED,
Spillers.NOOP,
Collections.emptyMap());
Assert.assertEquals(100, isolated.borrow(110));
@@ -109,36 +117,34 @@ public void testSpill() {
test(
() -> {
final Spillers.AppendableSpillerList spillers = Spillers.appendable();
- final TreeMemoryTarget shared =
- TreeMemoryConsumers.shared()
- .newConsumer(
- TaskContext.get().taskMemoryManager(),
- "FOO",
- spillers,
- Collections.emptyMap());
+ final TreeMemoryTarget legacy =
+ TreeMemoryConsumers.factory(TaskContext.get().taskMemoryManager())
+ .legacyRoot()
+ .newChild(
+ "FOO", TreeMemoryTarget.CAPACITY_UNLIMITED, spillers, Collections.emptyMap());
final AtomicInteger numSpills = new AtomicInteger(0);
final AtomicLong numSpilledBytes = new AtomicLong(0L);
spillers.append(
new Spiller() {
@Override
public long spill(MemoryTarget self, Phase phase, long size) {
- long repaid = shared.repay(size);
+ long repaid = legacy.repay(size);
numSpills.getAndIncrement();
numSpilledBytes.getAndAdd(repaid);
return repaid;
}
});
- Assert.assertEquals(300, shared.borrow(300));
- Assert.assertEquals(300, shared.borrow(300));
+ Assert.assertEquals(300, legacy.borrow(300));
+ Assert.assertEquals(300, legacy.borrow(300));
Assert.assertEquals(1, numSpills.get());
Assert.assertEquals(200, numSpilledBytes.get());
- Assert.assertEquals(400, shared.usedBytes());
+ Assert.assertEquals(400, legacy.usedBytes());
- Assert.assertEquals(300, shared.borrow(300));
- Assert.assertEquals(300, shared.borrow(300));
+ Assert.assertEquals(300, legacy.borrow(300));
+ Assert.assertEquals(300, legacy.borrow(300));
Assert.assertEquals(3, numSpills.get());
Assert.assertEquals(800, numSpilledBytes.get());
- Assert.assertEquals(400, shared.usedBytes());
+ Assert.assertEquals(400, legacy.usedBytes());
});
}
@@ -147,36 +153,34 @@ public void testOverSpill() {
test(
() -> {
final Spillers.AppendableSpillerList spillers = Spillers.appendable();
- final TreeMemoryTarget shared =
- TreeMemoryConsumers.shared()
- .newConsumer(
- TaskContext.get().taskMemoryManager(),
- "FOO",
- spillers,
- Collections.emptyMap());
+ final TreeMemoryTarget legacy =
+ TreeMemoryConsumers.factory(TaskContext.get().taskMemoryManager())
+ .legacyRoot()
+ .newChild(
+ "FOO", TreeMemoryTarget.CAPACITY_UNLIMITED, spillers, Collections.emptyMap());
final AtomicInteger numSpills = new AtomicInteger(0);
final AtomicLong numSpilledBytes = new AtomicLong(0L);
spillers.append(
new Spiller() {
@Override
public long spill(MemoryTarget self, Phase phase, long size) {
- long repaid = shared.repay(Long.MAX_VALUE);
+ long repaid = legacy.repay(Long.MAX_VALUE);
numSpills.getAndIncrement();
numSpilledBytes.getAndAdd(repaid);
return repaid;
}
});
- Assert.assertEquals(300, shared.borrow(300));
- Assert.assertEquals(300, shared.borrow(300));
+ Assert.assertEquals(300, legacy.borrow(300));
+ Assert.assertEquals(300, legacy.borrow(300));
Assert.assertEquals(1, numSpills.get());
Assert.assertEquals(300, numSpilledBytes.get());
- Assert.assertEquals(300, shared.usedBytes());
+ Assert.assertEquals(300, legacy.usedBytes());
- Assert.assertEquals(300, shared.borrow(300));
- Assert.assertEquals(300, shared.borrow(300));
+ Assert.assertEquals(300, legacy.borrow(300));
+ Assert.assertEquals(300, legacy.borrow(300));
Assert.assertEquals(3, numSpills.get());
Assert.assertEquals(900, numSpilledBytes.get());
- Assert.assertEquals(300, shared.usedBytes());
+ Assert.assertEquals(300, legacy.usedBytes());
});
}
diff --git a/gluten-hudi/pom.xml b/gluten-hudi/pom.xml
index 7900182f853a..5865f1f6ece8 100755
--- a/gluten-hudi/pom.xml
+++ b/gluten-hudi/pom.xml
@@ -46,19 +46,6 @@
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
-    <dependency>
-      <groupId>org.apache.gluten</groupId>
-      <artifactId>backends-velox</artifactId>
-      <version>${project.version}</version>
-      <scope>test</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.gluten</groupId>
-      <artifactId>backends-velox</artifactId>
-      <version>${project.version}</version>
-      <type>test-jar</type>
-      <scope>test</scope>
-    </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-core_${scala.binary.version}</artifactId>
diff --git a/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxHudiSuite.scala b/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/HudiSuite.scala
similarity index 98%
rename from gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxHudiSuite.scala
rename to gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/HudiSuite.scala
index b760ec556535..97633fa064cc 100644
--- a/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxHudiSuite.scala
+++ b/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/HudiSuite.scala
@@ -19,7 +19,7 @@ package org.apache.gluten.execution
import org.apache.spark.SparkConf
import org.apache.spark.sql.Row
-class VeloxHudiSuite extends WholeStageTransformerSuite {
+abstract class HudiSuite extends WholeStageTransformerSuite {
protected val rootPath: String = getClass.getResource("/").getPath
override protected val resourcePath: String = "/tpch-data-parquet"
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/TransformerApi.scala b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/TransformerApi.scala
index 69cea9c5470d..984450bf164e 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/TransformerApi.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/TransformerApi.scala
@@ -16,12 +16,13 @@
*/
package org.apache.gluten.backendsapi
+import org.apache.gluten.execution.WriteFilesExecTransformer
import org.apache.gluten.substrait.expression.ExpressionNode
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.connector.read.InputPartition
import org.apache.spark.sql.execution.SparkPlan
-import org.apache.spark.sql.execution.datasources.{FileFormat, HadoopFsRelation, PartitionDirectory}
+import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, PartitionDirectory}
import org.apache.spark.sql.types.{DataType, DecimalType, StructType}
import org.apache.spark.util.collection.BitSet
@@ -75,7 +76,7 @@ trait TransformerApi {
/** This method is only used for CH backend tests */
def invalidateSQLExecutionResource(executionId: String): Unit = {}
- def genWriteParameters(fileFormat: FileFormat, writeOptions: Map[String, String]): Any
+ def genWriteParameters(write: WriteFilesExecTransformer): Any
/** use Hadoop Path class to encode the file path */
def encodeFilePathIfNeed(filePath: String): String = filePath
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala
index a9d3a6282ae1..726dbdc3ef30 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala
@@ -67,7 +67,7 @@ case class WriteFilesExecTransformer(
override def output: Seq[Attribute] = Seq.empty
- private val caseInsensitiveOptions = CaseInsensitiveMap(options)
+ val caseInsensitiveOptions: CaseInsensitiveMap[String] = CaseInsensitiveMap(options)
def getRelNode(
context: SubstraitContext,
@@ -99,8 +99,7 @@ case class WriteFilesExecTransformer(
ConverterUtils.collectAttributeNames(inputAttributes.toSeq)
val extensionNode = if (!validation) {
ExtensionBuilder.makeAdvancedExtension(
- BackendsApiManager.getTransformerApiInstance
- .genWriteParameters(fileFormat, caseInsensitiveOptions),
+ BackendsApiManager.getTransformerApiInstance.genWriteParameters(this),
SubstraitUtil.createEnhancement(originalInputAttributes)
)
} else {
diff --git a/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/ApplyResourceProfileExec.scala b/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/ApplyResourceProfileExec.scala
new file mode 100644
index 000000000000..17640f461213
--- /dev/null
+++ b/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/ApplyResourceProfileExec.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution
+
+import org.apache.gluten.execution.GlutenPlan
+import org.apache.gluten.extension.columnar.transition.Convention
+
+import org.apache.spark.annotation.Experimental
+import org.apache.spark.rdd.RDD
+import org.apache.spark.resource.ResourceProfile
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder}
+import org.apache.spark.sql.catalyst.plans.physical.{Distribution, Partitioning}
+import org.apache.spark.sql.vectorized.ColumnarBatch
+
+/**
+ * Used to apply the specified resource profile to the whole stage.
+ * @param child
+ *   the plan of the stage to which the resource profile is applied.
+ * @param resourceProfile
+ *   the resource profile specified for the stage the child plan belongs to.
+ */
+@Experimental
+case class ApplyResourceProfileExec(child: SparkPlan, resourceProfile: ResourceProfile)
+ extends UnaryExecNode
+ with GlutenPlan {
+
+ override def batchType(): Convention.BatchType = {
+ Convention.get(child).batchType
+ }
+
+ override def rowType0(): Convention.RowType = {
+ Convention.get(child).rowType
+ }
+
+ override def outputPartitioning: Partitioning = {
+ child.outputPartitioning
+ }
+
+ override def requiredChildDistribution: scala.Seq[Distribution] = {
+ child.requiredChildDistribution
+ }
+
+ override def outputOrdering: scala.Seq[SortOrder] = {
+ child.outputOrdering
+ }
+
+ override def requiredChildOrdering: scala.Seq[scala.Seq[SortOrder]] = {
+ child.requiredChildOrdering
+ }
+
+ override protected def doExecute(): RDD[InternalRow] = {
+ log.info(s"Apply $resourceProfile for plan ${child.nodeName}")
+ child.execute.withResources(resourceProfile)
+ }
+
+ override protected def doExecuteColumnar(): RDD[ColumnarBatch] = {
+ log.info(s"Apply $resourceProfile for columnar plan ${child.nodeName}")
+ child.executeColumnar.withResources(resourceProfile)
+ }
+
+ override def output: scala.Seq[Attribute] = child.output
+
+ override protected def withNewChildInternal(newChild: SparkPlan): ApplyResourceProfileExec =
+ copy(child = newChild)
+}
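For context, ApplyResourceProfileExec above only forwards the child plan's conventions and attaches the given ResourceProfile to the RDDs it produces. A hypothetical wiring sketch follows; it is not part of the patch. ExecutorResourceRequests and ResourceProfileBuilder are standard Spark classes used for illustration, the helper name and the "4g" figure are made up, and it assumes the cluster supports stage-level resource profiles.

import org.apache.spark.resource.{ExecutorResourceRequests, ResourceProfileBuilder}
import org.apache.spark.sql.execution.{ApplyResourceProfileExec, SparkPlan}

object ResourceProfileWiring {
  // Wrap a stage's root plan so the stage's RDDs request extra off-heap memory.
  def withOffHeapProfile(plan: SparkPlan): SparkPlan = {
    val requests = new ExecutorResourceRequests().offHeapMemory("4g")
    val profile = new ResourceProfileBuilder().require(requests).build()
    ApplyResourceProfileExec(plan, profile)
  }
}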
diff --git a/gluten-uniffle/.gitkeep b/gluten-uniffle/.gitkeep
new file mode 100644
index 000000000000..f2d1254d2735
--- /dev/null
+++ b/gluten-uniffle/.gitkeep
@@ -0,0 +1 @@
+This module is kept for common code shared by backends for Uniffle support in Gluten.
diff --git a/gluten-uniffle/package/pom.xml b/gluten-uniffle/package/pom.xml
deleted file mode 100644
index e49748e7c8e9..000000000000
--- a/gluten-uniffle/package/pom.xml
+++ /dev/null
@@ -1,29 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <parent>
-    <artifactId>gluten-uniffle</artifactId>
-    <groupId>org.apache.gluten</groupId>
-    <version>1.3.0-SNAPSHOT</version>
-    <relativePath>../pom.xml</relativePath>
-  </parent>
-  <modelVersion>4.0.0</modelVersion>
-
-  <artifactId>gluten-uniffle-package</artifactId>
-  <packaging>jar</packaging>
-  <name>Gluten Uniffle Package</name>
-
-  <profiles>
-    <profile>
-      <id>backends-velox</id>
-      <dependencies>
-        <dependency>
-          <groupId>org.apache.gluten</groupId>
-          <artifactId>gluten-uniffle-velox</artifactId>
-          <version>${project.version}</version>
-        </dependency>
-      </dependencies>
-    </profile>
-  </profiles>
-</project>
diff --git a/gluten-uniffle/pom.xml b/gluten-uniffle/pom.xml
index b7fe4c2e4268..efc8ce6555c5 100644
--- a/gluten-uniffle/pom.xml
+++ b/gluten-uniffle/pom.xml
@@ -11,7 +11,7 @@
   <modelVersion>4.0.0</modelVersion>
   <artifactId>gluten-uniffle</artifactId>
-  <packaging>pom</packaging>
+  <packaging>jar</packaging>
   <name>Gluten Uniffle</name>
@@ -75,15 +75,4 @@
-
-  <profiles>
-    <profile>
-      <id>backends-velox</id>
-      <modules>
-        <module>velox</module>
-        <module>package</module>
-      </modules>
-    </profile>
-  </profiles>
diff --git a/gluten-uniffle/velox/pom.xml b/gluten-uniffle/velox/pom.xml
deleted file mode 100755
index ab730674fbb3..000000000000
--- a/gluten-uniffle/velox/pom.xml
+++ /dev/null
@@ -1,62 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-  <parent>
-    <artifactId>gluten-uniffle</artifactId>
-    <groupId>org.apache.gluten</groupId>
-    <version>1.3.0-SNAPSHOT</version>
-    <relativePath>../pom.xml</relativePath>
-  </parent>
-  <modelVersion>4.0.0</modelVersion>
-
-  <artifactId>gluten-uniffle-velox</artifactId>
-  <packaging>jar</packaging>
-  <name>Gluten Uniffle Velox</name>
-
-  <dependencies>
-    <dependency>
-      <groupId>org.apache.gluten</groupId>
-      <artifactId>backends-velox</artifactId>
-      <version>${project.version}</version>
-      <scope>provided</scope>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.gluten</groupId>
-      <artifactId>gluten-arrow</artifactId>
-      <version>${project.version}</version>
-      <scope>provided</scope>
-    </dependency>
-  </dependencies>
-
-  <build>
-    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
-    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
-    <plugins>
-      <plugin>
-        <groupId>net.alchim31.maven</groupId>
-        <artifactId>scala-maven-plugin</artifactId>
-      </plugin>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-compiler-plugin</artifactId>
-      </plugin>
-      <plugin>
-        <groupId>org.scalastyle</groupId>
-        <artifactId>scalastyle-maven-plugin</artifactId>
-      </plugin>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-checkstyle-plugin</artifactId>
-      </plugin>
-      <plugin>
-        <groupId>com.diffplug.spotless</groupId>
-        <artifactId>spotless-maven-plugin</artifactId>
-      </plugin>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-jar-plugin</artifactId>
-      </plugin>
-    </plugins>
-  </build>
-</project>
diff --git a/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala b/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala
index dce8ac83710c..51e8174da7fb 100644
--- a/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala
+++ b/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala
@@ -80,7 +80,8 @@ abstract class BackendTestSettings {
return !isExcluded
}
- throw new IllegalStateException("Unreachable code")
+ throw new IllegalStateException(
+ "Unreachable code from org.apache.gluten.utils.BackendTestSettings.shouldRun")
}
final protected class SuiteSettings {
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 36d5b5177c6b..16879489d29e 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -286,6 +286,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("groupBy.as")
enableSuite[GlutenDateFunctionsSuite]
.exclude("function to_date")
+ .excludeGlutenTest("function to_date")
.exclude("unix_timestamp")
.exclude("to_unix_timestamp")
.exclude("to_timestamp")
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 15495270a189..2c6b882850c4 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -265,6 +265,9 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+ // Legacy mode is not supported and velox getTimestamp function does not throw
+ // exception when format is "yyyy-dd-aa".
+ .exclude("function to_date")
enableSuite[GlutenDataFrameFunctionsSuite]
// blocked by Velox-5768
.exclude("aggregate function - array for primitive type containing null")
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index 8d1f7320dd42..5ddfe6fc1ff3 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -248,4 +248,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
}
}
}
+
+ testGluten("function to_date") {
+ val d1 = Date.valueOf("2015-07-22")
+ val d2 = Date.valueOf("2015-07-01")
+ val d3 = Date.valueOf("2014-12-31")
+ val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+ val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val s1 = "2015-07-22 10:00:00"
+ val s2 = "2014-12-31"
+ val s3 = "2014-31-12"
+ val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")
+
+ checkAnswer(
+ df.select(to_date(col("t"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("s"))),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ checkAnswer(
+ df.selectExpr("to_date(t)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(d)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(s)"),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ // now with format
+ checkAnswer(
+ df.select(to_date(col("t"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key
+ withSQLConf(confKey -> "corrected") {
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-MM-dd")),
+ Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null)))
+ }
+ // legacyParserPolicy is not respected by Gluten.
+ // withSQLConf(confKey -> "exception") {
+ // checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
+ // }
+
+ // now switch format
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-dd-MM")),
+ Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))
+
+ // invalid format
+ checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null)))
+ // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa".
+ // val e =
+ // intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
+ // assert(e.getCause.isInstanceOf[IllegalArgumentException])
+ // assert(
+ // e.getMessage.contains("You may get a different result due to the upgrading to Spark"))
+
+ // February
+ val x1 = "2016-02-29"
+ val x2 = "2017-02-29"
+ val df1 = Seq(x1, x2).toDF("x")
+ checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
+ }
}
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 407b9c8b95cc..f83b91ede1cc 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1084,6 +1084,9 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+ // Legacy mode is not supported and velox getTimestamp function does not throw
+ // exception when format is "yyyy-dd-aa".
+ .exclude("function to_date")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index a946e6de4345..ae86c9d06e81 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -246,4 +246,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
}
}
}
+
+ testGluten("function to_date") {
+ val d1 = Date.valueOf("2015-07-22")
+ val d2 = Date.valueOf("2015-07-01")
+ val d3 = Date.valueOf("2014-12-31")
+ val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+ val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val s1 = "2015-07-22 10:00:00"
+ val s2 = "2014-12-31"
+ val s3 = "2014-31-12"
+ val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")
+
+ checkAnswer(
+ df.select(to_date(col("t"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("s"))),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ checkAnswer(
+ df.selectExpr("to_date(t)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(d)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(s)"),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ // now with format
+ checkAnswer(
+ df.select(to_date(col("t"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key
+ withSQLConf(confKey -> "corrected") {
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-MM-dd")),
+ Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null)))
+ }
+ // legacyParserPolicy is not respected by Gluten.
+ // withSQLConf(confKey -> "exception") {
+ // checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
+ // }
+
+ // now switch format
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-dd-MM")),
+ Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))
+
+ // invalid format
+ checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null)))
+ // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa".
+ // val e =
+ // intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
+ // assert(e.getCause.isInstanceOf[IllegalArgumentException])
+ // assert(
+ // e.getMessage.contains("You may get a different result due to the upgrading to Spark"))
+
+ // February
+ val x1 = "2016-02-29"
+ val x2 = "2017-02-29"
+ val df1 = Seq(x1, x2).toDF("x")
+ checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
+ }
}
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index dbb01fbe7067..b0446d3ca7b6 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1101,6 +1101,9 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+ // Legacy mode is not supported and velox getTimestamp function does not throw
+ // exception when format is "yyyy-dd-aa".
+ .exclude("function to_date")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index a946e6de4345..ae86c9d06e81 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -246,4 +246,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
}
}
}
+
+ testGluten("function to_date") {
+ val d1 = Date.valueOf("2015-07-22")
+ val d2 = Date.valueOf("2015-07-01")
+ val d3 = Date.valueOf("2014-12-31")
+ val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+ val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val s1 = "2015-07-22 10:00:00"
+ val s2 = "2014-12-31"
+ val s3 = "2014-31-12"
+ val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")
+
+ checkAnswer(
+ df.select(to_date(col("t"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("s"))),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ checkAnswer(
+ df.selectExpr("to_date(t)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(d)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(s)"),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ // now with format
+ checkAnswer(
+ df.select(to_date(col("t"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key
+ withSQLConf(confKey -> "corrected") {
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-MM-dd")),
+ Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null)))
+ }
+ // legacyParserPolicy is not respected by Gluten.
+ // withSQLConf(confKey -> "exception") {
+ // checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
+ // }
+
+ // now switch format
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-dd-MM")),
+ Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))
+
+ // invalid format
+ checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null)))
+ // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa".
+ // val e =
+ // intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
+ // assert(e.getCause.isInstanceOf[IllegalArgumentException])
+ // assert(
+ // e.getMessage.contains("You may get a different result due to the upgrading to Spark"))
+
+ // February
+ val x1 = "2016-02-29"
+ val x2 = "2017-02-29"
+ val df1 = Seq(x1, x2).toDF("x")
+ checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
+ }
}
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala
index b8ac906d8076..f2a83bf234a9 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala
@@ -40,15 +40,12 @@ class GlutenCastSuite extends CastSuiteBase with GlutenTestsTrait {
testGluten("missing cases - from boolean") {
(DataTypeTestUtils.numericTypeWithoutDecimal + BooleanType).foreach {
- t =>
- t match {
- case BooleanType =>
- checkEvaluation(cast(cast(true, BooleanType), t), true)
- checkEvaluation(cast(cast(false, BooleanType), t), false)
- case _ =>
- checkEvaluation(cast(cast(true, BooleanType), t), 1)
- checkEvaluation(cast(cast(false, BooleanType), t), 0)
- }
+ case t @ BooleanType =>
+ checkEvaluation(cast(cast(true, BooleanType), t), true)
+ checkEvaluation(cast(cast(false, BooleanType), t), false)
+ case t =>
+ checkEvaluation(cast(cast(true, BooleanType), t), 1)
+ checkEvaluation(cast(cast(false, BooleanType), t), 0)
}
}
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index f5a1a076956e..a01d0cb4b331 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1123,6 +1123,9 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+ // Legacy mode is not supported and velox getTimestamp function does not throw
+ // exception when format is "yyyy-dd-aa".
+ .exclude("function to_date")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index a946e6de4345..ae86c9d06e81 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -246,4 +246,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
}
}
}
+
+ testGluten("function to_date") {
+ val d1 = Date.valueOf("2015-07-22")
+ val d2 = Date.valueOf("2015-07-01")
+ val d3 = Date.valueOf("2014-12-31")
+ val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+ val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val s1 = "2015-07-22 10:00:00"
+ val s2 = "2014-12-31"
+ val s3 = "2014-31-12"
+ val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")
+
+ checkAnswer(
+ df.select(to_date(col("t"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("s"))),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ checkAnswer(
+ df.selectExpr("to_date(t)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(d)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(s)"),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ // now with format
+ checkAnswer(
+ df.select(to_date(col("t"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key
+ withSQLConf(confKey -> "corrected") {
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-MM-dd")),
+ Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null)))
+ }
+ // legacyParserPolicy is not respected by Gluten.
+ // withSQLConf(confKey -> "exception") {
+ // checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
+ // }
+
+ // now switch format
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-dd-MM")),
+ Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))
+
+ // invalid format
+ checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null)))
+ // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa".
+ // val e =
+ // intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
+ // assert(e.getCause.isInstanceOf[IllegalArgumentException])
+ // assert(
+ // e.getMessage.contains("You may get a different result due to the upgrading to Spark"))
+
+ // February
+ val x1 = "2016-02-29"
+ val x2 = "2017-02-29"
+ val df1 = Seq(x1, x2).toDF("x")
+ checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
+ }
}
diff --git a/package/pom.xml b/package/pom.xml
index e0620e5cf5e1..b9c114181bcd 100644
--- a/package/pom.xml
+++ b/package/pom.xml
@@ -68,7 +68,7 @@
       <groupId>org.apache.gluten</groupId>
-      <artifactId>gluten-celeborn-package</artifactId>
+      <artifactId>gluten-celeborn</artifactId>
       <version>${project.version}</version>
@@ -78,7 +78,7 @@
       <groupId>org.apache.gluten</groupId>
-      <artifactId>gluten-uniffle-package</artifactId>
+      <artifactId>gluten-uniffle</artifactId>
       <version>${project.version}</version>
diff --git a/pom.xml b/pom.xml
index 3c59b4f19e11..4d704dc9b448 100644
--- a/pom.xml
+++ b/pom.xml
@@ -422,6 +422,70 @@
         <module>gluten-celeborn</module>
       </modules>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>build-helper-maven-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>add-celeborn-sources</id>
+                <phase>generate-sources</phase>
+                <goals>
+                  <goal>add-source</goal>
+                </goals>
+                <configuration>
+                  <sources>
+                    <!-- ... -->
+                  </sources>
+                </configuration>
+              </execution>
+              <execution>
+                <id>add-celeborn-resources</id>
+                <phase>generate-resources</phase>
+                <goals>
+                  <goal>add-resource</goal>
+                </goals>
+                <configuration>
+                  <resources>
+                    <resource>
+                      <directory>${project.basedir}/src-celeborn/main/resources</directory>
+                    </resource>
+                  </resources>
+                </configuration>
+              </execution>
+              <execution>
+                <id>add-celeborn-test-sources</id>
+                <phase>generate-test-sources</phase>
+                <goals>
+                  <goal>add-test-source</goal>
+                </goals>
+                <configuration>
+                  <sources>
+                    <!-- ... -->
+                  </sources>
+                </configuration>
+              </execution>
+              <execution>
+                <id>add-celeborn-test-resources</id>
+                <phase>generate-test-resources</phase>
+                <goals>
+                  <goal>add-test-resource</goal>
+                </goals>
+                <configuration>
+                  <resources>
+                    <resource>
+                      <directory>${project.basedir}/src-celeborn/test/resources</directory>
+                    </resource>
+                  </resources>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
     </profile>
     <profile>
       <id>uniffle</id>
@@ -431,6 +495,70 @@
         <module>gluten-uniffle</module>
       </modules>
+      <build>
+        <plugins>
+          <plugin>
+            <groupId>org.codehaus.mojo</groupId>
+            <artifactId>build-helper-maven-plugin</artifactId>
+            <executions>
+              <execution>
+                <id>add-uniffle-sources</id>
+                <phase>generate-sources</phase>
+                <goals>
+                  <goal>add-source</goal>
+                </goals>
+                <configuration>
+                  <sources>
+                    <!-- ... -->
+                  </sources>
+                </configuration>
+              </execution>
+              <execution>
+                <id>add-uniffle-resources</id>
+                <phase>generate-resources</phase>
+                <goals>
+                  <goal>add-resource</goal>
+                </goals>
+                <configuration>
+                  <resources>
+                    <resource>
+                      <directory>${project.basedir}/src-uniffle/main/resources</directory>
+                    </resource>
+                  </resources>
+                </configuration>
+              </execution>
+              <execution>
+                <id>add-uniffle-test-sources</id>
+                <phase>generate-test-sources</phase>
+                <goals>
+                  <goal>add-test-source</goal>
+                </goals>
+                <configuration>
+                  <sources>
+                    <!-- ... -->
+                  </sources>
+                </configuration>
+              </execution>
+              <execution>
+                <id>add-uniffle-test-resources</id>
+                <phase>generate-test-resources</phase>
+                <goals>
+                  <goal>add-test-resource</goal>
+                </goals>
+                <configuration>
+                  <resources>
+                    <resource>
+                      <directory>${project.basedir}/src-uniffle/test/resources</directory>
+                    </resource>
+                  </resources>
+                </configuration>
+              </execution>
+            </executions>
+          </plugin>
+        </plugins>
+      </build>
     </profile>
     <profile>
       <id>delta</id>
diff --git a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
index a3bd5079b016..fcdd3c3c8b4b 100644
--- a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
+++ b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
@@ -73,7 +73,9 @@ abstract class AbstractFileSourceScanExec(
override def supportsColumnar: Boolean = {
// The value should be defined in GlutenPlan.
- throw new UnsupportedOperationException("Unreachable code")
+ throw new UnsupportedOperationException(
+ "Unreachable code from org.apache.spark.sql.execution.AbstractFileSourceScanExec" +
+ ".supportsColumnar")
}
private lazy val needsUnsafeRowConversion: Boolean = {
diff --git a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
index c885f0cf44b3..01df5ba62167 100644
--- a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
+++ b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
@@ -77,7 +77,9 @@ abstract class AbstractFileSourceScanExec(
override def supportsColumnar: Boolean = {
// The value should be defined in GlutenPlan.
- throw new UnsupportedOperationException("Unreachable code")
+ throw new UnsupportedOperationException(
+ "Unreachable code from org.apache.spark.sql.execution.AbstractFileSourceScanExec" +
+ ".supportsColumnar")
}
private lazy val needsUnsafeRowConversion: Boolean = {
diff --git a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
index 53ea6f543a95..15e54ddb71f2 100644
--- a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
+++ b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
@@ -69,7 +69,9 @@ abstract class AbstractFileSourceScanExec(
override def supportsColumnar: Boolean = {
// The value should be defined in GlutenPlan.
- throw new UnsupportedOperationException("Unreachable code")
+ throw new UnsupportedOperationException(
+ "Unreachable code from org.apache.spark.sql.execution.AbstractFileSourceScanExec" +
+ ".supportsColumnar")
}
private lazy val needsUnsafeRowConversion: Boolean = {
diff --git a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
index c8dbcc2fed4f..a83c763c4566 100644
--- a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
+++ b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
@@ -69,7 +69,9 @@ abstract class AbstractFileSourceScanExec(
override def supportsColumnar: Boolean = {
// The value should be defined in GlutenPlan.
- throw new UnsupportedOperationException("Unreachable code")
+ throw new UnsupportedOperationException(
+ "Unreachable code from org.apache.spark.sql.execution.AbstractFileSourceScanExec" +
+ ".supportsColumnar")
}
private lazy val needsUnsafeRowConversion: Boolean = {