From 76168034983bb4bba055e7e5940d3558880ea3a8 Mon Sep 17 00:00:00 2001 From: Xiduo You Date: Tue, 28 May 2024 15:46:35 +0800 Subject: [PATCH] [CORE] Only materialize subquery before doing transform (#5862) We transform subqueries (e.g., DPP) during the columnar rules, where the plan is not actually being executed, so we should not materialize subqueries while replacing expressions: at that point they would not run concurrently. This PR wraps doTransform with transform so that subqueries are always materialized before doTransform runs, allowing them to be submitted concurrently. --- .../CHHashAggregateExecTransformer.scala | 4 +- .../GlutenCustomAggExpressionSuite.scala | 2 +- .../benchmarks/CHParquetReadBenchmark.scala | 2 +- .../HashAggregateExecTransformer.scala | 4 +- .../gluten/execution/TopNTransformer.scala | 4 +- .../BasicPhysicalOperatorTransformer.scala | 6 +- .../execution/BasicScanExecTransformer.scala | 6 +- ...oadcastNestedLoopJoinExecTransformer.scala | 6 +- .../CartesianProductExecTransformer.scala | 6 +- .../execution/ExpandExecTransformer.scala | 4 +- .../GenerateExecTransformerBase.scala | 4 +- .../execution/JoinExecTransformer.scala | 6 +- .../gluten/execution/LimitTransformer.scala | 4 +- .../execution/SortExecTransformer.scala | 4 +- .../SortMergeJoinExecTransformer.scala | 6 +- .../execution/WholeStageTransformer.scala | 60 +- .../execution/WindowExecTransformer.scala | 4 +- .../WindowGroupLimitExecTransformer.scala | 4 +- .../execution/WriteFilesExecTransformer.scala | 4 +- .../expression/ExpressionConverter.scala | 21 +- .../ScalarSubqueryTransformer.scala | 21 +- .../columnar/enumerated/RemoveFilter.scala | 4 +- .../ColumnarCollapseTransformStages.scala | 2 +- .../python/EvalPythonExecTransformer.scala | 4 +- .../scalar-subquery-select.sql | 363 -------- .../scalar-subquery-select.sql.out | 791 ------------------ .../velox/VeloxSQLQueryTestSettings.scala | 6 +- .../utils/velox/VeloxTestSettings.scala | 2 - .../GlutenQueryExecutionErrorsSuite.scala | 11 - .../scalar-subquery-select.sql | 257 ------ .../scalar-subquery-select.sql.out | 614 -------------- .../velox/VeloxSQLQueryTestSettings.scala | 6 +- .../utils/velox/VeloxTestSettings.scala | 2 - .../GlutenQueryExecutionErrorsSuite.scala | 11 - 34 files changed, 88 insertions(+), 2167 deletions(-) delete mode 100644 gluten-ut/spark34/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql delete mode 100644 gluten-ut/spark34/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out delete mode 100644 gluten-ut/spark35/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql delete mode 100644 gluten-ut/spark35/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashAggregateExecTransformer.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashAggregateExecTransformer.scala index 4a4d345db1dd..7e688814381b 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashAggregateExecTransformer.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/execution/CHHashAggregateExecTransformer.scala @@ -81,8 +81,8 @@ case class CHHashAggregateExecTransformer( } } - override def doTransform(context: SubstraitContext): TransformContext = { - val childCtx = child.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = { + val 
childCtx = child.asInstanceOf[TransformSupport].transform(context) val operatorId = context.nextOperatorId(this.nodeName) val aggParams = new AggregationParams diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/extension/GlutenCustomAggExpressionSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/extension/GlutenCustomAggExpressionSuite.scala index 3a2808e70f22..ba7d2c8f1935 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/extension/GlutenCustomAggExpressionSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/extension/GlutenCustomAggExpressionSuite.scala @@ -91,7 +91,7 @@ class GlutenCustomAggExpressionSuite extends GlutenClickHouseTPCHAbstractSuite { assert(planExecs(3).isInstanceOf[HashAggregateExec]) val substraitContext = new SubstraitContext - planExecs(2).asInstanceOf[CHHashAggregateExecTransformer].doTransform(substraitContext) + planExecs(2).asInstanceOf[CHHashAggregateExecTransformer].transform(substraitContext) // Check the functions assert(substraitContext.registeredFunction.containsKey("custom_sum_double:req_fp64")) diff --git a/backends-clickhouse/src/test/scala/org/apache/spark/sql/execution/benchmarks/CHParquetReadBenchmark.scala b/backends-clickhouse/src/test/scala/org/apache/spark/sql/execution/benchmarks/CHParquetReadBenchmark.scala index 5e802eeed1e4..dc1431fa64fa 100644 --- a/backends-clickhouse/src/test/scala/org/apache/spark/sql/execution/benchmarks/CHParquetReadBenchmark.scala +++ b/backends-clickhouse/src/test/scala/org/apache/spark/sql/execution/benchmarks/CHParquetReadBenchmark.scala @@ -104,7 +104,7 @@ object CHParquetReadBenchmark extends SqlBasedBenchmark with CHSqlBasedBenchmark val scanTime = chFileScan.longMetric("scanTime") // Generate Substrait plan val substraitContext = new SubstraitContext - val transformContext = chFileScan.doTransform(substraitContext) + val transformContext = chFileScan.transform(substraitContext) val outNames = new java.util.ArrayList[String]() for (attr <- outputAttrs) { outNames.add(ConverterUtils.genColumnNameWithExprId(attr)) diff --git a/backends-velox/src/main/scala/org/apache/gluten/execution/HashAggregateExecTransformer.scala b/backends-velox/src/main/scala/org/apache/gluten/execution/HashAggregateExecTransformer.scala index 2f447572406b..01ab56881936 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/execution/HashAggregateExecTransformer.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/execution/HashAggregateExecTransformer.scala @@ -65,8 +65,8 @@ abstract class HashAggregateExecTransformer( super.output } - override def doTransform(context: SubstraitContext): TransformContext = { - val childCtx = child.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = { + val childCtx = child.asInstanceOf[TransformSupport].transform(context) val aggParams = new AggregationParams val operatorId = context.nextOperatorId(this.nodeName) diff --git a/backends-velox/src/main/scala/org/apache/gluten/execution/TopNTransformer.scala b/backends-velox/src/main/scala/org/apache/gluten/execution/TopNTransformer.scala index 9df821fe10c5..c2d12415c78b 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/execution/TopNTransformer.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/execution/TopNTransformer.scala @@ -67,8 +67,8 @@ case class TopNTransformer( doNativeValidation(context, relNode) } - override def 
doTransform(context: SubstraitContext): TransformContext = { - val childCtx = child.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = { + val childCtx = child.asInstanceOf[TransformSupport].transform(context) val operatorId = context.nextOperatorId(this.nodeName) val relNode = getRelNode( diff --git a/gluten-core/src/main/scala/org/apache/gluten/execution/BasicPhysicalOperatorTransformer.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/BasicPhysicalOperatorTransformer.scala index e703295d0851..962ad6aca9d3 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/execution/BasicPhysicalOperatorTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/BasicPhysicalOperatorTransformer.scala @@ -129,8 +129,8 @@ abstract class FilterExecTransformerBase(val cond: Expression, val input: SparkP doNativeValidation(substraitContext, relNode) } - override def doTransform(context: SubstraitContext): TransformContext = { - val childCtx = child.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = { + val childCtx = child.asInstanceOf[TransformSupport].transform(context) val remainingCondition = getRemainingCondition val operatorId = context.nextOperatorId(this.nodeName) if (remainingCondition == null) { @@ -190,7 +190,7 @@ case class ProjectExecTransformer private (projectList: Seq[NamedExpression], ch BackendsApiManager.getMetricsApiInstance.genProjectTransformerMetricsUpdater(metrics) override def doTransform(context: SubstraitContext): TransformContext = { - val childCtx = child.asInstanceOf[TransformSupport].doTransform(context) + val childCtx = child.asInstanceOf[TransformSupport].transform(context) val operatorId = context.nextOperatorId(this.nodeName) if ((projectList == null || projectList.isEmpty) && childCtx != null) { // The computing for this project is not needed. 
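Editor's note on the repetitive hunks above and below: they all follow the pattern described in the commit message. The public transform entry point materializes subqueries before delegating to the operator-specific doTransform, and parent operators now reach their children through child.transform rather than child.doTransform. The following is a minimal, self-contained sketch of that shape only; SubstraitContext, TransformContext, executeQuery and the validation flag are stand-ins here, not Gluten's or Spark's real classes.

// Stand-ins, defined only so this sketch compiles on its own.
final class SubstraitContext
final case class TransformContext(root: String)

trait TransformSupport {
  // Stand-in for SparkPlan.executeQuery, which in Spark runs prepare() and
  // waitForSubqueries() before evaluating the body, so all subqueries
  // (DPP filters, scalar subqueries) are submitted and materialized first.
  protected def executeQuery[T](body: => T): T = body

  // Stand-in for TransformerState.underValidationState: during native
  // validation no subquery results are needed, so materialization is skipped.
  protected def underValidation: Boolean = false

  // Public entry point: subqueries are always materialized before the
  // operator-specific Substrait relation is built.
  final def transform(context: SubstraitContext): TransformContext =
    if (underValidation) doTransform(context)
    else executeQuery(doTransform(context))

  // Operator-specific plan construction. Children are reached through
  // child.transform(context), never child.doTransform(context) directly.
  protected def doTransform(context: SubstraitContext): TransformContext
}

object TransformDemo extends App {
  val scan = new TransformSupport {
    protected def doTransform(c: SubstraitContext): TransformContext =
      TransformContext("ReadRel")
  }
  println(scan.transform(new SubstraitContext)) // TransformContext(ReadRel)
}

The real implementation (see the WholeStageTransformer.scala hunk later in this patch) additionally rejects canonicalized plans before transforming.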
diff --git a/gluten-core/src/main/scala/org/apache/gluten/execution/BasicScanExecTransformer.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/BasicScanExecTransformer.scala index b0bc0ea7b27d..2dd5aff766a9 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/execution/BasicScanExecTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/BasicScanExecTransformer.scala @@ -80,7 +80,7 @@ trait BasicScanExecTransformer extends LeafTransformSupport with BaseDataSource val numOutputVectors = longMetric("outputVectors") val scanTime = longMetric("scanTime") val substraitContext = new SubstraitContext - val transformContext = doTransform(substraitContext) + val transformContext = transform(substraitContext) val outNames = filteRedundantField(outputAttributes()).map(ConverterUtils.genColumnNameWithExprId).asJava val planNode = @@ -117,7 +117,7 @@ trait BasicScanExecTransformer extends LeafTransformSupport with BaseDataSource } val substraitContext = new SubstraitContext - val relNode = doTransform(substraitContext).root + val relNode = transform(substraitContext).root doNativeValidation(substraitContext, relNode) } @@ -133,7 +133,7 @@ trait BasicScanExecTransformer extends LeafTransformSupport with BaseDataSource } } - override def doTransform(context: SubstraitContext): TransformContext = { + override protected def doTransform(context: SubstraitContext): TransformContext = { val output = filteRedundantField(outputAttributes()) val typeNodes = ConverterUtils.collectAttributeTypeNodes(output) val nameList = ConverterUtils.collectAttributeNamesWithoutExprId(output) diff --git a/gluten-core/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala index 19a2ec8541e2..2f666a811dab 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/BroadcastNestedLoopJoinExecTransformer.scala @@ -119,12 +119,12 @@ abstract class BroadcastNestedLoopJoinExecTransformer( } } - override def doTransform(context: SubstraitContext): TransformContext = { - val streamedPlanContext = streamedPlan.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = { + val streamedPlanContext = streamedPlan.asInstanceOf[TransformSupport].transform(context) val (inputStreamedRelNode, inputStreamedOutput) = (streamedPlanContext.root, streamedPlanContext.outputAttributes) - val buildPlanContext = buildPlan.asInstanceOf[TransformSupport].doTransform(context) + val buildPlanContext = buildPlan.asInstanceOf[TransformSupport].transform(context) val (inputBuildRelNode, inputBuildOutput) = (buildPlanContext.root, buildPlanContext.outputAttributes) diff --git a/gluten-core/src/main/scala/org/apache/gluten/execution/CartesianProductExecTransformer.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/CartesianProductExecTransformer.scala index c5a4a0eb81e8..91831f18493a 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/execution/CartesianProductExecTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/CartesianProductExecTransformer.scala @@ -77,12 +77,12 @@ case class CartesianProductExecTransformer( BackendsApiManager.getMetricsApiInstance.genNestedLoopJoinTransformerMetricsUpdater(metrics) } - override def doTransform(context: 
SubstraitContext): TransformContext = { - val leftPlanContext = left.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = { + val leftPlanContext = left.asInstanceOf[TransformSupport].transform(context) val (inputLeftRelNode, inputLeftOutput) = (leftPlanContext.root, leftPlanContext.outputAttributes) - val rightPlanContext = right.asInstanceOf[TransformSupport].doTransform(context) + val rightPlanContext = right.asInstanceOf[TransformSupport].transform(context) val (inputRightRelNode, inputRightOutput) = (rightPlanContext.root, rightPlanContext.outputAttributes) diff --git a/gluten-core/src/main/scala/org/apache/gluten/execution/ExpandExecTransformer.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/ExpandExecTransformer.scala index aa98d88b29a8..362debb531ee 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/execution/ExpandExecTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/ExpandExecTransformer.scala @@ -110,8 +110,8 @@ case class ExpandExecTransformer( doNativeValidation(substraitContext, relNode) } - override def doTransform(context: SubstraitContext): TransformContext = { - val childCtx = child.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = { + val childCtx = child.asInstanceOf[TransformSupport].transform(context) val operatorId = context.nextOperatorId(this.nodeName) if (projections == null || projections.isEmpty) { // The computing for this Expand is not needed. diff --git a/gluten-core/src/main/scala/org/apache/gluten/execution/GenerateExecTransformerBase.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/GenerateExecTransformerBase.scala index 5811f7b47fe4..b5c9b85aeb0d 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/execution/GenerateExecTransformerBase.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/GenerateExecTransformerBase.scala @@ -76,8 +76,8 @@ abstract class GenerateExecTransformerBase( doNativeValidation(context, relNode) } - override def doTransform(context: SubstraitContext): TransformContext = { - val childCtx = child.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = { + val childCtx = child.asInstanceOf[TransformSupport].transform(context) val relNode = getRelNode(context, childCtx.root, getGeneratorNode(context), validation = false) TransformContext(child.output, output, relNode) } diff --git a/gluten-core/src/main/scala/org/apache/gluten/execution/JoinExecTransformer.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/JoinExecTransformer.scala index e47ad8c7b1bd..0414c95aa918 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/execution/JoinExecTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/JoinExecTransformer.scala @@ -227,12 +227,12 @@ trait HashJoinLikeExecTransformer extends BaseJoinExec with TransformSupport { doNativeValidation(substraitContext, relNode) } - override def doTransform(context: SubstraitContext): TransformContext = { - val streamedPlanContext = streamedPlan.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = { + val streamedPlanContext = streamedPlan.asInstanceOf[TransformSupport].transform(context) val (inputStreamedRelNode, inputStreamedOutput) = 
(streamedPlanContext.root, streamedPlanContext.outputAttributes) - val buildPlanContext = buildPlan.asInstanceOf[TransformSupport].doTransform(context) + val buildPlanContext = buildPlan.asInstanceOf[TransformSupport].transform(context) val (inputBuildRelNode, inputBuildOutput) = (buildPlanContext.root, buildPlanContext.outputAttributes) diff --git a/gluten-core/src/main/scala/org/apache/gluten/execution/LimitTransformer.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/LimitTransformer.scala index 3379542ad1d6..8859844be48e 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/execution/LimitTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/LimitTransformer.scala @@ -53,8 +53,8 @@ case class LimitTransformer(child: SparkPlan, offset: Long, count: Long) doNativeValidation(context, relNode) } - override def doTransform(context: SubstraitContext): TransformContext = { - val childCtx = child.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = { + val childCtx = child.asInstanceOf[TransformSupport].transform(context) val operatorId = context.nextOperatorId(this.nodeName) val relNode = getRelNode(context, operatorId, offset, count, child.output, childCtx.root, false) TransformContext(child.output, child.output, relNode) diff --git a/gluten-core/src/main/scala/org/apache/gluten/execution/SortExecTransformer.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/SortExecTransformer.scala index 9e4a20d40ad4..f79dc69e680b 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/execution/SortExecTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/SortExecTransformer.scala @@ -101,8 +101,8 @@ case class SortExecTransformer( doNativeValidation(substraitContext, relNode) } - override def doTransform(context: SubstraitContext): TransformContext = { - val childCtx = child.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = { + val childCtx = child.asInstanceOf[TransformSupport].transform(context) val operatorId = context.nextOperatorId(this.nodeName) if (sortOrder == null || sortOrder.isEmpty) { // The computing for this project is not needed. 
diff --git a/gluten-core/src/main/scala/org/apache/gluten/execution/SortMergeJoinExecTransformer.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/SortMergeJoinExecTransformer.scala index 5ca11a53c88a..98b3666f84ba 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/execution/SortMergeJoinExecTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/SortMergeJoinExecTransformer.scala @@ -191,12 +191,12 @@ abstract class SortMergeJoinExecTransformerBase( doNativeValidation(substraitContext, relNode) } - override def doTransform(context: SubstraitContext): TransformContext = { - val streamedPlanContext = streamedPlan.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = { + val streamedPlanContext = streamedPlan.asInstanceOf[TransformSupport].transform(context) val (inputStreamedRelNode, inputStreamedOutput) = (streamedPlanContext.root, streamedPlanContext.outputAttributes) - val bufferedPlanContext = bufferedPlan.asInstanceOf[TransformSupport].doTransform(context) + val bufferedPlanContext = bufferedPlan.asInstanceOf[TransformSupport].transform(context) val (inputBuildRelNode, inputBuildOutput) = (bufferedPlanContext.root, bufferedPlanContext.outputAttributes) diff --git a/gluten-core/src/main/scala/org/apache/gluten/execution/WholeStageTransformer.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/WholeStageTransformer.scala index b809ac4bf1a4..ed691fc09613 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/execution/WholeStageTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/WholeStageTransformer.scala @@ -68,7 +68,22 @@ trait TransformSupport extends GlutenPlan { */ def columnarInputRDDs: Seq[RDD[ColumnarBatch]] - def doTransform(context: SubstraitContext): TransformContext = { + final def transform(context: SubstraitContext): TransformContext = { + if (isCanonicalizedPlan) { + throw new IllegalStateException( + "A canonicalized plan is not supposed to be executed transform.") + } + if (TransformerState.underValidationState) { + doTransform(context) + } else { + // Materialize subquery first before going to do transform. + executeQuery { + doTransform(context) + } + } + } + + protected def doTransform(context: SubstraitContext): TransformContext = { throw new UnsupportedOperationException( s"This operator doesn't support doTransform with SubstraitContext.") } @@ -182,7 +197,7 @@ case class WholeStageTransformer(child: SparkPlan, materializeInput: Boolean = f val substraitContext = new SubstraitContext val childCtx = child .asInstanceOf[TransformSupport] - .doTransform(substraitContext) + .transform(substraitContext) if (childCtx == null) { throw new NullPointerException(s"WholeStageTransformer can't do Transform on $child") } @@ -216,8 +231,6 @@ case class WholeStageTransformer(child: SparkPlan, materializeInput: Boolean = f } def doWholeStageTransform(): WholeStageTransformContext = { - // invoke SparkPlan.prepare to do subquery preparation etc. 
- super.prepare() val context = generateWholeStageTransformContext() if (conf.getConf(GlutenConfig.CACHE_WHOLE_STAGE_TRANSFORMER_CONTEXT)) { wholeStageTransformerContext = Some(context) @@ -257,6 +270,12 @@ case class WholeStageTransformer(child: SparkPlan, materializeInput: Boolean = f override def doExecuteColumnar(): RDD[ColumnarBatch] = { val pipelineTime: SQLMetric = longMetric("pipelineTime") + // We should do transform first to make sure all subqueries are materialized + val wsCtx = GlutenTimeMetric.withMillisTime { + doWholeStageTransform() + }( + t => + logOnLevel(substraitPlanLogLevel, s"$nodeName generating the substrait plan took: $t ms.")) val inputRDDs = new ColumnarInputRDDsWrapper(columnarInputRDDs) // Check if BatchScan exists. val basicScanExecTransformers = findAllScanTransformers() @@ -271,22 +290,11 @@ case class WholeStageTransformer(child: SparkPlan, materializeInput: Boolean = f val allScanPartitions = basicScanExecTransformers.map(_.getPartitions) val allScanSplitInfos = getSplitInfosFromPartitions(basicScanExecTransformers, allScanPartitions) - - val (wsCtx, inputPartitions) = GlutenTimeMetric.withMillisTime { - val wsCtx = doWholeStageTransform() - val partitions = - BackendsApiManager.getIteratorApiInstance.genPartitions( - wsCtx, - allScanSplitInfos, - basicScanExecTransformers) - - (wsCtx, partitions) - }( - t => - logOnLevel( - substraitPlanLogLevel, - s"$nodeName generating the substrait plan took: $t ms.")) - + val inputPartitions = + BackendsApiManager.getIteratorApiInstance.genPartitions( + wsCtx, + allScanSplitInfos, + basicScanExecTransformers) val rdd = new GlutenWholeStageColumnarRDD( sparkContext, inputPartitions, @@ -321,22 +329,18 @@ case class WholeStageTransformer(child: SparkPlan, materializeInput: Boolean = f * GlutenDataFrameAggregateSuite) in these cases, separate RDDs takes care of SCAN as a * result, genFinalStageIterator rather than genFirstStageIterator will be invoked */ - val resCtx = GlutenTimeMetric.withMillisTime(doWholeStageTransform()) { - t => - logOnLevel(substraitPlanLogLevel, s"$nodeName generating the substrait plan took: $t ms.") - } new WholeStageZippedPartitionsRDD( sparkContext, inputRDDs, numaBindingInfo, sparkConf, - resCtx, + wsCtx, pipelineTime, BackendsApiManager.getMetricsApiInstance.metricsUpdatingFunction( child, - resCtx.substraitContext.registeredRelMap, - resCtx.substraitContext.registeredJoinParams, - resCtx.substraitContext.registeredAggregationParams + wsCtx.substraitContext.registeredRelMap, + wsCtx.substraitContext.registeredJoinParams, + wsCtx.substraitContext.registeredAggregationParams ), materializeInput ) diff --git a/gluten-core/src/main/scala/org/apache/gluten/execution/WindowExecTransformer.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/WindowExecTransformer.scala index d7c3d3dd56a4..ef6a767b5604 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/execution/WindowExecTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/WindowExecTransformer.scala @@ -179,8 +179,8 @@ case class WindowExecTransformer( doNativeValidation(substraitContext, relNode) } - override def doTransform(context: SubstraitContext): TransformContext = { - val childCtx = child.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = { + val childCtx = child.asInstanceOf[TransformSupport].transform(context) val operatorId = context.nextOperatorId(this.nodeName) if (windowExpression == null || 
windowExpression.isEmpty) { // The computing for this operator is not needed. diff --git a/gluten-core/src/main/scala/org/apache/gluten/execution/WindowGroupLimitExecTransformer.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/WindowGroupLimitExecTransformer.scala index bba79fa761e8..46a4e1aa4eee 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/execution/WindowGroupLimitExecTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/WindowGroupLimitExecTransformer.scala @@ -146,8 +146,8 @@ case class WindowGroupLimitExecTransformer( doNativeValidation(substraitContext, relNode) } - override def doTransform(context: SubstraitContext): TransformContext = { - val childCtx = child.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = { + val childCtx = child.asInstanceOf[TransformSupport].transform(context) val operatorId = context.nextOperatorId(this.nodeName) val currRel = diff --git a/gluten-core/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala b/gluten-core/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala index 7df4afa8a6c1..14d58bfa8377 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala @@ -161,8 +161,8 @@ case class WriteFilesExecTransformer( doNativeValidation(substraitContext, relNode) } - override def doTransform(context: SubstraitContext): TransformContext = { - val childCtx = child.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = { + val childCtx = child.asInstanceOf[TransformSupport].transform(context) val operatorId = context.nextOperatorId(this.nodeName) val currRel = getRelNode(context, getFinalChildOutput(), operatorId, childCtx.root, validation = false) diff --git a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala index 2d514118ac30..b66ec89eaf2b 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/expression/ExpressionConverter.scala @@ -654,7 +654,7 @@ object ExpressionConverter extends SQLConfHelper with Logging { // or ColumnarBroadcastExchange was disabled. partitionFilters } else { - val newPartitionFilters = partitionFilters.map { + partitionFilters.map { case dynamicPruning: DynamicPruningExpression => dynamicPruning.transform { // Lookup inside subqueries for duplicate exchanges. @@ -723,25 +723,6 @@ object ExpressionConverter extends SQLConfHelper with Logging { } case e: Expression => e } - updateSubqueryResult(newPartitionFilters) - newPartitionFilters - } - } - - private def updateSubqueryResult(partitionFilters: Seq[Expression]): Unit = { - // When it includes some DynamicPruningExpression, - // it needs to execute InSubqueryExec first, - // because doTransform path can't execute 'doExecuteColumnar' which will - // execute prepare subquery first. 
- partitionFilters.foreach { - case DynamicPruningExpression(inSubquery: InSubqueryExec) => - if (inSubquery.values().isEmpty) inSubquery.updateResult() - case e: Expression => - e.foreach { - case s: ScalarSubquery => s.updateResult() - case _ => - } - case _ => } } } diff --git a/gluten-core/src/main/scala/org/apache/gluten/expression/ScalarSubqueryTransformer.scala b/gluten-core/src/main/scala/org/apache/gluten/expression/ScalarSubqueryTransformer.scala index 0accf9ffd0f9..9508d27df73b 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/expression/ScalarSubqueryTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/expression/ScalarSubqueryTransformer.scala @@ -18,6 +18,7 @@ package org.apache.gluten.expression import org.apache.gluten.substrait.expression.{ExpressionBuilder, ExpressionNode} +import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.execution.ScalarSubquery @@ -30,21 +31,11 @@ case class ScalarSubqueryTransformer(substraitExprName: String, query: ScalarSub if (TransformerState.underValidationState) { return ExpressionBuilder.makeLiteral(null, query.dataType, true) } - // the first column in first row from `query`. - val rows = query.plan.executeCollect() - if (rows.length > 1) { - throw new IllegalStateException( - s"more than one row returned by a subquery used as an expression:\n${query.plan}") - } - val result: AnyRef = if (rows.length == 1) { - assert( - rows(0).numFields == 1, - s"Expects 1 field, but got ${rows(0).numFields}; something went wrong in analysis") - rows(0).get(0, query.dataType) - } else { - // If there is no rows returned, the result should be null. - null - } + // After https://github.com/apache/incubator-gluten/pull/5862, we do not need to execute + // subquery manually so the exception behavior is same with vanilla Spark. + // Note that, this code change is just for simplify. The subquery has already been materialized + // before doing transform. 
+ val result = query.eval(InternalRow.empty) ExpressionBuilder.makeLiteral(result, query.dataType, result == null) } } diff --git a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/enumerated/RemoveFilter.scala b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/enumerated/RemoveFilter.scala index 55b29cd56ff1..b980c24227d5 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/enumerated/RemoveFilter.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/extension/columnar/enumerated/RemoveFilter.scala @@ -75,8 +75,8 @@ object RemoveFilter extends RasRule[SparkPlan] { override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan = copy(newChild) override def outputPartitioning: Partitioning = child.outputPartitioning override def outputOrdering: Seq[SortOrder] = child.outputOrdering - override def doTransform(context: SubstraitContext): TransformContext = - child.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = + child.asInstanceOf[TransformSupport].transform(context) override protected def doExecuteColumnar(): RDD[ColumnarBatch] = child.executeColumnar() } } diff --git a/gluten-core/src/main/scala/org/apache/spark/sql/execution/ColumnarCollapseTransformStages.scala b/gluten-core/src/main/scala/org/apache/spark/sql/execution/ColumnarCollapseTransformStages.scala index 23746846e9cf..c9bbf4e1c0eb 100644 --- a/gluten-core/src/main/scala/org/apache/spark/sql/execution/ColumnarCollapseTransformStages.scala +++ b/gluten-core/src/main/scala/org/apache/spark/sql/execution/ColumnarCollapseTransformStages.scala @@ -65,7 +65,7 @@ case class InputIteratorTransformer(child: SparkPlan) extends UnaryTransformSupp child.doExecuteBroadcast() } - override def doTransform(context: SubstraitContext): TransformContext = { + override protected def doTransform(context: SubstraitContext): TransformContext = { val operatorId = context.nextOperatorId(nodeName) val readRel = RelBuilder.makeReadRelForInputIterator(child.output.asJava, context, operatorId) TransformContext(output, output, readRel) diff --git a/gluten-core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExecTransformer.scala b/gluten-core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExecTransformer.scala index 43dd2f453803..ecedc1bae01c 100644 --- a/gluten-core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExecTransformer.scala +++ b/gluten-core/src/main/scala/org/apache/spark/sql/execution/python/EvalPythonExecTransformer.scala @@ -84,8 +84,8 @@ case class EvalPythonExecTransformer( doNativeValidation(context, relNode) } - override def doTransform(context: SubstraitContext): TransformContext = { - val childCtx = child.asInstanceOf[TransformSupport].doTransform(context) + override protected def doTransform(context: SubstraitContext): TransformContext = { + val childCtx = child.asInstanceOf[TransformSupport].transform(context) val args = context.registeredFunction val operatorId = context.nextOperatorId(this.nodeName) val expressionNodes = new JArrayList[ExpressionNode] diff --git a/gluten-ut/spark34/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql b/gluten-ut/spark34/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql deleted file mode 100644 index 48d1594fa51a..000000000000 --- 
a/gluten-ut/spark34/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql +++ /dev/null @@ -1,363 +0,0 @@ --- A test suite for scalar subquery in SELECT clause - -create temporary view t1 as select * from values - ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'), - ('val1b', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ('val1a', 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), - ('val1a', 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ('val1c', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), - ('val1d', null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), - ('val1d', null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), - ('val1e', 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), - ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), - ('val1d', 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), - ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') - as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i); - -create temporary view t2 as select * from values - ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), - ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), - ('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), - ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), - ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), - ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), - ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) - as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i); - -create temporary view t3 as select * from values - ('val3a', 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), - ('val3a', 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val1b', 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val1b', 8S, 16, 319L, float(17), 25D, 26E2, timestamp 
'2014-06-04 01:02:00.000', date '2014-06-04'), - ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), - ('val3c', 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), - ('val3c', 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), - ('val1b', null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), - ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), - ('val3b', 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val3b', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') - as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i); - --- Group 1: scalar subquery in SELECT clause --- no correlation --- TC 01.01 --- more than one scalar subquery -SELECT (SELECT min(t3d) FROM t3) min_t3d, - (SELECT max(t2h) FROM t2) max_t2h -FROM t1 -WHERE t1a = 'val1c'; - --- TC 01.02 --- scalar subquery in an IN subquery -SELECT t1a, count(*) -FROM t1 -WHERE t1c IN (SELECT (SELECT min(t3c) FROM t3) - FROM t2 - GROUP BY t2g - HAVING count(*) > 1) -GROUP BY t1a; - --- TC 01.03 --- under a set op -SELECT (SELECT min(t3d) FROM t3) min_t3d, - null -FROM t1 -WHERE t1a = 'val1c' -UNION -SELECT null, - (SELECT max(t2h) FROM t2) max_t2h -FROM t1 -WHERE t1a = 'val1c'; - --- TC 01.04 -SELECT (SELECT min(t3c) FROM t3) min_t3d -FROM t1 -WHERE t1a = 'val1a' -INTERSECT -SELECT (SELECT min(t2c) FROM t2) min_t2d -FROM t1 -WHERE t1a = 'val1d'; - --- TC 01.05 -SELECT q1.t1a, q2.t2a, q1.min_t3d, q2.avg_t3d -FROM (SELECT t1a, (SELECT min(t3d) FROM t3) min_t3d - FROM t1 - WHERE t1a IN ('val1e', 'val1c')) q1 - FULL OUTER JOIN - (SELECT t2a, (SELECT avg(t3d) FROM t3) avg_t3d - FROM t2 - WHERE t2a IN ('val1c', 'val2a')) q2 -ON q1.t1a = q2.t2a -AND q1.min_t3d < q2.avg_t3d; - --- Group 2: scalar subquery in SELECT clause --- with correlation --- TC 02.01 -SELECT (SELECT min(t3d) FROM t3 WHERE t3.t3a = t1.t1a) min_t3d, - (SELECT max(t2h) FROM t2 WHERE t2.t2a = t1.t1a) max_t2h -FROM t1 -WHERE t1a = 'val1b'; - --- TC 02.02 -SELECT (SELECT min(t3d) FROM t3 WHERE t3a = t1a) min_t3d -FROM t1 -WHERE t1a = 'val1b' -MINUS -SELECT (SELECT min(t3d) FROM t3) abs_min_t3d -FROM t1 -WHERE t1a = 'val1b'; - --- TC 02.03 -SELECT t1a, t1b -FROM t1 -WHERE NOT EXISTS (SELECT (SELECT max(t2b) - FROM t2 LEFT JOIN t1 - ON t2a = t1a - WHERE t2c = t3c) dummy - FROM t3 - WHERE t3b < (SELECT max(t2b) - FROM t2 LEFT JOIN t1 - ON t2a = t1a - WHERE t2c = t3c) - AND t3a = t1a); - --- SPARK-34876: Non-nullable aggregates should not return NULL in a correlated subquery -SELECT t1a, - (SELECT count(t2d) FROM t2 WHERE t2a = t1a) count_t2, - (SELECT count_if(t2d > 0) FROM t2 WHERE t2a = t1a) count_if_t2, - (SELECT approx_count_distinct(t2d) FROM t2 WHERE t2a = t1a) approx_count_distinct_t2, - (SELECT collect_list(t2d) FROM t2 WHERE t2a = t1a) collect_list_t2, - (SELECT sort_array(collect_set(t2d)) FROM t2 WHERE t2a = t1a) collect_set_t2, - (SELECT hex(count_min_sketch(t2d, 0.5d, 0.5d, 1)) FROM t2 WHERE t2a = t1a) collect_set_t2 -FROM t1; - --- SPARK-36028: Allow Project to host outer references in scalar subqueries -SELECT t1c, (SELECT t1c) FROM t1; -SELECT t1c, (SELECT t1c WHERE t1c = 8) FROM t1; -SELECT t1c, t1d, (SELECT c + d FROM (SELECT t1c AS c, t1d AS d)) FROM t1; -SELECT t1c, (SELECT SUM(c) FROM (SELECT t1c AS c)) FROM t1; -SELECT t1a, (SELECT SUM(t2b) FROM t2 JOIN (SELECT t1a AS a) ON t2a 
= a) FROM t1; - --- CTE in correlated scalar subqueries -CREATE OR REPLACE TEMPORARY VIEW t1 AS VALUES (0, 1), (1, 2) t1(c1, c2); -CREATE OR REPLACE TEMPORARY VIEW t2 AS VALUES (0, 2), (0, 3) t2(c1, c2); - --- Single row subquery -SELECT c1, (WITH t AS (SELECT 1 AS a) SELECT a + c1 FROM t) FROM t1; --- Correlation in CTE. -SELECT c1, (WITH t AS (SELECT * FROM t2 WHERE c1 = t1.c1) SELECT SUM(c2) FROM t) FROM t1; --- Multiple CTE definitions. -SELECT c1, ( - WITH t3 AS (SELECT c1 + 1 AS c1, c2 + 1 AS c2 FROM t2), - t4 AS (SELECT * FROM t3 WHERE t1.c1 = c1) - SELECT SUM(c2) FROM t4 -) FROM t1; --- Multiple CTE references. -SELECT c1, ( - WITH t AS (SELECT * FROM t2) - SELECT SUM(c2) FROM (SELECT c1, c2 FROM t UNION SELECT c2, c1 FROM t) r(c1, c2) - WHERE c1 = t1.c1 -) FROM t1; --- Reference CTE in both the main query and the subquery. -WITH v AS (SELECT * FROM t2) -SELECT * FROM t1 WHERE c1 > ( - WITH t AS (SELECT * FROM t2) - SELECT COUNT(*) FROM v WHERE c1 = t1.c1 AND c1 > (SELECT SUM(c2) FROM t WHERE c1 = v.c1) -); --- Single row subquery that references CTE in the main query. -WITH t AS (SELECT 1 AS a) -SELECT c1, (SELECT a FROM t WHERE a = c1) FROM t1; --- Multiple CTE references with non-deterministic CTEs. -WITH -v1 AS (SELECT c1, c2, rand(0) c3 FROM t1), -v2 AS (SELECT c1, c2, rand(0) c4 FROM v1 WHERE c3 IN (SELECT c3 FROM v1)) -SELECT c1, ( - WITH v3 AS (SELECT c1, c2, rand(0) c5 FROM t2) - SELECT COUNT(*) FROM ( - SELECT * FROM v2 WHERE c1 > 0 - UNION SELECT * FROM v2 WHERE c2 > 0 - UNION SELECT * FROM v3 WHERE c2 > 0 - ) WHERE c1 = v1.c1 -) FROM v1; - --- Multi-value subquery error -SELECT (SELECT a FROM (SELECT 1 AS a UNION ALL SELECT 2 AS a) t) AS b; - --- SPARK-36114: Support correlated non-equality predicates -CREATE OR REPLACE TEMP VIEW t1(c1, c2) AS (VALUES (0, 1), (1, 2)); -CREATE OR REPLACE TEMP VIEW t2(c1, c2) AS (VALUES (0, 2), (0, 3)); - --- Neumann example Q2 -CREATE OR REPLACE TEMP VIEW students(id, name, major, year) AS (VALUES - (0, 'A', 'CS', 2022), - (1, 'B', 'CS', 2022), - (2, 'C', 'Math', 2022)); -CREATE OR REPLACE TEMP VIEW exams(sid, course, curriculum, grade, date) AS (VALUES - (0, 'C1', 'CS', 4, 2020), - (0, 'C2', 'CS', 3, 2021), - (1, 'C1', 'CS', 2, 2020), - (1, 'C2', 'CS', 1, 2021)); - -SELECT students.name, exams.course -FROM students, exams -WHERE students.id = exams.sid - AND (students.major = 'CS' OR students.major = 'Games Eng') - AND exams.grade >= ( - SELECT avg(exams.grade) + 1 - FROM exams - WHERE students.id = exams.sid - OR (exams.curriculum = students.major AND students.year > exams.date)); - --- Correlated non-equality predicates -SELECT (SELECT min(c2) FROM t2 WHERE t1.c1 > t2.c1) FROM t1; -SELECT (SELECT min(c2) FROM t2 WHERE t1.c1 >= t2.c1 AND t1.c2 < t2.c2) FROM t1; - --- Correlated non-equality predicates with the COUNT bug. 
-SELECT (SELECT count(*) FROM t2 WHERE t1.c1 > t2.c1) FROM t1; - --- Correlated equality predicates that are not supported after SPARK-35080 -SELECT c, ( - SELECT count(*) - FROM (VALUES ('ab'), ('abc'), ('bc')) t2(c) - WHERE t1.c = substring(t2.c, 1, 1) -) FROM (VALUES ('a'), ('b')) t1(c); - -SELECT c, ( - SELECT count(*) - FROM (VALUES (0, 6), (1, 5), (2, 4), (3, 3)) t1(a, b) - WHERE a + b = c -) FROM (VALUES (6)) t2(c); - --- SPARK-43156: scalar subquery with Literal result like `COUNT(1) is null` -SELECT *, (SELECT count(1) is null FROM t2 WHERE t1.c1 = t2.c1) FROM t1; - -select (select f from (select false as f, max(c2) from t1 where t1.c1 = t1.c1)) from t2; - --- Set operations in correlation path - -CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 0); -CREATE OR REPLACE TEMP VIEW t1(t1a, t1b, t1c) AS VALUES (1, 1, 3); -CREATE OR REPLACE TEMP VIEW t2(t2a, t2b, t2c) AS VALUES (1, 1, 5), (2, 2, 7); - -SELECT t0a, (SELECT sum(c) FROM - (SELECT t1c as c - FROM t1 - WHERE t1a = t0a - UNION ALL - SELECT t2c as c - FROM t2 - WHERE t2b = t0b) -) -FROM t0; - -SELECT t0a, (SELECT sum(c) FROM - (SELECT t1c as c - FROM t1 - WHERE t1a = t0a - UNION ALL - SELECT t2c as c - FROM t2 - WHERE t2a = t0a) -) -FROM t0; - -SELECT t0a, (SELECT sum(c) FROM - (SELECT t1c as c - FROM t1 - WHERE t1a > t0a - UNION ALL - SELECT t2c as c - FROM t2 - WHERE t2b <= t0b) -) -FROM t0; - -SELECT t0a, (SELECT sum(t1c) FROM - (SELECT t1c - FROM t1 - WHERE t1a = t0a - UNION ALL - SELECT t2c - FROM t2 - WHERE t2b = t0b) -) -FROM t0; - -SELECT t0a, (SELECT sum(t1c) FROM - (SELECT t1c - FROM t1 - WHERE t1a = t0a - UNION DISTINCT - SELECT t2c - FROM t2 - WHERE t2b = t0b) -) -FROM t0; - --- Tests for column aliasing -SELECT t0a, (SELECT sum(t1a + 3 * t1b + 5 * t1c) FROM - (SELECT t1c as t1a, t1a as t1b, t0a as t1c - FROM t1 - WHERE t1a = t0a - UNION ALL - SELECT t0a as t2b, t2c as t1a, t0b as t2c - FROM t2 - WHERE t2b = t0b) -) -FROM t0; - --- Test handling of COUNT bug -SELECT t0a, (SELECT count(t1c) FROM - (SELECT t1c - FROM t1 - WHERE t1a = t0a - UNION DISTINCT - SELECT t2c - FROM t2 - WHERE t2b = t0b) -) -FROM t0; - --- Correlated references in project -SELECT t0a, (SELECT sum(d) FROM - (SELECT t1a - t0a as d - FROM t1 - UNION ALL - SELECT t2a - t0a as d - FROM t2) -) -FROM t0; - --- Correlated references in aggregate - unsupported -SELECT t0a, (SELECT sum(d) FROM - (SELECT sum(t0a) as d - FROM t1 - UNION ALL - SELECT sum(t2a) + t0a as d - FROM t2) -) -FROM t0; - --- SPARK-43760: the result of the subquery can be NULL. 
-select * -from -( - select t1.id c1, ( - select sum(c) - from ( - select t2.id * t2.id c - from range (1, 2) t2 where t1.id = t2.id - group by t2.id - ) - ) c2 - from range (1, 3) t1 -) t -where t.c2 is not null; diff --git a/gluten-ut/spark34/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out b/gluten-ut/spark34/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out deleted file mode 100644 index 088359d39b86..000000000000 --- a/gluten-ut/spark34/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out +++ /dev/null @@ -1,791 +0,0 @@ --- Automatically generated by GlutenSQLQueryTestSuite --- Number of queries: 52 - - --- !query -create temporary view t1 as select * from values - ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'), - ('val1b', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ('val1a', 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), - ('val1a', 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ('val1c', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), - ('val1d', null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), - ('val1d', null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), - ('val1e', 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), - ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), - ('val1d', 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), - ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') - as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i) --- !query schema -struct<> --- !query output - - - --- !query -create temporary view t2 as select * from values - ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), - ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), - ('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), - ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), - ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), - ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), - ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp 
'2014-05-04 01:01:00.000', null) - as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i) --- !query schema -struct<> --- !query output - - - --- !query -create temporary view t3 as select * from values - ('val3a', 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), - ('val3a', 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val1b', 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val1b', 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), - ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), - ('val3c', 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), - ('val3c', 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), - ('val1b', null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), - ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), - ('val3b', 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val3b', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') - as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i) --- !query schema -struct<> --- !query output - - - --- !query -SELECT (SELECT min(t3d) FROM t3) min_t3d, - (SELECT max(t2h) FROM t2) max_t2h -FROM t1 -WHERE t1a = 'val1c' --- !query schema -struct --- !query output -10 2017-05-04 01:01:00 - - --- !query -SELECT t1a, count(*) -FROM t1 -WHERE t1c IN (SELECT (SELECT min(t3c) FROM t3) - FROM t2 - GROUP BY t2g - HAVING count(*) > 1) -GROUP BY t1a --- !query schema -struct --- !query output -val1a 2 - - --- !query -SELECT (SELECT min(t3d) FROM t3) min_t3d, - null -FROM t1 -WHERE t1a = 'val1c' -UNION -SELECT null, - (SELECT max(t2h) FROM t2) max_t2h -FROM t1 -WHERE t1a = 'val1c' --- !query schema -struct --- !query output -10 NULL -NULL 2017-05-04 01:01:00 - - --- !query -SELECT (SELECT min(t3c) FROM t3) min_t3d -FROM t1 -WHERE t1a = 'val1a' -INTERSECT -SELECT (SELECT min(t2c) FROM t2) min_t2d -FROM t1 -WHERE t1a = 'val1d' --- !query schema -struct --- !query output -12 - - --- !query -SELECT q1.t1a, q2.t2a, q1.min_t3d, q2.avg_t3d -FROM (SELECT t1a, (SELECT min(t3d) FROM t3) min_t3d - FROM t1 - WHERE t1a IN ('val1e', 'val1c')) q1 - FULL OUTER JOIN - (SELECT t2a, (SELECT avg(t3d) FROM t3) avg_t3d - FROM t2 - WHERE t2a IN ('val1c', 'val2a')) q2 -ON q1.t1a = q2.t2a -AND q1.min_t3d < q2.avg_t3d --- !query schema -struct --- !query output -NULL val2a NULL 200.83333333333334 -val1c val1c 10 200.83333333333334 -val1c val1c 10 200.83333333333334 -val1e NULL 10 NULL -val1e NULL 10 NULL -val1e NULL 10 NULL - - --- !query -SELECT (SELECT min(t3d) FROM t3 WHERE t3.t3a = t1.t1a) min_t3d, - (SELECT max(t2h) FROM t2 WHERE t2.t2a = t1.t1a) max_t2h -FROM t1 -WHERE t1a = 'val1b' --- !query schema -struct --- !query output -19 2017-05-04 01:01:00 - - --- !query -SELECT (SELECT min(t3d) FROM t3 WHERE t3a = t1a) min_t3d -FROM t1 -WHERE t1a = 'val1b' -MINUS -SELECT (SELECT min(t3d) FROM t3) abs_min_t3d -FROM t1 -WHERE t1a = 'val1b' --- !query schema -struct --- !query output -19 - - --- !query -SELECT t1a, t1b -FROM t1 -WHERE NOT EXISTS (SELECT (SELECT max(t2b) - FROM t2 LEFT JOIN t1 - ON t2a = t1a - 
WHERE t2c = t3c) dummy - FROM t3 - WHERE t3b < (SELECT max(t2b) - FROM t2 LEFT JOIN t1 - ON t2a = t1a - WHERE t2c = t3c) - AND t3a = t1a) --- !query schema -struct --- !query output -val1a 16 -val1a 16 -val1a 6 -val1a 6 -val1c 8 -val1d 10 -val1d NULL -val1d NULL -val1e 10 -val1e 10 -val1e 10 - - --- !query -SELECT t1a, - (SELECT count(t2d) FROM t2 WHERE t2a = t1a) count_t2, - (SELECT count_if(t2d > 0) FROM t2 WHERE t2a = t1a) count_if_t2, - (SELECT approx_count_distinct(t2d) FROM t2 WHERE t2a = t1a) approx_count_distinct_t2, - (SELECT collect_list(t2d) FROM t2 WHERE t2a = t1a) collect_list_t2, - (SELECT sort_array(collect_set(t2d)) FROM t2 WHERE t2a = t1a) collect_set_t2, - (SELECT hex(count_min_sketch(t2d, 0.5d, 0.5d, 1)) FROM t2 WHERE t2a = t1a) collect_set_t2 -FROM t1 --- !query schema -struct,collect_set_t2:array,collect_set_t2:string> --- !query output -val1a 0 0 0 [] [] 0000000100000000000000000000000100000004000000005D8D6AB90000000000000000000000000000000000000000000000000000000000000000 -val1a 0 0 0 [] [] 0000000100000000000000000000000100000004000000005D8D6AB90000000000000000000000000000000000000000000000000000000000000000 -val1a 0 0 0 [] [] 0000000100000000000000000000000100000004000000005D8D6AB90000000000000000000000000000000000000000000000000000000000000000 -val1a 0 0 0 [] [] 0000000100000000000000000000000100000004000000005D8D6AB90000000000000000000000000000000000000000000000000000000000000000 -val1b 6 6 3 [19,119,319,19,19,19] [19,119,319] 0000000100000000000000060000000100000004000000005D8D6AB90000000000000000000000000000000400000000000000010000000000000001 -val1c 2 2 2 [219,19] [19,219] 0000000100000000000000020000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000001 -val1d 0 0 0 [] [] 0000000100000000000000000000000100000004000000005D8D6AB90000000000000000000000000000000000000000000000000000000000000000 -val1d 0 0 0 [] [] 0000000100000000000000000000000100000004000000005D8D6AB90000000000000000000000000000000000000000000000000000000000000000 -val1d 0 0 0 [] [] 0000000100000000000000000000000100000004000000005D8D6AB90000000000000000000000000000000000000000000000000000000000000000 -val1e 1 1 1 [19] [19] 0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000 -val1e 1 1 1 [19] [19] 0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000 -val1e 1 1 1 [19] [19] 0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000 - - --- !query -SELECT t1c, (SELECT t1c) FROM t1 --- !query schema -struct --- !query output -12 12 -12 12 -16 16 -16 16 -16 16 -16 16 -8 8 -8 8 -NULL NULL -NULL NULL -NULL NULL -NULL NULL - - --- !query -SELECT t1c, (SELECT t1c WHERE t1c = 8) FROM t1 --- !query schema -struct --- !query output -12 NULL -12 NULL -16 NULL -16 NULL -16 NULL -16 NULL -8 8 -8 8 -NULL NULL -NULL NULL -NULL NULL -NULL NULL - - --- !query -SELECT t1c, t1d, (SELECT c + d FROM (SELECT t1c AS c, t1d AS d)) FROM t1 --- !query schema -struct --- !query output -12 10 22 -12 21 33 -16 19 35 -16 19 35 -16 19 35 -16 22 38 -8 10 18 -8 10 18 -NULL 12 NULL -NULL 19 NULL -NULL 19 NULL -NULL 25 NULL - - --- !query -SELECT t1c, (SELECT SUM(c) FROM (SELECT t1c AS c)) FROM t1 --- !query schema -struct --- !query output -12 12 -12 12 -16 16 -16 16 -16 16 -16 16 -8 8 -8 8 -NULL NULL -NULL NULL -NULL NULL -NULL NULL - - --- !query -SELECT t1a, (SELECT SUM(t2b) FROM t2 JOIN (SELECT 
t1a AS a) ON t2a = a) FROM t1 --- !query schema -struct --- !query output -val1a NULL -val1a NULL -val1a NULL -val1a NULL -val1b 36 -val1c 24 -val1d NULL -val1d NULL -val1d NULL -val1e 8 -val1e 8 -val1e 8 - - --- !query -CREATE OR REPLACE TEMPORARY VIEW t1 AS VALUES (0, 1), (1, 2) t1(c1, c2) --- !query schema -struct<> --- !query output - - - --- !query -CREATE OR REPLACE TEMPORARY VIEW t2 AS VALUES (0, 2), (0, 3) t2(c1, c2) --- !query schema -struct<> --- !query output - - - --- !query -SELECT c1, (WITH t AS (SELECT 1 AS a) SELECT a + c1 FROM t) FROM t1 --- !query schema -struct --- !query output -0 1 -1 2 - - --- !query -SELECT c1, (WITH t AS (SELECT * FROM t2 WHERE c1 = t1.c1) SELECT SUM(c2) FROM t) FROM t1 --- !query schema -struct --- !query output -0 5 -1 NULL - - --- !query -SELECT c1, ( - WITH t3 AS (SELECT c1 + 1 AS c1, c2 + 1 AS c2 FROM t2), - t4 AS (SELECT * FROM t3 WHERE t1.c1 = c1) - SELECT SUM(c2) FROM t4 -) FROM t1 --- !query schema -struct --- !query output -0 NULL -1 7 - - --- !query -SELECT c1, ( - WITH t AS (SELECT * FROM t2) - SELECT SUM(c2) FROM (SELECT c1, c2 FROM t UNION SELECT c2, c1 FROM t) r(c1, c2) - WHERE c1 = t1.c1 -) FROM t1 --- !query schema -struct --- !query output -0 5 -1 NULL - - --- !query -WITH v AS (SELECT * FROM t2) -SELECT * FROM t1 WHERE c1 > ( - WITH t AS (SELECT * FROM t2) - SELECT COUNT(*) FROM v WHERE c1 = t1.c1 AND c1 > (SELECT SUM(c2) FROM t WHERE c1 = v.c1) -) --- !query schema -struct --- !query output -1 2 - - --- !query -WITH t AS (SELECT 1 AS a) -SELECT c1, (SELECT a FROM t WHERE a = c1) FROM t1 --- !query schema -struct --- !query output -0 NULL -1 1 - - --- !query -WITH -v1 AS (SELECT c1, c2, rand(0) c3 FROM t1), -v2 AS (SELECT c1, c2, rand(0) c4 FROM v1 WHERE c3 IN (SELECT c3 FROM v1)) -SELECT c1, ( - WITH v3 AS (SELECT c1, c2, rand(0) c5 FROM t2) - SELECT COUNT(*) FROM ( - SELECT * FROM v2 WHERE c1 > 0 - UNION SELECT * FROM v2 WHERE c2 > 0 - UNION SELECT * FROM v3 WHERE c2 > 0 - ) WHERE c1 = v1.c1 -) FROM v1 --- !query schema -struct --- !query output -0 3 -1 1 - - --- !query -SELECT (SELECT a FROM (SELECT 1 AS a UNION ALL SELECT 2 AS a) t) AS b --- !query schema -struct<> --- !query output -java.lang.IllegalStateException -more than one row returned by a subquery used as an expression: -Subquery subquery#1, [id=#2] -+- AdaptiveSparkPlan isFinalPlan=true - +- == Final Plan == - VeloxColumnarToRowExec - +- ColumnarUnion - :- ProjectExecTransformer [1 AS a#3] - : +- InputIteratorTransformer[fake_column#4] - : +- InputAdapter - : +- RowToVeloxColumnar - : +- Scan OneRowRelation[fake_column#4] - +- ProjectExecTransformer [2 AS a#5] - +- InputIteratorTransformer[fake_column#6] - +- InputAdapter - +- RowToVeloxColumnar - +- Scan OneRowRelation[fake_column#6] - +- == Initial Plan == - Union - :- Project [1 AS a#3] - : +- Scan OneRowRelation[] - +- Project [2 AS a#5] - +- Scan OneRowRelation[] - - --- !query -CREATE OR REPLACE TEMP VIEW t1(c1, c2) AS (VALUES (0, 1), (1, 2)) --- !query schema -struct<> --- !query output - - - --- !query -CREATE OR REPLACE TEMP VIEW t2(c1, c2) AS (VALUES (0, 2), (0, 3)) --- !query schema -struct<> --- !query output - - - --- !query -CREATE OR REPLACE TEMP VIEW students(id, name, major, year) AS (VALUES - (0, 'A', 'CS', 2022), - (1, 'B', 'CS', 2022), - (2, 'C', 'Math', 2022)) --- !query schema -struct<> --- !query output - - - --- !query -CREATE OR REPLACE TEMP VIEW exams(sid, course, curriculum, grade, date) AS (VALUES - (0, 'C1', 'CS', 4, 2020), - (0, 'C2', 'CS', 3, 2021), - (1, 'C1', 'CS', 2, 2020), - (1, 
'C2', 'CS', 1, 2021)) --- !query schema -struct<> --- !query output - - - --- !query -SELECT students.name, exams.course -FROM students, exams -WHERE students.id = exams.sid - AND (students.major = 'CS' OR students.major = 'Games Eng') - AND exams.grade >= ( - SELECT avg(exams.grade) + 1 - FROM exams - WHERE students.id = exams.sid - OR (exams.curriculum = students.major AND students.year > exams.date)) --- !query schema -struct --- !query output -A C1 - - --- !query -SELECT (SELECT min(c2) FROM t2 WHERE t1.c1 > t2.c1) FROM t1 --- !query schema -struct --- !query output -2 -NULL - - --- !query -SELECT (SELECT min(c2) FROM t2 WHERE t1.c1 >= t2.c1 AND t1.c2 < t2.c2) FROM t1 --- !query schema -struct --- !query output -2 -3 - - --- !query -SELECT (SELECT count(*) FROM t2 WHERE t1.c1 > t2.c1) FROM t1 --- !query schema -struct --- !query output -0 -2 - - --- !query -SELECT c, ( - SELECT count(*) - FROM (VALUES ('ab'), ('abc'), ('bc')) t2(c) - WHERE t1.c = substring(t2.c, 1, 1) -) FROM (VALUES ('a'), ('b')) t1(c) --- !query schema -struct --- !query output -a 2 -b 1 - - --- !query -SELECT c, ( - SELECT count(*) - FROM (VALUES (0, 6), (1, 5), (2, 4), (3, 3)) t1(a, b) - WHERE a + b = c -) FROM (VALUES (6)) t2(c) --- !query schema -struct --- !query output -6 4 - - --- !query -SELECT *, (SELECT count(1) is null FROM t2 WHERE t1.c1 = t2.c1) FROM t1 --- !query schema -struct --- !query output -0 1 false -1 2 false - - --- !query -select (select f from (select false as f, max(c2) from t1 where t1.c1 = t1.c1)) from t2 --- !query schema -struct --- !query output -false -false - - --- !query -CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 0) --- !query schema -struct<> --- !query output - - - --- !query -CREATE OR REPLACE TEMP VIEW t1(t1a, t1b, t1c) AS VALUES (1, 1, 3) --- !query schema -struct<> --- !query output - - - --- !query -CREATE OR REPLACE TEMP VIEW t2(t2a, t2b, t2c) AS VALUES (1, 1, 5), (2, 2, 7) --- !query schema -struct<> --- !query output - - - --- !query -SELECT t0a, (SELECT sum(c) FROM - (SELECT t1c as c - FROM t1 - WHERE t1a = t0a - UNION ALL - SELECT t2c as c - FROM t2 - WHERE t2b = t0b) -) -FROM t0 --- !query schema -struct --- !query output -1 8 -2 NULL - - --- !query -SELECT t0a, (SELECT sum(c) FROM - (SELECT t1c as c - FROM t1 - WHERE t1a = t0a - UNION ALL - SELECT t2c as c - FROM t2 - WHERE t2a = t0a) -) -FROM t0 --- !query schema -struct --- !query output -1 8 -2 7 - - --- !query -SELECT t0a, (SELECT sum(c) FROM - (SELECT t1c as c - FROM t1 - WHERE t1a > t0a - UNION ALL - SELECT t2c as c - FROM t2 - WHERE t2b <= t0b) -) -FROM t0 --- !query schema -struct --- !query output -1 5 -2 NULL - - --- !query -SELECT t0a, (SELECT sum(t1c) FROM - (SELECT t1c - FROM t1 - WHERE t1a = t0a - UNION ALL - SELECT t2c - FROM t2 - WHERE t2b = t0b) -) -FROM t0 --- !query schema -struct --- !query output -1 8 -2 NULL - - --- !query -SELECT t0a, (SELECT sum(t1c) FROM - (SELECT t1c - FROM t1 - WHERE t1a = t0a - UNION DISTINCT - SELECT t2c - FROM t2 - WHERE t2b = t0b) -) -FROM t0 --- !query schema -struct --- !query output -1 8 -2 NULL - - --- !query -SELECT t0a, (SELECT sum(t1a + 3 * t1b + 5 * t1c) FROM - (SELECT t1c as t1a, t1a as t1b, t0a as t1c - FROM t1 - WHERE t1a = t0a - UNION ALL - SELECT t0a as t2b, t2c as t1a, t0b as t2c - FROM t2 - WHERE t2b = t0b) -) -FROM t0 --- !query schema -struct --- !query output -1 32 -2 NULL - - --- !query -SELECT t0a, (SELECT count(t1c) FROM - (SELECT t1c - FROM t1 - WHERE t1a = t0a - UNION DISTINCT - SELECT t2c - FROM t2 - WHERE t2b = t0b) -) -FROM t0 
--- !query schema -struct --- !query output -1 2 -2 0 - - --- !query -SELECT t0a, (SELECT sum(d) FROM - (SELECT t1a - t0a as d - FROM t1 - UNION ALL - SELECT t2a - t0a as d - FROM t2) -) -FROM t0 --- !query schema -struct --- !query output -1 1 -2 -2 - - --- !query -SELECT t0a, (SELECT sum(d) FROM - (SELECT sum(t0a) as d - FROM t1 - UNION ALL - SELECT sum(t2a) + t0a as d - FROM t2) -) -FROM t0 --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.CORRELATED_REFERENCE", - "sqlState" : "0A000", - "messageParameters" : { - "sqlExprs" : "\"sum(t0a) AS d\"" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 36, - "stopIndex" : 67, - "fragment" : "SELECT sum(t0a) as d\n FROM t1" - } ] -} - - --- !query -select * -from -( - select t1.id c1, ( - select sum(c) - from ( - select t2.id * t2.id c - from range (1, 2) t2 where t1.id = t2.id - group by t2.id - ) - ) c2 - from range (1, 3) t1 -) t -where t.c2 is not null --- !query schema -struct --- !query output -1 1 diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxSQLQueryTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxSQLQueryTestSettings.scala index 5067de74ea7e..345971e9ffc0 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxSQLQueryTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxSQLQueryTestSettings.scala @@ -134,6 +134,7 @@ object VeloxSQLQueryTestSettings extends SQLQueryTestSettings { "subquery/negative-cases/invalid-correlation.sql", "subquery/negative-cases/subq-input-typecheck.sql", "subquery/scalar-subquery/scalar-subquery-predicate.sql", + "subquery/scalar-subquery/scalar-subquery-select.sql", "subquery/subquery-in-from.sql", "postgreSQL/aggregates_part1.sql", "postgreSQL/aggregates_part2.sql", @@ -241,9 +242,6 @@ object VeloxSQLQueryTestSettings extends SQLQueryTestSettings { "group-by.sql", "udf/udf-group-by.sql", // Overwrite some results of regr_intercept, regr_r2, corr. - "linear-regression.sql", - // Exception string doesn't match for - // SELECT (SELECT a FROM (SELECT 1 AS a UNION ALL SELECT 2 AS a) t) AS b - "subquery/scalar-subquery/scalar-subquery-select.sql" + "linear-regression.sql" ) } diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 3a993189d6ae..505417aebb50 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -80,8 +80,6 @@ class VeloxTestSettings extends BackendTestSettings { // NEW SUITE: disable as it expects exception which doesn't happen when offloaded to gluten .exclude( "INCONSISTENT_BEHAVIOR_CROSS_VERSION: compatibility with Spark 2.4/3.2 in reading/writing dates") - // gluten throws different exception - .excludeByPrefix("SCALAR_SUBQUERY_TOO_MANY_ROWS:") // Doesn't support unhex with failOnError=true. 
.exclude("CONVERSION_INVALID_INPUT: to_binary conversion function hex") enableSuite[GlutenQueryParsingErrorsSuite] diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/errors/GlutenQueryExecutionErrorsSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/errors/GlutenQueryExecutionErrorsSuite.scala index d9b661c8a4a7..8896541c29d2 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/errors/GlutenQueryExecutionErrorsSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/errors/GlutenQueryExecutionErrorsSuite.scala @@ -24,15 +24,4 @@ class GlutenQueryExecutionErrorsSuite override protected def getResourceParquetFilePath(name: String): String = { getWorkspaceFilePath("sql", "core", "src", "test", "resources").toString + "/" + name } - - testGluten( - "SCALAR_SUBQUERY_TOO_MANY_ROWS: " + - "More than one row returned by a subquery used as an expression") { - val exception = intercept[IllegalStateException] { - sql("select (select a from (select 1 as a union all select 2 as a) t) as b").collect() - } - assert( - exception.getMessage.contains("more than one row returned by a subquery" + - " used as an expression")) - } } diff --git a/gluten-ut/spark35/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql b/gluten-ut/spark35/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql deleted file mode 100644 index 741292d2c0fa..000000000000 --- a/gluten-ut/spark35/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql +++ /dev/null @@ -1,257 +0,0 @@ --- A test suite for scalar subquery in SELECT clause - -create temporary view t1 as select * from values - ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'), - ('val1b', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ('val1a', 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), - ('val1a', 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ('val1c', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), - ('val1d', null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), - ('val1d', null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), - ('val1e', 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), - ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), - ('val1d', 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), - ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') - as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i); - -create temporary view t2 as select * from values - ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), - ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), - ('val1b', null, 16, 319L, float(17), 
25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), - ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), - ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), - ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), - ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) - as t2(t2a, t2b, t2c, t2d, t2e, t2f, t2g, t2h, t2i); - -create temporary view t3 as select * from values - ('val3a', 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), - ('val3a', 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val1b', 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val1b', 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), - ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), - ('val3c', 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), - ('val3c', 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), - ('val1b', null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), - ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), - ('val3b', 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val3b', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') - as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i); - --- Group 1: scalar subquery in SELECT clause --- no correlation --- TC 01.01 --- more than one scalar subquery -SELECT (SELECT min(t3d) FROM t3) min_t3d, - (SELECT max(t2h) FROM t2) max_t2h -FROM t1 -WHERE t1a = 'val1c'; - --- TC 01.02 --- scalar subquery in an IN subquery -SELECT t1a, count(*) -FROM t1 -WHERE t1c IN (SELECT (SELECT min(t3c) FROM t3) - FROM t2 - GROUP BY t2g - HAVING count(*) > 1) -GROUP BY t1a; - --- TC 01.03 --- under a set op -SELECT (SELECT min(t3d) FROM t3) min_t3d, - null -FROM t1 -WHERE t1a = 'val1c' -UNION -SELECT null, - (SELECT max(t2h) FROM t2) max_t2h -FROM t1 -WHERE t1a = 'val1c'; - --- TC 01.04 -SELECT (SELECT min(t3c) FROM t3) min_t3d -FROM t1 -WHERE t1a = 'val1a' -INTERSECT -SELECT (SELECT min(t2c) FROM t2) min_t2d -FROM t1 -WHERE t1a = 'val1d'; - --- TC 01.05 -SELECT q1.t1a, q2.t2a, q1.min_t3d, q2.avg_t3d -FROM (SELECT t1a, (SELECT min(t3d) FROM t3) min_t3d - FROM t1 - WHERE t1a IN ('val1e', 'val1c')) q1 - FULL OUTER JOIN - (SELECT t2a, (SELECT avg(t3d) FROM t3) avg_t3d - FROM t2 - WHERE t2a IN ('val1c', 'val2a')) q2 -ON q1.t1a = q2.t2a -AND q1.min_t3d < q2.avg_t3d; - --- Group 2: scalar subquery in SELECT clause --- with correlation --- TC 02.01 -SELECT (SELECT min(t3d) FROM t3 WHERE t3.t3a = t1.t1a) min_t3d, 
- (SELECT max(t2h) FROM t2 WHERE t2.t2a = t1.t1a) max_t2h -FROM t1 -WHERE t1a = 'val1b'; - --- TC 02.02 -SELECT (SELECT min(t3d) FROM t3 WHERE t3a = t1a) min_t3d -FROM t1 -WHERE t1a = 'val1b' -MINUS -SELECT (SELECT min(t3d) FROM t3) abs_min_t3d -FROM t1 -WHERE t1a = 'val1b'; - --- TC 02.03 -SELECT t1a, t1b -FROM t1 -WHERE NOT EXISTS (SELECT (SELECT max(t2b) - FROM t2 LEFT JOIN t1 - ON t2a = t1a - WHERE t2c = t3c) dummy - FROM t3 - WHERE t3b < (SELECT max(t2b) - FROM t2 LEFT JOIN t1 - ON t2a = t1a - WHERE t2c = t3c) - AND t3a = t1a); - --- SPARK-34876: Non-nullable aggregates should not return NULL in a correlated subquery -SELECT t1a, - (SELECT count(t2d) FROM t2 WHERE t2a = t1a) count_t2, - (SELECT count_if(t2d > 0) FROM t2 WHERE t2a = t1a) count_if_t2, - (SELECT approx_count_distinct(t2d) FROM t2 WHERE t2a = t1a) approx_count_distinct_t2, - (SELECT collect_list(t2d) FROM t2 WHERE t2a = t1a) collect_list_t2, - (SELECT sort_array(collect_set(t2d)) FROM t2 WHERE t2a = t1a) collect_set_t2, - (SELECT hex(count_min_sketch(t2d, 0.5d, 0.5d, 1)) FROM t2 WHERE t2a = t1a) collect_set_t2 -FROM t1; - --- SPARK-36028: Allow Project to host outer references in scalar subqueries -SELECT t1c, (SELECT t1c) FROM t1; -SELECT t1c, (SELECT t1c WHERE t1c = 8) FROM t1; -SELECT t1c, t1d, (SELECT c + d FROM (SELECT t1c AS c, t1d AS d)) FROM t1; -SELECT t1c, (SELECT SUM(c) FROM (SELECT t1c AS c)) FROM t1; -SELECT t1a, (SELECT SUM(t2b) FROM t2 JOIN (SELECT t1a AS a) ON t2a = a) FROM t1; - --- CTE in correlated scalar subqueries -CREATE OR REPLACE TEMPORARY VIEW t1 AS VALUES (0, 1), (1, 2) t1(c1, c2); -CREATE OR REPLACE TEMPORARY VIEW t2 AS VALUES (0, 2), (0, 3) t2(c1, c2); - --- Single row subquery -SELECT c1, (WITH t AS (SELECT 1 AS a) SELECT a + c1 FROM t) FROM t1; --- Correlation in CTE. -SELECT c1, (WITH t AS (SELECT * FROM t2 WHERE c1 = t1.c1) SELECT SUM(c2) FROM t) FROM t1; --- Multiple CTE definitions. -SELECT c1, ( - WITH t3 AS (SELECT c1 + 1 AS c1, c2 + 1 AS c2 FROM t2), - t4 AS (SELECT * FROM t3 WHERE t1.c1 = c1) - SELECT SUM(c2) FROM t4 -) FROM t1; --- Multiple CTE references. -SELECT c1, ( - WITH t AS (SELECT * FROM t2) - SELECT SUM(c2) FROM (SELECT c1, c2 FROM t UNION SELECT c2, c1 FROM t) r(c1, c2) - WHERE c1 = t1.c1 -) FROM t1; --- Reference CTE in both the main query and the subquery. -WITH v AS (SELECT * FROM t2) -SELECT * FROM t1 WHERE c1 > ( - WITH t AS (SELECT * FROM t2) - SELECT COUNT(*) FROM v WHERE c1 = t1.c1 AND c1 > (SELECT SUM(c2) FROM t WHERE c1 = v.c1) -); --- Single row subquery that references CTE in the main query. -WITH t AS (SELECT 1 AS a) -SELECT c1, (SELECT a FROM t WHERE a = c1) FROM t1; --- Multiple CTE references with non-deterministic CTEs. 
-WITH -v1 AS (SELECT c1, c2, rand(0) c3 FROM t1), -v2 AS (SELECT c1, c2, rand(0) c4 FROM v1 WHERE c3 IN (SELECT c3 FROM v1)) -SELECT c1, ( - WITH v3 AS (SELECT c1, c2, rand(0) c5 FROM t2) - SELECT COUNT(*) FROM ( - SELECT * FROM v2 WHERE c1 > 0 - UNION SELECT * FROM v2 WHERE c2 > 0 - UNION SELECT * FROM v3 WHERE c2 > 0 - ) WHERE c1 = v1.c1 -) FROM v1; - --- Multi-value subquery error -SELECT (SELECT a FROM (SELECT 1 AS a UNION ALL SELECT 2 AS a) t) AS b; - --- SPARK-36114: Support correlated non-equality predicates -CREATE OR REPLACE TEMP VIEW t1(c1, c2) AS (VALUES (0, 1), (1, 2)); -CREATE OR REPLACE TEMP VIEW t2(c1, c2) AS (VALUES (0, 2), (0, 3)); - --- Neumann example Q2 -CREATE OR REPLACE TEMP VIEW students(id, name, major, year) AS (VALUES - (0, 'A', 'CS', 2022), - (1, 'B', 'CS', 2022), - (2, 'C', 'Math', 2022)); -CREATE OR REPLACE TEMP VIEW exams(sid, course, curriculum, grade, date) AS (VALUES - (0, 'C1', 'CS', 4, 2020), - (0, 'C2', 'CS', 3, 2021), - (1, 'C1', 'CS', 2, 2020), - (1, 'C2', 'CS', 1, 2021)); - -SELECT students.name, exams.course -FROM students, exams -WHERE students.id = exams.sid - AND (students.major = 'CS' OR students.major = 'Games Eng') - AND exams.grade >= ( - SELECT avg(exams.grade) + 1 - FROM exams - WHERE students.id = exams.sid - OR (exams.curriculum = students.major AND students.year > exams.date)); - --- Correlated non-equality predicates -SELECT (SELECT min(c2) FROM t2 WHERE t1.c1 > t2.c1) FROM t1; -SELECT (SELECT min(c2) FROM t2 WHERE t1.c1 >= t2.c1 AND t1.c2 < t2.c2) FROM t1; - --- Correlated non-equality predicates with the COUNT bug. -SELECT (SELECT count(*) FROM t2 WHERE t1.c1 > t2.c1) FROM t1; - --- Correlated equality predicates that are not supported after SPARK-35080 -SELECT c, ( - SELECT count(*) - FROM (VALUES ('ab'), ('abc'), ('bc')) t2(c) - WHERE t1.c = substring(t2.c, 1, 1) -) FROM (VALUES ('a'), ('b')) t1(c); - -SELECT c, ( - SELECT count(*) - FROM (VALUES (0, 6), (1, 5), (2, 4), (3, 3)) t1(a, b) - WHERE a + b = c -) FROM (VALUES (6)) t2(c); - --- SPARK-43156: scalar subquery with Literal result like `COUNT(1) is null` -SELECT *, (SELECT count(1) is null FROM t2 WHERE t1.c1 = t2.c1) FROM t1; - -select (select f from (select false as f, max(c2) from t1 where t1.c1 = t1.c1)) from t2; - --- SPARK-43596: handle IsNull when rewriting the domain join -set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=false; -WITH T AS (SELECT 1 AS a) -SELECT (SELECT sum(1) FROM T WHERE a = col OR upper(col)= 'Y') -FROM (SELECT null as col) as foo; -set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=true; - --- SPARK-43760: the result of the subquery can be NULL. 
-select * from ( - select t1.id c1, ( - select t2.id c from range (1, 2) t2 - where t1.id = t2.id ) c2 - from range (1, 3) t1 ) t -where t.c2 is not null; diff --git a/gluten-ut/spark35/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out b/gluten-ut/spark35/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out deleted file mode 100644 index 5c6f141d8505..000000000000 --- a/gluten-ut/spark35/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out +++ /dev/null @@ -1,614 +0,0 @@ --- Automatically generated by GlutenSQLQueryTestSuite --- Number of queries: 52 - - --- !query -create temporary view t1 as select * from values - ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 00:00:00.000', date '2014-04-04'), - ('val1b', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ('val1a', 16S, 12, 21L, float(15.0), 20D, 20E2, timestamp '2014-06-04 01:02:00.001', date '2014-06-04'), - ('val1a', 16S, 12, 10L, float(15.0), 20D, 20E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ('val1c', 8S, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:02:00.001', date '2014-05-05'), - ('val1d', null, 16, 22L, float(17.0), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', null), - ('val1d', null, 16, 19L, float(17.0), 25D, 26E2, timestamp '2014-07-04 01:02:00.001', null), - ('val1e', 10S, null, 25L, float(17.0), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-04'), - ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-09-04 01:02:00.001', date '2014-09-04'), - ('val1d', 10S, null, 12L, float(17.0), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ('val1a', 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:02:00.001', date '2014-04-04'), - ('val1e', 10S, null, 19L, float(17.0), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04') - as t1(t1a, t1b, t1c, t1d, t1e, t1f, t1g, t1h, t1i) --- !query schema -struct<> --- !query output - - - --- !query -create temporary view t2 as select * from values - ('val2a', 6S, 12, 14L, float(15), 20D, 20E2, timestamp '2014-04-04 01:01:00.000', date '2014-04-04'), - ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ('val1b', 8S, 16, 119L, float(17), 25D, 26E2, timestamp '2015-05-04 01:01:00.000', date '2015-05-04'), - ('val1c', 12S, 16, 219L, float(17), 25D, 26E2, timestamp '2016-05-04 01:01:00.000', date '2016-05-04'), - ('val1b', null, 16, 319L, float(17), 25D, 26E2, timestamp '2017-05-04 01:01:00.000', null), - ('val2e', 8S, null, 419L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ('val1f', 19S, null, 519L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', date '2014-05-04'), - ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-06-04 01:01:00.000', date '2014-06-04'), - ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:01:00.000', date '2014-07-04'), - ('val1c', 12S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-08-04 01:01:00.000', date '2014-08-05'), - ('val1e', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:01:00.000', date '2014-09-04'), - ('val1f', 19S, null, 19L, float(17), 25D, 26E2, timestamp '2014-10-04 01:01:00.000', date '2014-10-04'), - ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:01:00.000', null) - as t2(t2a, t2b, t2c, t2d, t2e, 
t2f, t2g, t2h, t2i) --- !query schema -struct<> --- !query output - - - --- !query -create temporary view t3 as select * from values - ('val3a', 6S, 12, 110L, float(15), 20D, 20E2, timestamp '2014-04-04 01:02:00.000', date '2014-04-04'), - ('val3a', 6S, 12, 10L, float(15), 20D, 20E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val1b', 10S, 12, 219L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val1b', 10S, 12, 19L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val1b', 8S, 16, 319L, float(17), 25D, 26E2, timestamp '2014-06-04 01:02:00.000', date '2014-06-04'), - ('val1b', 8S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-07-04 01:02:00.000', date '2014-07-04'), - ('val3c', 17S, 16, 519L, float(17), 25D, 26E2, timestamp '2014-08-04 01:02:00.000', date '2014-08-04'), - ('val3c', 17S, 16, 19L, float(17), 25D, 26E2, timestamp '2014-09-04 01:02:00.000', date '2014-09-05'), - ('val1b', null, 16, 419L, float(17), 25D, 26E2, timestamp '2014-10-04 01:02:00.000', null), - ('val1b', null, 16, 19L, float(17), 25D, 26E2, timestamp '2014-11-04 01:02:00.000', null), - ('val3b', 8S, null, 719L, float(17), 25D, 26E2, timestamp '2014-05-04 01:02:00.000', date '2014-05-04'), - ('val3b', 8S, null, 19L, float(17), 25D, 26E2, timestamp '2015-05-04 01:02:00.000', date '2015-05-04') - as t3(t3a, t3b, t3c, t3d, t3e, t3f, t3g, t3h, t3i) --- !query schema -struct<> --- !query output - - - --- !query -SELECT (SELECT min(t3d) FROM t3) min_t3d, - (SELECT max(t2h) FROM t2) max_t2h -FROM t1 -WHERE t1a = 'val1c' --- !query schema -struct --- !query output -10 2017-05-04 01:01:00 - - --- !query -SELECT t1a, count(*) -FROM t1 -WHERE t1c IN (SELECT (SELECT min(t3c) FROM t3) - FROM t2 - GROUP BY t2g - HAVING count(*) > 1) -GROUP BY t1a --- !query schema -struct --- !query output -val1a 2 - - --- !query -SELECT (SELECT min(t3d) FROM t3) min_t3d, - null -FROM t1 -WHERE t1a = 'val1c' -UNION -SELECT null, - (SELECT max(t2h) FROM t2) max_t2h -FROM t1 -WHERE t1a = 'val1c' --- !query schema -struct --- !query output -10 NULL -NULL 2017-05-04 01:01:00 - - --- !query -SELECT (SELECT min(t3c) FROM t3) min_t3d -FROM t1 -WHERE t1a = 'val1a' -INTERSECT -SELECT (SELECT min(t2c) FROM t2) min_t2d -FROM t1 -WHERE t1a = 'val1d' --- !query schema -struct --- !query output -12 - - --- !query -SELECT q1.t1a, q2.t2a, q1.min_t3d, q2.avg_t3d -FROM (SELECT t1a, (SELECT min(t3d) FROM t3) min_t3d - FROM t1 - WHERE t1a IN ('val1e', 'val1c')) q1 - FULL OUTER JOIN - (SELECT t2a, (SELECT avg(t3d) FROM t3) avg_t3d - FROM t2 - WHERE t2a IN ('val1c', 'val2a')) q2 -ON q1.t1a = q2.t2a -AND q1.min_t3d < q2.avg_t3d --- !query schema -struct --- !query output -NULL val2a NULL 200.83333333333334 -val1c val1c 10 200.83333333333334 -val1c val1c 10 200.83333333333334 -val1e NULL 10 NULL -val1e NULL 10 NULL -val1e NULL 10 NULL - - --- !query -SELECT (SELECT min(t3d) FROM t3 WHERE t3.t3a = t1.t1a) min_t3d, - (SELECT max(t2h) FROM t2 WHERE t2.t2a = t1.t1a) max_t2h -FROM t1 -WHERE t1a = 'val1b' --- !query schema -struct --- !query output -19 2017-05-04 01:01:00 - - --- !query -SELECT (SELECT min(t3d) FROM t3 WHERE t3a = t1a) min_t3d -FROM t1 -WHERE t1a = 'val1b' -MINUS -SELECT (SELECT min(t3d) FROM t3) abs_min_t3d -FROM t1 -WHERE t1a = 'val1b' --- !query schema -struct --- !query output -19 - - --- !query -SELECT t1a, t1b -FROM t1 -WHERE NOT EXISTS (SELECT (SELECT max(t2b) - FROM t2 LEFT JOIN t1 - ON t2a = t1a - WHERE t2c = t3c) dummy - FROM t3 - WHERE t3b < (SELECT max(t2b) - FROM 
t2 LEFT JOIN t1 - ON t2a = t1a - WHERE t2c = t3c) - AND t3a = t1a) --- !query schema -struct --- !query output -val1a 16 -val1a 16 -val1a 6 -val1a 6 -val1c 8 -val1d 10 -val1d NULL -val1d NULL -val1e 10 -val1e 10 -val1e 10 - - --- !query -SELECT t1a, - (SELECT count(t2d) FROM t2 WHERE t2a = t1a) count_t2, - (SELECT count_if(t2d > 0) FROM t2 WHERE t2a = t1a) count_if_t2, - (SELECT approx_count_distinct(t2d) FROM t2 WHERE t2a = t1a) approx_count_distinct_t2, - (SELECT collect_list(t2d) FROM t2 WHERE t2a = t1a) collect_list_t2, - (SELECT sort_array(collect_set(t2d)) FROM t2 WHERE t2a = t1a) collect_set_t2, - (SELECT hex(count_min_sketch(t2d, 0.5d, 0.5d, 1)) FROM t2 WHERE t2a = t1a) collect_set_t2 -FROM t1 --- !query schema -struct,collect_set_t2:array,collect_set_t2:string> --- !query output -val1a 0 0 0 [] [] 0000000100000000000000000000000100000004000000005D8D6AB90000000000000000000000000000000000000000000000000000000000000000 -val1a 0 0 0 [] [] 0000000100000000000000000000000100000004000000005D8D6AB90000000000000000000000000000000000000000000000000000000000000000 -val1a 0 0 0 [] [] 0000000100000000000000000000000100000004000000005D8D6AB90000000000000000000000000000000000000000000000000000000000000000 -val1a 0 0 0 [] [] 0000000100000000000000000000000100000004000000005D8D6AB90000000000000000000000000000000000000000000000000000000000000000 -val1b 6 6 3 [19,119,319,19,19,19] [19,119,319] 0000000100000000000000060000000100000004000000005D8D6AB90000000000000000000000000000000400000000000000010000000000000001 -val1c 2 2 2 [219,19] [19,219] 0000000100000000000000020000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000001 -val1d 0 0 0 [] [] 0000000100000000000000000000000100000004000000005D8D6AB90000000000000000000000000000000000000000000000000000000000000000 -val1d 0 0 0 [] [] 0000000100000000000000000000000100000004000000005D8D6AB90000000000000000000000000000000000000000000000000000000000000000 -val1d 0 0 0 [] [] 0000000100000000000000000000000100000004000000005D8D6AB90000000000000000000000000000000000000000000000000000000000000000 -val1e 1 1 1 [19] [19] 0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000 -val1e 1 1 1 [19] [19] 0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000 -val1e 1 1 1 [19] [19] 0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000 - - --- !query -SELECT t1c, (SELECT t1c) FROM t1 --- !query schema -struct --- !query output -12 12 -12 12 -16 16 -16 16 -16 16 -16 16 -8 8 -8 8 -NULL NULL -NULL NULL -NULL NULL -NULL NULL - - --- !query -SELECT t1c, (SELECT t1c WHERE t1c = 8) FROM t1 --- !query schema -struct --- !query output -12 NULL -12 NULL -16 NULL -16 NULL -16 NULL -16 NULL -8 8 -8 8 -NULL NULL -NULL NULL -NULL NULL -NULL NULL - - --- !query -SELECT t1c, t1d, (SELECT c + d FROM (SELECT t1c AS c, t1d AS d)) FROM t1 --- !query schema -struct --- !query output -12 10 22 -12 21 33 -16 19 35 -16 19 35 -16 19 35 -16 22 38 -8 10 18 -8 10 18 -NULL 12 NULL -NULL 19 NULL -NULL 19 NULL -NULL 25 NULL - - --- !query -SELECT t1c, (SELECT SUM(c) FROM (SELECT t1c AS c)) FROM t1 --- !query schema -struct --- !query output -12 12 -12 12 -16 16 -16 16 -16 16 -16 16 -8 8 -8 8 -NULL NULL -NULL NULL -NULL NULL -NULL NULL - - --- !query -SELECT t1a, (SELECT SUM(t2b) FROM t2 JOIN (SELECT t1a AS a) ON t2a = a) FROM t1 --- !query schema -struct --- !query 
output -val1a NULL -val1a NULL -val1a NULL -val1a NULL -val1b 36 -val1c 24 -val1d NULL -val1d NULL -val1d NULL -val1e 8 -val1e 8 -val1e 8 - - --- !query -CREATE OR REPLACE TEMPORARY VIEW t1 AS VALUES (0, 1), (1, 2) t1(c1, c2) --- !query schema -struct<> --- !query output - - - --- !query -CREATE OR REPLACE TEMPORARY VIEW t2 AS VALUES (0, 2), (0, 3) t2(c1, c2) --- !query schema -struct<> --- !query output - - - --- !query -SELECT c1, (WITH t AS (SELECT 1 AS a) SELECT a + c1 FROM t) FROM t1 --- !query schema -struct --- !query output -0 1 -1 2 - - --- !query -SELECT c1, (WITH t AS (SELECT * FROM t2 WHERE c1 = t1.c1) SELECT SUM(c2) FROM t) FROM t1 --- !query schema -struct --- !query output -0 5 -1 NULL - - --- !query -SELECT c1, ( - WITH t3 AS (SELECT c1 + 1 AS c1, c2 + 1 AS c2 FROM t2), - t4 AS (SELECT * FROM t3 WHERE t1.c1 = c1) - SELECT SUM(c2) FROM t4 -) FROM t1 --- !query schema -struct --- !query output -0 NULL -1 7 - - --- !query -SELECT c1, ( - WITH t AS (SELECT * FROM t2) - SELECT SUM(c2) FROM (SELECT c1, c2 FROM t UNION SELECT c2, c1 FROM t) r(c1, c2) - WHERE c1 = t1.c1 -) FROM t1 --- !query schema -struct --- !query output -0 5 -1 NULL - - --- !query -WITH v AS (SELECT * FROM t2) -SELECT * FROM t1 WHERE c1 > ( - WITH t AS (SELECT * FROM t2) - SELECT COUNT(*) FROM v WHERE c1 = t1.c1 AND c1 > (SELECT SUM(c2) FROM t WHERE c1 = v.c1) -) --- !query schema -struct --- !query output -1 2 - - --- !query -WITH t AS (SELECT 1 AS a) -SELECT c1, (SELECT a FROM t WHERE a = c1) FROM t1 --- !query schema -struct --- !query output -0 NULL -1 1 - - --- !query -WITH -v1 AS (SELECT c1, c2, rand(0) c3 FROM t1), -v2 AS (SELECT c1, c2, rand(0) c4 FROM v1 WHERE c3 IN (SELECT c3 FROM v1)) -SELECT c1, ( - WITH v3 AS (SELECT c1, c2, rand(0) c5 FROM t2) - SELECT COUNT(*) FROM ( - SELECT * FROM v2 WHERE c1 > 0 - UNION SELECT * FROM v2 WHERE c2 > 0 - UNION SELECT * FROM v3 WHERE c2 > 0 - ) WHERE c1 = v1.c1 -) FROM v1 --- !query schema -struct --- !query output -0 3 -1 1 - - --- !query -SELECT (SELECT a FROM (SELECT 1 AS a UNION ALL SELECT 2 AS a) t) AS b --- !query schema -struct<> --- !query output -java.lang.IllegalStateException -more than one row returned by a subquery used as an expression: -Subquery subquery#1, [id=#2] -+- AdaptiveSparkPlan isFinalPlan=true - +- == Final Plan == - VeloxColumnarToRowExec - +- ColumnarUnion - :- ProjectExecTransformer [1 AS a#3] - : +- InputIteratorTransformer[fake_column#4] - : +- InputAdapter - : +- RowToVeloxColumnar - : +- Scan OneRowRelation[fake_column#4] - +- ProjectExecTransformer [2 AS a#5] - +- InputIteratorTransformer[fake_column#6] - +- InputAdapter - +- RowToVeloxColumnar - +- Scan OneRowRelation[fake_column#6] - +- == Initial Plan == - Union - :- Project [1 AS a#3] - : +- Scan OneRowRelation[] - +- Project [2 AS a#5] - +- Scan OneRowRelation[] - - --- !query -CREATE OR REPLACE TEMP VIEW t1(c1, c2) AS (VALUES (0, 1), (1, 2)) --- !query schema -struct<> --- !query output - - - --- !query -CREATE OR REPLACE TEMP VIEW t2(c1, c2) AS (VALUES (0, 2), (0, 3)) --- !query schema -struct<> --- !query output - - - --- !query -CREATE OR REPLACE TEMP VIEW students(id, name, major, year) AS (VALUES - (0, 'A', 'CS', 2022), - (1, 'B', 'CS', 2022), - (2, 'C', 'Math', 2022)) --- !query schema -struct<> --- !query output - - - --- !query -CREATE OR REPLACE TEMP VIEW exams(sid, course, curriculum, grade, date) AS (VALUES - (0, 'C1', 'CS', 4, 2020), - (0, 'C2', 'CS', 3, 2021), - (1, 'C1', 'CS', 2, 2020), - (1, 'C2', 'CS', 1, 2021)) --- !query schema -struct<> --- !query output - 
- - --- !query -SELECT students.name, exams.course -FROM students, exams -WHERE students.id = exams.sid - AND (students.major = 'CS' OR students.major = 'Games Eng') - AND exams.grade >= ( - SELECT avg(exams.grade) + 1 - FROM exams - WHERE students.id = exams.sid - OR (exams.curriculum = students.major AND students.year > exams.date)) --- !query schema -struct --- !query output -A C1 - - --- !query -SELECT (SELECT min(c2) FROM t2 WHERE t1.c1 > t2.c1) FROM t1 --- !query schema -struct --- !query output -2 -NULL - - --- !query -SELECT (SELECT min(c2) FROM t2 WHERE t1.c1 >= t2.c1 AND t1.c2 < t2.c2) FROM t1 --- !query schema -struct --- !query output -2 -3 - - --- !query -SELECT (SELECT count(*) FROM t2 WHERE t1.c1 > t2.c1) FROM t1 --- !query schema -struct --- !query output -0 -2 - - --- !query -SELECT c, ( - SELECT count(*) - FROM (VALUES ('ab'), ('abc'), ('bc')) t2(c) - WHERE t1.c = substring(t2.c, 1, 1) -) FROM (VALUES ('a'), ('b')) t1(c) --- !query schema -struct --- !query output -a 2 -b 1 - - --- !query -SELECT c, ( - SELECT count(*) - FROM (VALUES (0, 6), (1, 5), (2, 4), (3, 3)) t1(a, b) - WHERE a + b = c -) FROM (VALUES (6)) t2(c) --- !query schema -struct --- !query output -6 4 - - --- !query -SELECT *, (SELECT count(1) is null FROM t2 WHERE t1.c1 = t2.c1) FROM t1 --- !query schema -struct --- !query output -0 1 false -1 2 false - - --- !query -select (select f from (select false as f, max(c2) from t1 where t1.c1 = t1.c1)) from t2 --- !query schema -struct --- !query output -false -false - - --- !query -set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=false --- !query schema -struct --- !query output -spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline false - - --- !query -WITH T AS (SELECT 1 AS a) -SELECT (SELECT sum(1) FROM T WHERE a = col OR upper(col)= 'Y') -FROM (SELECT null as col) as foo --- !query schema -struct --- !query output -NULL - - --- !query -set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=true --- !query schema -struct --- !query output -spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline true - - --- !query -select * from ( - select t1.id c1, ( - select t2.id c from range (1, 2) t2 - where t1.id = t2.id ) c2 - from range (1, 3) t1 ) t -where t.c2 is not null --- !query schema -struct --- !query output -1 1 diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxSQLQueryTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxSQLQueryTestSettings.scala index 5067de74ea7e..345971e9ffc0 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxSQLQueryTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxSQLQueryTestSettings.scala @@ -134,6 +134,7 @@ object VeloxSQLQueryTestSettings extends SQLQueryTestSettings { "subquery/negative-cases/invalid-correlation.sql", "subquery/negative-cases/subq-input-typecheck.sql", "subquery/scalar-subquery/scalar-subquery-predicate.sql", + "subquery/scalar-subquery/scalar-subquery-select.sql", "subquery/subquery-in-from.sql", "postgreSQL/aggregates_part1.sql", "postgreSQL/aggregates_part2.sql", @@ -241,9 +242,6 @@ object VeloxSQLQueryTestSettings extends SQLQueryTestSettings { "group-by.sql", "udf/udf-group-by.sql", // Overwrite some results of regr_intercept, regr_r2, corr. 
- "linear-regression.sql", - // Exception string doesn't match for - // SELECT (SELECT a FROM (SELECT 1 AS a UNION ALL SELECT 2 AS a) t) AS b - "subquery/scalar-subquery/scalar-subquery-select.sql" + "linear-regression.sql" ) } diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 98942462aec8..27557f92046d 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -81,8 +81,6 @@ class VeloxTestSettings extends BackendTestSettings { // NEW SUITE: disable as it expects exception which doesn't happen when offloaded to gluten .exclude( "INCONSISTENT_BEHAVIOR_CROSS_VERSION: compatibility with Spark 2.4/3.2 in reading/writing dates") - // gluten throws different exception - .excludeByPrefix("SCALAR_SUBQUERY_TOO_MANY_ROWS:") // Doesn't support unhex with failOnError=true. .exclude("CONVERSION_INVALID_INPUT: to_binary conversion function hex") enableSuite[GlutenQueryParsingErrorsSuite] diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/errors/GlutenQueryExecutionErrorsSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/errors/GlutenQueryExecutionErrorsSuite.scala index d9b661c8a4a7..8896541c29d2 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/errors/GlutenQueryExecutionErrorsSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/errors/GlutenQueryExecutionErrorsSuite.scala @@ -24,15 +24,4 @@ class GlutenQueryExecutionErrorsSuite override protected def getResourceParquetFilePath(name: String): String = { getWorkspaceFilePath("sql", "core", "src", "test", "resources").toString + "/" + name } - - testGluten( - "SCALAR_SUBQUERY_TOO_MANY_ROWS: " + - "More than one row returned by a subquery used as an expression") { - val exception = intercept[IllegalStateException] { - sql("select (select a from (select 1 as a union all select 2 as a) t) as b").collect() - } - assert( - exception.getMessage.contains("more than one row returned by a subquery" + - " used as an expression")) - } }