From 9b3f59a1caeb591778178ba5093153157278ecec Mon Sep 17 00:00:00 2001 From: Joey Date: Wed, 17 Apr 2024 21:15:57 +0800 Subject: [PATCH] [VL][UT] Fix scalar-subquery-select.sql in spark35(#5425) --- .../scalar-subquery-select.sql | 126 +---------- .../scalar-subquery-select.sql.out | 207 ++---------------- .../spark/sql/GlutenSQLQueryTestSuite.scala | 1 - 3 files changed, 25 insertions(+), 309 deletions(-) diff --git a/gluten-ut/spark35/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql b/gluten-ut/spark35/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql index 48d1594fa51a..741292d2c0fa 100644 --- a/gluten-ut/spark35/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql +++ b/gluten-ut/spark35/src/test/resources/sql-tests/inputs/subquery/scalar-subquery/scalar-subquery-select.sql @@ -241,123 +241,17 @@ SELECT *, (SELECT count(1) is null FROM t2 WHERE t1.c1 = t2.c1) FROM t1; select (select f from (select false as f, max(c2) from t1 where t1.c1 = t1.c1)) from t2; --- Set operations in correlation path - -CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 0); -CREATE OR REPLACE TEMP VIEW t1(t1a, t1b, t1c) AS VALUES (1, 1, 3); -CREATE OR REPLACE TEMP VIEW t2(t2a, t2b, t2c) AS VALUES (1, 1, 5), (2, 2, 7); - -SELECT t0a, (SELECT sum(c) FROM - (SELECT t1c as c - FROM t1 - WHERE t1a = t0a - UNION ALL - SELECT t2c as c - FROM t2 - WHERE t2b = t0b) -) -FROM t0; - -SELECT t0a, (SELECT sum(c) FROM - (SELECT t1c as c - FROM t1 - WHERE t1a = t0a - UNION ALL - SELECT t2c as c - FROM t2 - WHERE t2a = t0a) -) -FROM t0; - -SELECT t0a, (SELECT sum(c) FROM - (SELECT t1c as c - FROM t1 - WHERE t1a > t0a - UNION ALL - SELECT t2c as c - FROM t2 - WHERE t2b <= t0b) -) -FROM t0; - -SELECT t0a, (SELECT sum(t1c) FROM - (SELECT t1c - FROM t1 - WHERE t1a = t0a - UNION ALL - SELECT t2c - FROM t2 - WHERE t2b = t0b) -) -FROM t0; - -SELECT t0a, (SELECT sum(t1c) FROM - (SELECT t1c - FROM t1 - WHERE t1a = t0a - UNION DISTINCT - SELECT t2c - FROM t2 - WHERE t2b = t0b) -) -FROM t0; - --- Tests for column aliasing -SELECT t0a, (SELECT sum(t1a + 3 * t1b + 5 * t1c) FROM - (SELECT t1c as t1a, t1a as t1b, t0a as t1c - FROM t1 - WHERE t1a = t0a - UNION ALL - SELECT t0a as t2b, t2c as t1a, t0b as t2c - FROM t2 - WHERE t2b = t0b) -) -FROM t0; - --- Test handling of COUNT bug -SELECT t0a, (SELECT count(t1c) FROM - (SELECT t1c - FROM t1 - WHERE t1a = t0a - UNION DISTINCT - SELECT t2c - FROM t2 - WHERE t2b = t0b) -) -FROM t0; - --- Correlated references in project -SELECT t0a, (SELECT sum(d) FROM - (SELECT t1a - t0a as d - FROM t1 - UNION ALL - SELECT t2a - t0a as d - FROM t2) -) -FROM t0; - --- Correlated references in aggregate - unsupported -SELECT t0a, (SELECT sum(d) FROM - (SELECT sum(t0a) as d - FROM t1 - UNION ALL - SELECT sum(t2a) + t0a as d - FROM t2) -) -FROM t0; +-- SPARK-43596: handle IsNull when rewriting the domain join +set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=false; +WITH T AS (SELECT 1 AS a) +SELECT (SELECT sum(1) FROM T WHERE a = col OR upper(col)= 'Y') +FROM (SELECT null as col) as foo; +set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=true; -- SPARK-43760: the result of the subquery can be NULL. -select * -from -( +select * from ( select t1.id c1, ( - select sum(c) - from ( - select t2.id * t2.id c - from range (1, 2) t2 where t1.id = t2.id - group by t2.id - ) - ) c2 - from range (1, 3) t1 -) t + select t2.id c from range (1, 2) t2 + where t1.id = t2.id ) c2 + from range (1, 3) t1 ) t where t.c2 is not null; diff --git a/gluten-ut/spark35/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out b/gluten-ut/spark35/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out index 088359d39b86..5c6f141d8505 100644 --- a/gluten-ut/spark35/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out +++ b/gluten-ut/spark35/src/test/resources/sql-tests/results/subquery/scalar-subquery/scalar-subquery-select.sql.out @@ -576,214 +576,37 @@ false -- !query -CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 0) +set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=false -- !query schema -struct<> --- !query output - - - --- !query -CREATE OR REPLACE TEMP VIEW t1(t1a, t1b, t1c) AS VALUES (1, 1, 3) --- !query schema -struct<> +struct -- !query output - +spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline false -- !query -CREATE OR REPLACE TEMP VIEW t2(t2a, t2b, t2c) AS VALUES (1, 1, 5), (2, 2, 7) +WITH T AS (SELECT 1 AS a) +SELECT (SELECT sum(1) FROM T WHERE a = col OR upper(col)= 'Y') +FROM (SELECT null as col) as foo -- !query schema -struct<> +struct -- !query output - - - --- !query -SELECT t0a, (SELECT sum(c) FROM - (SELECT t1c as c - FROM t1 - WHERE t1a = t0a - UNION ALL - SELECT t2c as c - FROM t2 - WHERE t2b = t0b) -) -FROM t0 --- !query schema -struct --- !query output -1 8 -2 NULL - - --- !query -SELECT t0a, (SELECT sum(c) FROM - (SELECT t1c as c - FROM t1 - WHERE t1a = t0a - UNION ALL - SELECT t2c as c - FROM t2 - WHERE t2a = t0a) -) -FROM t0 --- !query schema -struct --- !query output -1 8 -2 7 - - --- !query -SELECT t0a, (SELECT sum(c) FROM - (SELECT t1c as c - FROM t1 - WHERE t1a > t0a - UNION ALL - SELECT t2c as c - FROM t2 - WHERE t2b <= t0b) -) -FROM t0 --- !query schema -struct --- !query output -1 5 -2 NULL - - --- !query -SELECT t0a, (SELECT sum(t1c) FROM - (SELECT t1c - FROM t1 - WHERE t1a = t0a - UNION ALL - SELECT t2c - FROM t2 - WHERE t2b = t0b) -) -FROM t0 --- !query schema -struct --- !query output -1 8 -2 NULL - - --- !query -SELECT t0a, (SELECT sum(t1c) FROM - (SELECT t1c - FROM t1 - WHERE t1a = t0a - UNION DISTINCT - SELECT t2c - FROM t2 - WHERE t2b = t0b) -) -FROM t0 --- !query schema -struct --- !query output -1 8 -2 NULL - - --- !query -SELECT t0a, (SELECT sum(t1a + 3 * t1b + 5 * t1c) FROM - (SELECT t1c as t1a, t1a as t1b, t0a as t1c - FROM t1 - WHERE t1a = t0a - UNION ALL - SELECT t0a as t2b, t2c as t1a, t0b as t2c - FROM t2 - WHERE t2b = t0b) -) -FROM t0 --- !query schema -struct --- !query output -1 32 -2 NULL - - --- !query -SELECT t0a, (SELECT count(t1c) FROM - (SELECT t1c - FROM t1 - WHERE t1a = t0a - UNION DISTINCT - SELECT t2c - FROM t2 - WHERE t2b = t0b) -) -FROM t0 --- !query schema -struct --- !query output -1 2 -2 0 +NULL -- !query -SELECT t0a, (SELECT sum(d) FROM - (SELECT t1a - t0a as d - FROM t1 - UNION ALL - SELECT t2a - t0a as d - FROM t2) -) -FROM t0 +set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=true -- !query schema -struct +struct -- !query output -1 1 -2 -2 +spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline true -- !query -SELECT t0a, (SELECT sum(d) FROM - (SELECT sum(t0a) as d - FROM t1 - UNION ALL - SELECT sum(t2a) + t0a as d - FROM t2) -) -FROM t0 --- !query schema -struct<> --- !query output -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.CORRELATED_REFERENCE", - "sqlState" : "0A000", - "messageParameters" : { - "sqlExprs" : "\"sum(t0a) AS d\"" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 36, - "stopIndex" : 67, - "fragment" : "SELECT sum(t0a) as d\n FROM t1" - } ] -} - - --- !query -select * -from -( +select * from ( select t1.id c1, ( - select sum(c) - from ( - select t2.id * t2.id c - from range (1, 2) t2 where t1.id = t2.id - group by t2.id - ) - ) c2 - from range (1, 3) t1 -) t + select t2.id c from range (1, 2) t2 + where t1.id = t2.id ) c2 + from range (1, 3) t1 ) t where t.c2 is not null -- !query schema struct diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala index 9f96fa0b0fbf..b1f3945bf192 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala @@ -227,7 +227,6 @@ class GlutenSQLQueryTestSuite "window.sql", // Local window fixes are not added. // Disable for Spark 3. "group-by.sql", - "subquery/scalar-subquery/scalar-subquery-select.sql", "udf/udf-group-by.sql - Scala UDF" ) ++ otherIgnoreList ++ udafIgnoreList