Skip to content

Commit

Permalink
[VL][UT] Fix scalar-subquery-select.sql in spark35(#5425)
Browse files Browse the repository at this point in the history
  • Loading branch information
liujiayi771 authored Apr 17, 2024
1 parent 5574938 commit 9b3f59a
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 309 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -241,123 +241,17 @@ SELECT *, (SELECT count(1) is null FROM t2 WHERE t1.c1 = t2.c1) FROM t1;

select (select f from (select false as f, max(c2) from t1 where t1.c1 = t1.c1)) from t2;

-- Set operations in correlation path

CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 0);
CREATE OR REPLACE TEMP VIEW t1(t1a, t1b, t1c) AS VALUES (1, 1, 3);
CREATE OR REPLACE TEMP VIEW t2(t2a, t2b, t2c) AS VALUES (1, 1, 5), (2, 2, 7);

SELECT t0a, (SELECT sum(c) FROM
(SELECT t1c as c
FROM t1
WHERE t1a = t0a
UNION ALL
SELECT t2c as c
FROM t2
WHERE t2b = t0b)
)
FROM t0;

SELECT t0a, (SELECT sum(c) FROM
(SELECT t1c as c
FROM t1
WHERE t1a = t0a
UNION ALL
SELECT t2c as c
FROM t2
WHERE t2a = t0a)
)
FROM t0;

SELECT t0a, (SELECT sum(c) FROM
(SELECT t1c as c
FROM t1
WHERE t1a > t0a
UNION ALL
SELECT t2c as c
FROM t2
WHERE t2b <= t0b)
)
FROM t0;

SELECT t0a, (SELECT sum(t1c) FROM
(SELECT t1c
FROM t1
WHERE t1a = t0a
UNION ALL
SELECT t2c
FROM t2
WHERE t2b = t0b)
)
FROM t0;

SELECT t0a, (SELECT sum(t1c) FROM
(SELECT t1c
FROM t1
WHERE t1a = t0a
UNION DISTINCT
SELECT t2c
FROM t2
WHERE t2b = t0b)
)
FROM t0;

-- Tests for column aliasing
SELECT t0a, (SELECT sum(t1a + 3 * t1b + 5 * t1c) FROM
(SELECT t1c as t1a, t1a as t1b, t0a as t1c
FROM t1
WHERE t1a = t0a
UNION ALL
SELECT t0a as t2b, t2c as t1a, t0b as t2c
FROM t2
WHERE t2b = t0b)
)
FROM t0;

-- Test handling of COUNT bug
SELECT t0a, (SELECT count(t1c) FROM
(SELECT t1c
FROM t1
WHERE t1a = t0a
UNION DISTINCT
SELECT t2c
FROM t2
WHERE t2b = t0b)
)
FROM t0;

-- Correlated references in project
SELECT t0a, (SELECT sum(d) FROM
(SELECT t1a - t0a as d
FROM t1
UNION ALL
SELECT t2a - t0a as d
FROM t2)
)
FROM t0;

-- Correlated references in aggregate - unsupported
SELECT t0a, (SELECT sum(d) FROM
(SELECT sum(t0a) as d
FROM t1
UNION ALL
SELECT sum(t2a) + t0a as d
FROM t2)
)
FROM t0;
-- SPARK-43596: handle IsNull when rewriting the domain join
set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=false;
WITH T AS (SELECT 1 AS a)
SELECT (SELECT sum(1) FROM T WHERE a = col OR upper(col)= 'Y')
FROM (SELECT null as col) as foo;
set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=true;

-- SPARK-43760: the result of the subquery can be NULL.
select *
from
(
select * from (
select t1.id c1, (
select sum(c)
from (
select t2.id * t2.id c
from range (1, 2) t2 where t1.id = t2.id
group by t2.id
)
) c2
from range (1, 3) t1
) t
select t2.id c from range (1, 2) t2
where t1.id = t2.id ) c2
from range (1, 3) t1 ) t
where t.c2 is not null;
Original file line number Diff line number Diff line change
Expand Up @@ -576,214 +576,37 @@ false


-- !query
CREATE OR REPLACE TEMP VIEW t0(t0a, t0b) AS VALUES (1, 1), (2, 0)
set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=false
-- !query schema
struct<>
-- !query output



-- !query
CREATE OR REPLACE TEMP VIEW t1(t1a, t1b, t1c) AS VALUES (1, 1, 3)
-- !query schema
struct<>
struct<key:string,value:string>
-- !query output

spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline false


-- !query
CREATE OR REPLACE TEMP VIEW t2(t2a, t2b, t2c) AS VALUES (1, 1, 5), (2, 2, 7)
WITH T AS (SELECT 1 AS a)
SELECT (SELECT sum(1) FROM T WHERE a = col OR upper(col)= 'Y')
FROM (SELECT null as col) as foo
-- !query schema
struct<>
struct<scalarsubquery(col, col):bigint>
-- !query output



-- !query
SELECT t0a, (SELECT sum(c) FROM
(SELECT t1c as c
FROM t1
WHERE t1a = t0a
UNION ALL
SELECT t2c as c
FROM t2
WHERE t2b = t0b)
)
FROM t0
-- !query schema
struct<t0a:int,scalarsubquery(t0a, t0b):bigint>
-- !query output
1 8
2 NULL


-- !query
SELECT t0a, (SELECT sum(c) FROM
(SELECT t1c as c
FROM t1
WHERE t1a = t0a
UNION ALL
SELECT t2c as c
FROM t2
WHERE t2a = t0a)
)
FROM t0
-- !query schema
struct<t0a:int,scalarsubquery(t0a, t0a):bigint>
-- !query output
1 8
2 7


-- !query
SELECT t0a, (SELECT sum(c) FROM
(SELECT t1c as c
FROM t1
WHERE t1a > t0a
UNION ALL
SELECT t2c as c
FROM t2
WHERE t2b <= t0b)
)
FROM t0
-- !query schema
struct<t0a:int,scalarsubquery(t0a, t0b):bigint>
-- !query output
1 5
2 NULL


-- !query
SELECT t0a, (SELECT sum(t1c) FROM
(SELECT t1c
FROM t1
WHERE t1a = t0a
UNION ALL
SELECT t2c
FROM t2
WHERE t2b = t0b)
)
FROM t0
-- !query schema
struct<t0a:int,scalarsubquery(t0a, t0b):bigint>
-- !query output
1 8
2 NULL


-- !query
SELECT t0a, (SELECT sum(t1c) FROM
(SELECT t1c
FROM t1
WHERE t1a = t0a
UNION DISTINCT
SELECT t2c
FROM t2
WHERE t2b = t0b)
)
FROM t0
-- !query schema
struct<t0a:int,scalarsubquery(t0a, t0b):bigint>
-- !query output
1 8
2 NULL


-- !query
SELECT t0a, (SELECT sum(t1a + 3 * t1b + 5 * t1c) FROM
(SELECT t1c as t1a, t1a as t1b, t0a as t1c
FROM t1
WHERE t1a = t0a
UNION ALL
SELECT t0a as t2b, t2c as t1a, t0b as t2c
FROM t2
WHERE t2b = t0b)
)
FROM t0
-- !query schema
struct<t0a:int,scalarsubquery(t0a, t0a, t0a, t0b, t0b):bigint>
-- !query output
1 32
2 NULL


-- !query
SELECT t0a, (SELECT count(t1c) FROM
(SELECT t1c
FROM t1
WHERE t1a = t0a
UNION DISTINCT
SELECT t2c
FROM t2
WHERE t2b = t0b)
)
FROM t0
-- !query schema
struct<t0a:int,scalarsubquery(t0a, t0b):bigint>
-- !query output
1 2
2 0
NULL


-- !query
SELECT t0a, (SELECT sum(d) FROM
(SELECT t1a - t0a as d
FROM t1
UNION ALL
SELECT t2a - t0a as d
FROM t2)
)
FROM t0
set spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline=true
-- !query schema
struct<t0a:int,scalarsubquery(t0a, t0a):bigint>
struct<key:string,value:string>
-- !query output
1 1
2 -2
spark.sql.optimizer.optimizeOneRowRelationSubquery.alwaysInline true


-- !query
SELECT t0a, (SELECT sum(d) FROM
(SELECT sum(t0a) as d
FROM t1
UNION ALL
SELECT sum(t2a) + t0a as d
FROM t2)
)
FROM t0
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.AnalysisException
{
"errorClass" : "UNSUPPORTED_SUBQUERY_EXPRESSION_CATEGORY.CORRELATED_REFERENCE",
"sqlState" : "0A000",
"messageParameters" : {
"sqlExprs" : "\"sum(t0a) AS d\""
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 36,
"stopIndex" : 67,
"fragment" : "SELECT sum(t0a) as d\n FROM t1"
} ]
}


-- !query
select *
from
(
select * from (
select t1.id c1, (
select sum(c)
from (
select t2.id * t2.id c
from range (1, 2) t2 where t1.id = t2.id
group by t2.id
)
) c2
from range (1, 3) t1
) t
select t2.id c from range (1, 2) t2
where t1.id = t2.id ) c2
from range (1, 3) t1 ) t
where t.c2 is not null
-- !query schema
struct<c1:bigint,c2:bigint>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,6 @@ class GlutenSQLQueryTestSuite
"window.sql", // Local window fixes are not added.
// Disable for Spark 3.
"group-by.sql",
"subquery/scalar-subquery/scalar-subquery-select.sql",
"udf/udf-group-by.sql - Scala UDF"
) ++ otherIgnoreList ++ udafIgnoreList

Expand Down

0 comments on commit 9b3f59a

Please sign in to comment.