Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-51050] [SQL] Add group by alias tests to the group-by.sql #49750

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,75 @@ Filter (k#x > 1)
+- LocalRelation [a#x, b#x]


-- !query
SELECT a AS k FROM testData GROUP BY 'k'
-- !query analysis
org.apache.spark.sql.catalyst.ExtendedAnalysisException
{
"errorClass" : "MISSING_AGGREGATION",
"sqlState" : "42803",
"messageParameters" : {
"expression" : "\"a\"",
"expressionAnyValue" : "\"any_value(a)\""
}
}


-- !query
SELECT 1 AS k FROM testData GROUP BY 'k'
-- !query analysis
Aggregate [k], [1 AS k#x]
+- SubqueryAlias testdata
+- View (`testData`, [a#x, b#x])
+- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
+- Project [a#x, b#x]
+- SubqueryAlias testData
+- LocalRelation [a#x, b#x]


-- !query
SELECT concat_ws(' ', a, b) FROM testData GROUP BY `concat_ws( , a, b)`
-- !query analysis
Aggregate [concat_ws( , cast(a#x as string), cast(b#x as string))], [concat_ws( , cast(a#x as string), cast(b#x as string)) AS concat_ws( , a, b)#x]
+- SubqueryAlias testdata
+- View (`testData`, [a#x, b#x])
+- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
+- Project [a#x, b#x]
+- SubqueryAlias testData
+- LocalRelation [a#x, b#x]


-- !query
SELECT 1 AS a FROM testData GROUP BY a
-- !query analysis
Aggregate [a#x], [1 AS a#x]
+- SubqueryAlias testdata
+- View (`testData`, [a#x, b#x])
+- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
+- Project [a#x, b#x]
+- SubqueryAlias testData
+- LocalRelation [a#x, b#x]


-- !query
SELECT 1 AS a FROM testData GROUP BY `a`
-- !query analysis
Aggregate [a#x], [1 AS a#x]
+- SubqueryAlias testdata
+- View (`testData`, [a#x, b#x])
+- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
+- Project [a#x, b#x]
+- SubqueryAlias testData
+- LocalRelation [a#x, b#x]


-- !query
SELECT 1 GROUP BY `1`
-- !query analysis
Aggregate [1], [1 AS 1#x]
+- OneRowRelation


-- !query
SELECT a AS k, COUNT(non_existing) FROM testData GROUP BY k
-- !query analysis
Expand Down Expand Up @@ -320,6 +389,38 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
}


-- !query
SELECT 1 GROUP BY `1`
-- !query analysis
org.apache.spark.sql.catalyst.ExtendedAnalysisException
{
"errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please address this.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea was to add some tests that should fail (with `set spark.sql.groupByAliases=false;`). I can remove them if needed.

"sqlState" : "42703",
"messageParameters" : {
"objectName" : "`1`"
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 19,
"stopIndex" : 21,
"fragment" : "`1`"
} ]
}


-- !query
SELECT 1 AS a FROM testData GROUP BY `a`
-- !query analysis
Aggregate [a#x], [1 AS a#x]
+- SubqueryAlias testdata
+- View (`testData`, [a#x, b#x])
+- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x]
+- Project [a#x, b#x]
+- SubqueryAlias testData
+- LocalRelation [a#x, b#x]


-- !query
SELECT a, COUNT(1) FROM testData WHERE false GROUP BY a
-- !query analysis
Expand Down
18 changes: 18 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/group-by.sql
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,20 @@ SELECT COUNT(DISTINCT b), COUNT(DISTINCT b, c) FROM (SELECT 1 AS a, 2 AS b, 3 AS
SELECT a AS k, COUNT(b) FROM testData GROUP BY k;
SELECT a AS k, COUNT(b) FROM testData GROUP BY k HAVING k > 1;

-- GROUP BY literal
SELECT a AS k FROM testData GROUP BY 'k';
SELECT 1 AS k FROM testData GROUP BY 'k';

-- GROUP BY alias with the function name
SELECT concat_ws(' ', a, b) FROM testData GROUP BY `concat_ws( , a, b)`;

-- GROUP BY a column with the same name as an alias used in the project list
SELECT 1 AS a FROM testData GROUP BY a;
SELECT 1 AS a FROM testData GROUP BY `a`;

-- GROUP BY implicit alias
SELECT 1 GROUP BY `1`;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
SELECT 1 GROUP BY `1`;
SELECT 1 GROUP BY `1`;
-- GROUP BY alias with the subquery name
SELECT (SELECT a FROM testData LIMIT 1) + (SELECT b FROM testData LIMIT 1) FROM VALUES (1, 2) GROUP BY `(SELECTaFROMtestDataLIMIT1)+(SELECTbFROMtestDataLIMIT1)`;
-- GROUP BY with expression subqueries
SELECT a, count(*) FROM testData GROUP BY (SELECT b FROM testData);
SELECT a, count(*) FROM testData GROUP BY a, (SELECT b FROM testData);
SELECT a, count(*) FROM testData GROUP BY a, (SELECT b FROM testData LIMIT 1);
SELECT a, count(*) FROM testData GROUP BY a, b IN (SELECT a FROM testData);
SELECT a, count(*) FROM testData GROUP BY a, a IN (SELECT b FROM testData);
SELECT a, count(*) FROM testData GROUP BY a, EXISTS(SELECT b FROM testData);


-- GROUP BY alias with invalid col in SELECT list
SELECT a AS k, COUNT(non_existing) FROM testData GROUP BY k;

Expand All @@ -64,6 +78,10 @@ set spark.sql.groupByAliases=false;

-- Check analysis exceptions
SELECT a AS k, COUNT(b) FROM testData GROUP BY k;
SELECT 1 GROUP BY `1`;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a duplicate.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The idea was to add some tests that should fail (with `set spark.sql.groupByAliases=false;`). I can remove them if needed.


-- GROUP BY a column with the same name as an alias used in the project list
SELECT 1 AS a FROM testData GROUP BY `a`;
mihailoale-db marked this conversation as resolved.
Show resolved Hide resolved

-- Aggregate with empty input and non-empty GroupBy expressions.
SELECT a, COUNT(1) FROM testData WHERE false GROUP BY a;
Expand Down
103 changes: 103 additions & 0 deletions sql/core/src/test/resources/sql-tests/results/group-by.sql.out
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,76 @@ struct<k:int,count(b):bigint>
3 2


-- !query
SELECT a AS k FROM testData GROUP BY 'k'
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.ExtendedAnalysisException
{
"errorClass" : "MISSING_AGGREGATION",
"sqlState" : "42803",
"messageParameters" : {
"expression" : "\"a\"",
"expressionAnyValue" : "\"any_value(a)\""
}
}


-- !query
SELECT 1 AS k FROM testData GROUP BY 'k'
-- !query schema
struct<k:int>
-- !query output
1


-- !query
SELECT concat_ws(' ', a, b) FROM testData GROUP BY `concat_ws( , a, b)`
-- !query schema
struct<concat_ws( , a, b):string>
-- !query output

1
1 1
1 2
2 1
2 2
3
3 1
3 2


-- !query
SELECT 1 AS a FROM testData GROUP BY a
-- !query schema
struct<a:int>
-- !query output
1
1
1
1


-- !query
SELECT 1 AS a FROM testData GROUP BY `a`
-- !query schema
struct<a:int>
-- !query output
1
1
1
1


-- !query
SELECT 1 GROUP BY `1`
-- !query schema
struct<1:int>
-- !query output
1


-- !query
SELECT a AS k, COUNT(non_existing) FROM testData GROUP BY k
-- !query schema
Expand Down Expand Up @@ -294,6 +364,39 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
}


-- !query
SELECT 1 GROUP BY `1`
-- !query schema
struct<>
-- !query output
org.apache.spark.sql.catalyst.ExtendedAnalysisException
{
"errorClass" : "UNRESOLVED_COLUMN.WITHOUT_SUGGESTION",
"sqlState" : "42703",
"messageParameters" : {
"objectName" : "`1`"
},
"queryContext" : [ {
"objectType" : "",
"objectName" : "",
"startIndex" : 19,
"stopIndex" : 21,
"fragment" : "`1`"
} ]
}


-- !query
SELECT 1 AS a FROM testData GROUP BY `a`
-- !query schema
struct<a:int>
-- !query output
1
1
1
1


-- !query
SELECT a, COUNT(1) FROM testData WHERE false GROUP BY a
-- !query schema
Expand Down