-
Notifications
You must be signed in to change notification settings - Fork 445
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[GLUTEN-5341][VL] Enable linear-regression.sql in GlutenSQLQueryTestSuite for Spark 3.5 (#5469)
Enable linear-regression.sql in GlutenSQLQueryTestSuite for Spark 3.5
- Loading branch information
1 parent
b08258e
commit 4d585fa
Showing
3 changed files
with
330 additions
and
0 deletions.
There are no files selected for viewing
52 changes: 52 additions & 0 deletions
52
gluten-ut/spark35/src/test/resources/sql-tests/inputs/linear-regression.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
-- Test data. | ||
-- Shared fixture for every query in this file: five (k, y, x) rows in two groups. | ||
--   k = 1: a single row whose x is null (no complete (y, x) pair). | ||
--   k = 2: three complete (y, x) pairs plus one row whose x is null. | ||
-- y is never null here, so x alone decides whether a row forms a complete pair. | ||
CREATE OR REPLACE TEMPORARY VIEW testRegression AS SELECT * FROM VALUES | ||
(1, 10, null), (2, 10, 11), (2, 20, 22), (2, 25, null), (2, 30, 35) | ||
AS testRegression(k, y, x); | ||
|
||
-- SPARK-37613: Support ANSI Aggregate Function: regr_count | ||
-- regr_count(y, x) counts the rows in which both y and x are non-null. | ||
-- The four queries cover: the whole table, the whole table with null-x rows | ||
-- removed up front, per-k groups next to an unfiltered count(*), and per-k | ||
-- groups next to count(*) FILTERed to non-null x as a reference value. | ||
SELECT regr_count(y, x) FROM testRegression; | ||
SELECT regr_count(y, x) FROM testRegression WHERE x IS NOT NULL; | ||
SELECT k, count(*), regr_count(y, x) FROM testRegression GROUP BY k; | ||
SELECT k, count(*) FILTER (WHERE x IS NOT NULL), regr_count(y, x) FROM testRegression GROUP BY k; | ||
|
||
-- SPARK-37613: Support ANSI Aggregate Function: regr_r2 | ||
-- NOTE(review): this ticket id duplicates the regr_count section above; regr_r2 | ||
-- appears to have been added under SPARK-37641 upstream - TODO confirm and fix. | ||
-- regr_r2(y, x) is the coefficient of determination over the non-null (y, x) pairs. | ||
-- corr(y, x), plain and FILTERed to non-null x, is selected alongside it in the | ||
-- grouped queries as a reference value. | ||
SELECT regr_r2(y, x) FROM testRegression; | ||
SELECT regr_r2(y, x) FROM testRegression WHERE x IS NOT NULL; | ||
SELECT k, corr(y, x), regr_r2(y, x) FROM testRegression GROUP BY k; | ||
SELECT k, corr(y, x) FILTER (WHERE x IS NOT NULL), regr_r2(y, x) FROM testRegression GROUP BY k; | ||
|
||
-- SPARK-37614: Support ANSI Aggregate Function: regr_avgx & regr_avgy | ||
-- regr_avgx(y, x) averages x and regr_avgy(y, x) averages y, each restricted to | ||
-- rows where both y and x are non-null. | ||
-- The third query pairs them with plain avg(x) / avg(y): plain avg(y) also | ||
-- includes the rows whose x is null, so it can differ from regr_avgy(y, x). | ||
-- The fourth query FILTERs avg() to complete pairs so both forms line up. | ||
SELECT regr_avgx(y, x), regr_avgy(y, x) FROM testRegression; | ||
SELECT regr_avgx(y, x), regr_avgy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL; | ||
SELECT k, avg(x), avg(y), regr_avgx(y, x), regr_avgy(y, x) FROM testRegression GROUP BY k; | ||
SELECT k, avg(x) FILTER (WHERE x IS NOT NULL AND y IS NOT NULL), avg(y) FILTER (WHERE x IS NOT NULL AND y IS NOT NULL), regr_avgx(y, x), regr_avgy(y, x) FROM testRegression GROUP BY k; | ||
|
||
-- SPARK-37672: Support ANSI Aggregate Function: regr_sxx | ||
-- regr_sxx(y, x) is the sum of squared deviations of x from its mean, taken | ||
-- over the non-null (y, x) pairs. | ||
-- Each query runs with and without an explicit WHERE that drops incomplete | ||
-- pairs. In the grouped form, the WHERE removes the only k = 1 row, so that | ||
-- group is absent from the result instead of producing a row. | ||
SELECT regr_sxx(y, x) FROM testRegression; | ||
SELECT regr_sxx(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL; | ||
SELECT k, regr_sxx(y, x) FROM testRegression GROUP BY k; | ||
SELECT k, regr_sxx(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k; | ||
|
||
-- SPARK-37681: Support ANSI Aggregate Function: regr_sxy | ||
-- regr_sxy(y, x) is the sum of products of the deviations of y and x from | ||
-- their means, taken over the non-null (y, x) pairs. | ||
-- Each query runs with and without an explicit WHERE that drops incomplete | ||
-- pairs. In the grouped form, the WHERE removes the only k = 1 row, so that | ||
-- group is absent from the result instead of producing a row. | ||
SELECT regr_sxy(y, x) FROM testRegression; | ||
SELECT regr_sxy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL; | ||
SELECT k, regr_sxy(y, x) FROM testRegression GROUP BY k; | ||
SELECT k, regr_sxy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k; | ||
|
||
-- SPARK-37702: Support ANSI Aggregate Function: regr_syy | ||
-- regr_syy(y, x) is the sum of squared deviations of y from its mean, taken | ||
-- over the non-null (y, x) pairs (rows whose x is null do not contribute). | ||
-- Each query runs with and without an explicit WHERE that drops incomplete | ||
-- pairs. In the grouped form, the WHERE removes the only k = 1 row, so that | ||
-- group is absent from the result instead of producing a row. | ||
SELECT regr_syy(y, x) FROM testRegression; | ||
SELECT regr_syy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL; | ||
SELECT k, regr_syy(y, x) FROM testRegression GROUP BY k; | ||
SELECT k, regr_syy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k; | ||
|
||
-- SPARK-39230: Support ANSI Aggregate Function: regr_slope | ||
-- regr_slope(y, x) is the slope of the least-squares line fitted to the | ||
-- non-null (y, x) pairs, with y as the dependent variable. | ||
-- Each query runs with and without an explicit WHERE that drops incomplete | ||
-- pairs. In the grouped form, the WHERE removes the only k = 1 row, so that | ||
-- group is absent from the result instead of producing a row. | ||
SELECT regr_slope(y, x) FROM testRegression; | ||
SELECT regr_slope(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL; | ||
SELECT k, regr_slope(y, x) FROM testRegression GROUP BY k; | ||
SELECT k, regr_slope(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k; | ||
|
||
-- SPARK-37623: Support ANSI Aggregate Function: regr_intercept | ||
-- regr_intercept(y, x) is the y-intercept of the least-squares line fitted to | ||
-- the non-null (y, x) pairs, with y as the dependent variable. | ||
-- Each query runs with and without an explicit WHERE that drops incomplete | ||
-- pairs. In the grouped form, the WHERE removes the only k = 1 row, so that | ||
-- group is absent from the result instead of producing a row. | ||
SELECT regr_intercept(y, x) FROM testRegression; | ||
SELECT regr_intercept(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL; | ||
SELECT k, regr_intercept(y, x) FROM testRegression GROUP BY k; | ||
SELECT k, regr_intercept(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k; |
276 changes: 276 additions & 0 deletions
276
gluten-ut/spark35/src/test/resources/sql-tests/results/linear-regression.sql.out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,276 @@ | ||
-- Automatically generated by SQLQueryTestSuite | ||
-- !query | ||
CREATE OR REPLACE TEMPORARY VIEW testRegression AS SELECT * FROM VALUES | ||
(1, 10, null), (2, 10, 11), (2, 20, 22), (2, 25, null), (2, 30, 35) | ||
AS testRegression(k, y, x) | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
SELECT regr_count(y, x) FROM testRegression | ||
-- !query schema | ||
struct<regr_count(y, x):bigint> | ||
-- !query output | ||
3 | ||
|
||
|
||
-- !query | ||
SELECT regr_count(y, x) FROM testRegression WHERE x IS NOT NULL | ||
-- !query schema | ||
struct<regr_count(y, x):bigint> | ||
-- !query output | ||
3 | ||
|
||
|
||
-- !query | ||
SELECT k, count(*), regr_count(y, x) FROM testRegression GROUP BY k | ||
-- !query schema | ||
struct<k:int,count(1):bigint,regr_count(y, x):bigint> | ||
-- !query output | ||
1 1 0 | ||
2 4 3 | ||
|
||
|
||
-- !query | ||
SELECT k, count(*) FILTER (WHERE x IS NOT NULL), regr_count(y, x) FROM testRegression GROUP BY k | ||
-- !query schema | ||
struct<k:int,count(1) FILTER (WHERE (x IS NOT NULL)):bigint,regr_count(y, x):bigint> | ||
-- !query output | ||
1 0 0 | ||
2 3 3 | ||
|
||
|
||
-- !query | ||
SELECT regr_r2(y, x) FROM testRegression | ||
-- !query schema | ||
struct<regr_r2(y, x):double> | ||
-- !query output | ||
0.9976905311778291 | ||
|
||
|
||
-- !query | ||
SELECT regr_r2(y, x) FROM testRegression WHERE x IS NOT NULL | ||
-- !query schema | ||
struct<regr_r2(y, x):double> | ||
-- !query output | ||
0.9976905311778291 | ||
|
||
|
||
-- !query | ||
SELECT k, corr(y, x), regr_r2(y, x) FROM testRegression GROUP BY k | ||
-- !query schema | ||
struct<k:int,corr(y, x):double,regr_r2(y, x):double> | ||
-- !query output | ||
1 NULL NULL | ||
2 0.9988445981121532 0.9976905311778291 | ||
|
||
|
||
-- !query | ||
SELECT k, corr(y, x) FILTER (WHERE x IS NOT NULL), regr_r2(y, x) FROM testRegression GROUP BY k | ||
-- !query schema | ||
struct<k:int,corr(y, x) FILTER (WHERE (x IS NOT NULL)):double,regr_r2(y, x):double> | ||
-- !query output | ||
1 NULL NULL | ||
2 0.9988445981121532 0.9976905311778291 | ||
|
||
|
||
-- !query | ||
SELECT regr_avgx(y, x), regr_avgy(y, x) FROM testRegression | ||
-- !query schema | ||
struct<regr_avgx(y, x):double,regr_avgy(y, x):double> | ||
-- !query output | ||
22.666666666666668 20.0 | ||
|
||
|
||
-- !query | ||
SELECT regr_avgx(y, x), regr_avgy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL | ||
-- !query schema | ||
struct<regr_avgx(y, x):double,regr_avgy(y, x):double> | ||
-- !query output | ||
22.666666666666668 20.0 | ||
|
||
|
||
-- !query | ||
SELECT k, avg(x), avg(y), regr_avgx(y, x), regr_avgy(y, x) FROM testRegression GROUP BY k | ||
-- !query schema | ||
struct<k:int,avg(x):double,avg(y):double,regr_avgx(y, x):double,regr_avgy(y, x):double> | ||
-- !query output | ||
1 NULL 10.0 NULL NULL | ||
2 22.666666666666668 21.25 22.666666666666668 20.0 | ||
|
||
|
||
-- !query | ||
SELECT k, avg(x) FILTER (WHERE x IS NOT NULL AND y IS NOT NULL), avg(y) FILTER (WHERE x IS NOT NULL AND y IS NOT NULL), regr_avgx(y, x), regr_avgy(y, x) FROM testRegression GROUP BY k | ||
-- !query schema | ||
struct<k:int,avg(x) FILTER (WHERE ((x IS NOT NULL) AND (y IS NOT NULL))):double,avg(y) FILTER (WHERE ((x IS NOT NULL) AND (y IS NOT NULL))):double,regr_avgx(y, x):double,regr_avgy(y, x):double> | ||
-- !query output | ||
1 NULL NULL NULL NULL | ||
2 22.666666666666668 20.0 22.666666666666668 20.0 | ||
|
||
|
||
-- !query | ||
SELECT regr_sxx(y, x) FROM testRegression | ||
-- !query schema | ||
struct<regr_sxx(y, x):double> | ||
-- !query output | ||
288.66666666666663 | ||
|
||
|
||
-- !query | ||
SELECT regr_sxx(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL | ||
-- !query schema | ||
struct<regr_sxx(y, x):double> | ||
-- !query output | ||
288.66666666666663 | ||
|
||
|
||
-- !query | ||
SELECT k, regr_sxx(y, x) FROM testRegression GROUP BY k | ||
-- !query schema | ||
struct<k:int,regr_sxx(y, x):double> | ||
-- !query output | ||
1 NULL | ||
2 288.66666666666663 | ||
|
||
|
||
-- !query | ||
SELECT k, regr_sxx(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k | ||
-- !query schema | ||
struct<k:int,regr_sxx(y, x):double> | ||
-- !query output | ||
2 288.66666666666663 | ||
|
||
|
||
-- !query | ||
SELECT regr_sxy(y, x) FROM testRegression | ||
-- !query schema | ||
struct<regr_sxy(y, x):double> | ||
-- !query output | ||
240.0 | ||
|
||
|
||
-- !query | ||
SELECT regr_sxy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL | ||
-- !query schema | ||
struct<regr_sxy(y, x):double> | ||
-- !query output | ||
240.0 | ||
|
||
|
||
-- !query | ||
SELECT k, regr_sxy(y, x) FROM testRegression GROUP BY k | ||
-- !query schema | ||
struct<k:int,regr_sxy(y, x):double> | ||
-- !query output | ||
1 NULL | ||
2 240.0 | ||
|
||
|
||
-- !query | ||
SELECT k, regr_sxy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k | ||
-- !query schema | ||
struct<k:int,regr_sxy(y, x):double> | ||
-- !query output | ||
2 240.0 | ||
|
||
|
||
-- !query | ||
SELECT regr_syy(y, x) FROM testRegression | ||
-- !query schema | ||
struct<regr_syy(y, x):double> | ||
-- !query output | ||
200.0 | ||
|
||
|
||
-- !query | ||
SELECT regr_syy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL | ||
-- !query schema | ||
struct<regr_syy(y, x):double> | ||
-- !query output | ||
200.0 | ||
|
||
|
||
-- !query | ||
SELECT k, regr_syy(y, x) FROM testRegression GROUP BY k | ||
-- !query schema | ||
struct<k:int,regr_syy(y, x):double> | ||
-- !query output | ||
1 NULL | ||
2 200.0 | ||
|
||
|
||
-- !query | ||
SELECT k, regr_syy(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k | ||
-- !query schema | ||
struct<k:int,regr_syy(y, x):double> | ||
-- !query output | ||
2 200.0 | ||
|
||
|
||
-- !query | ||
SELECT regr_slope(y, x) FROM testRegression | ||
-- !query schema | ||
struct<regr_slope(y, x):double> | ||
-- !query output | ||
0.8314087759815244 | ||
|
||
|
||
-- !query | ||
SELECT regr_slope(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL | ||
-- !query schema | ||
struct<regr_slope(y, x):double> | ||
-- !query output | ||
0.8314087759815244 | ||
|
||
|
||
-- !query | ||
SELECT k, regr_slope(y, x) FROM testRegression GROUP BY k | ||
-- !query schema | ||
struct<k:int,regr_slope(y, x):double> | ||
-- !query output | ||
1 NULL | ||
2 0.8314087759815244 | ||
|
||
|
||
-- !query | ||
SELECT k, regr_slope(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k | ||
-- !query schema | ||
struct<k:int,regr_slope(y, x):double> | ||
-- !query output | ||
2 0.8314087759815244 | ||
|
||
|
||
-- !query | ||
SELECT regr_intercept(y, x) FROM testRegression | ||
-- !query schema | ||
struct<regr_intercept(y, x):double> | ||
-- !query output | ||
1.1547344110854487 | ||
|
||
|
||
-- !query | ||
SELECT regr_intercept(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL | ||
-- !query schema | ||
struct<regr_intercept(y, x):double> | ||
-- !query output | ||
1.1547344110854487 | ||
|
||
|
||
-- !query | ||
SELECT k, regr_intercept(y, x) FROM testRegression GROUP BY k | ||
-- !query schema | ||
struct<k:int,regr_intercept(y, x):double> | ||
-- !query output | ||
1 NULL | ||
2 1.1547344110854487 | ||
|
||
|
||
-- !query | ||
SELECT k, regr_intercept(y, x) FROM testRegression WHERE x IS NOT NULL AND y IS NOT NULL GROUP BY k | ||
-- !query schema | ||
struct<k:int,regr_intercept(y, x):double> | ||
-- !query output | ||
2 1.1547344110854487 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters