Skip to content

Commit

Permalink
fix
Browse files: browse the repository at this point in the history
  • Loading branch information
mihailotim-db committed Jan 31, 2025
1 parent 7243de6 commit 6d9578b
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 61 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ package object util extends Logging {
// generated column names don't contain back-ticks or double-quotes.
def usePrettyExpression(e: Expression): Expression = e transform {
case a: Attribute => new PrettyAttribute(a)
case Literal(s: UTF8String, collationStringType: StringType)
if collationStringType.collationId != 0 =>
PrettyAttribute(s.toString, StringType)
case Literal(s: UTF8String, StringType) => PrettyAttribute(s.toString, StringType)
case Literal(v, t: NumericType) if v != null => PrettyAttribute(v.toString, t)
case Literal(null, dataType) => PrettyAttribute("NULL", dataType)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -442,77 +442,77 @@ Project [array_except(array(collate(aaa, utf8_lcase)), array(collate(AAA, utf8_l
-- !query
select 'a' collate unicode < 'A'
-- !query analysis
Project [(collate(a, unicode) < A) AS (collate(a, unicode) < 'A' collate UNICODE)#x]
Project [(collate(a, unicode) < A) AS (collate(a, unicode) < A)#x]
+- OneRowRelation


-- !query
select 'a' collate unicode_ci = 'A'
-- !query analysis
Project [(collate(a, unicode_ci) = A) AS (collate(a, unicode_ci) = 'A' collate UNICODE_CI)#x]
Project [(collate(a, unicode_ci) = A) AS (collate(a, unicode_ci) = A)#x]
+- OneRowRelation


-- !query
select 'a' collate unicode_ai = 'å'
-- !query analysis
Project [(collate(a, unicode_ai) = å) AS (collate(a, unicode_ai) = 'å' collate UNICODE_AI)#x]
Project [(collate(a, unicode_ai) = å) AS (collate(a, unicode_ai) = å)#x]
+- OneRowRelation


-- !query
select 'a' collate unicode_ci_ai = 'Å'
-- !query analysis
Project [(collate(a, unicode_ci_ai) = Å) AS (collate(a, unicode_ci_ai) = 'Å' collate UNICODE_CI_AI)#x]
Project [(collate(a, unicode_ci_ai) = Å) AS (collate(a, unicode_ci_ai) = Å)#x]
+- OneRowRelation


-- !query
select 'a' collate en < 'A'
-- !query analysis
Project [(collate(a, en) < A) AS (collate(a, en) < 'A' collate en)#x]
Project [(collate(a, en) < A) AS (collate(a, en) < A)#x]
+- OneRowRelation


-- !query
select 'a' collate en_ci = 'A'
-- !query analysis
Project [(collate(a, en_ci) = A) AS (collate(a, en_ci) = 'A' collate en_CI)#x]
Project [(collate(a, en_ci) = A) AS (collate(a, en_ci) = A)#x]
+- OneRowRelation


-- !query
select 'a' collate en_ai = 'å'
-- !query analysis
Project [(collate(a, en_ai) = å) AS (collate(a, en_ai) = 'å' collate en_AI)#x]
Project [(collate(a, en_ai) = å) AS (collate(a, en_ai) = å)#x]
+- OneRowRelation


-- !query
select 'a' collate en_ci_ai = 'Å'
-- !query analysis
Project [(collate(a, en_ci_ai) = Å) AS (collate(a, en_ci_ai) = 'Å' collate en_CI_AI)#x]
Project [(collate(a, en_ci_ai) = Å) AS (collate(a, en_ci_ai) = Å)#x]
+- OneRowRelation


-- !query
select 'Kypper' collate sv < 'Köpfe'
-- !query analysis
Project [(collate(Kypper, sv) < Köpfe) AS (collate(Kypper, sv) < 'Köpfe' collate sv)#x]
Project [(collate(Kypper, sv) < Köpfe) AS (collate(Kypper, sv) < Köpfe)#x]
+- OneRowRelation


-- !query
select 'Kypper' collate de > 'Köpfe'
-- !query analysis
Project [(collate(Kypper, de) > Köpfe) AS (collate(Kypper, de) > 'Köpfe' collate de)#x]
Project [(collate(Kypper, de) > Köpfe) AS (collate(Kypper, de) > Köpfe)#x]
+- OneRowRelation


-- !query
select 'I' collate tr_ci = 'ı'
-- !query analysis
Project [(collate(I, tr_ci) = ı) AS (collate(I, tr_ci) = 'ı' collate tr_CI)#x]
Project [(collate(I, tr_ci) = ı) AS (collate(I, tr_ci) = ı)#x]
+- OneRowRelation


Expand Down Expand Up @@ -919,7 +919,7 @@ Project [elt(1, collate(utf8_binary#x, utf8_binary), cast(utf8_lcase#x as string
-- !query
select elt(1, utf8_binary, 'word'), elt(1, utf8_lcase, 'word') from t5
-- !query analysis
Project [elt(1, utf8_binary#x, word, true) AS elt(1, utf8_binary, word)#x, elt(1, utf8_lcase#x, word, true) AS elt(1, utf8_lcase, 'word' collate UTF8_LCASE)#x]
Project [elt(1, utf8_binary#x, word, true) AS elt(1, utf8_binary, word)#x, elt(1, utf8_lcase#x, word, true) AS elt(1, utf8_lcase, word)#x]
+- SubqueryAlias spark_catalog.default.t5
+- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet

Expand Down Expand Up @@ -1684,7 +1684,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"inputType" : "\"STRING COLLATE UNICODE_AI\"",
"paramIndex" : "first",
"requiredType" : "\"STRING\"",
"sqlExpr" : "\"replace(collate(utf8_binary, unicode_ai), collate(utf8_lcase, unicode_ai), 'abc' collate UNICODE_AI)\""
"sqlExpr" : "\"replace(collate(utf8_binary, unicode_ai), collate(utf8_lcase, unicode_ai), abc)\""
},
"queryContext" : [ {
"objectType" : "",
Expand Down Expand Up @@ -2073,7 +2073,7 @@ Project [overlay(collate(utf8_binary#x, utf8_lcase), collate(utf8_lcase#x, utf8_
-- !query
select overlay(utf8_binary, 'a', 2), overlay(utf8_lcase, 'a', 2) from t5
-- !query analysis
Project [overlay(utf8_binary#x, a, 2, -1) AS overlay(utf8_binary, a, 2, -1)#x, overlay(utf8_lcase#x, a, 2, -1) AS overlay(utf8_lcase, 'a' collate UTF8_LCASE, 2, -1)#x]
Project [overlay(utf8_binary#x, a, 2, -1) AS overlay(utf8_binary, a, 2, -1)#x, overlay(utf8_lcase#x, a, 2, -1) AS overlay(utf8_lcase, a, 2, -1)#x]
+- SubqueryAlias spark_catalog.default.t5
+- Relation spark_catalog.default.t5[s#x,utf8_binary#x,utf8_lcase#x] parquet

Expand Down
28 changes: 14 additions & 14 deletions sql/core/src/test/resources/sql-tests/results/collations.sql.out
Original file line number Diff line number Diff line change
Expand Up @@ -479,87 +479,87 @@ struct<array_except(array(collate(aaa, utf8_lcase)), array(collate(AAA, utf8_lca
-- !query
select 'a' collate unicode < 'A'
-- !query schema
struct<(collate(a, unicode) < 'A' collate UNICODE):boolean>
struct<(collate(a, unicode) < A):boolean>
-- !query output
true


-- !query
select 'a' collate unicode_ci = 'A'
-- !query schema
struct<(collate(a, unicode_ci) = 'A' collate UNICODE_CI):boolean>
struct<(collate(a, unicode_ci) = A):boolean>
-- !query output
true


-- !query
select 'a' collate unicode_ai = 'å'
-- !query schema
struct<(collate(a, unicode_ai) = 'å' collate UNICODE_AI):boolean>
struct<(collate(a, unicode_ai) = å):boolean>
-- !query output
true


-- !query
select 'a' collate unicode_ci_ai = 'Å'
-- !query schema
struct<(collate(a, unicode_ci_ai) = 'Å' collate UNICODE_CI_AI):boolean>
struct<(collate(a, unicode_ci_ai) = Å):boolean>
-- !query output
true


-- !query
select 'a' collate en < 'A'
-- !query schema
struct<(collate(a, en) < 'A' collate en):boolean>
struct<(collate(a, en) < A):boolean>
-- !query output
true


-- !query
select 'a' collate en_ci = 'A'
-- !query schema
struct<(collate(a, en_ci) = 'A' collate en_CI):boolean>
struct<(collate(a, en_ci) = A):boolean>
-- !query output
true


-- !query
select 'a' collate en_ai = 'å'
-- !query schema
struct<(collate(a, en_ai) = 'å' collate en_AI):boolean>
struct<(collate(a, en_ai) = å):boolean>
-- !query output
true


-- !query
select 'a' collate en_ci_ai = 'Å'
-- !query schema
struct<(collate(a, en_ci_ai) = 'Å' collate en_CI_AI):boolean>
struct<(collate(a, en_ci_ai) = Å):boolean>
-- !query output
true


-- !query
select 'Kypper' collate sv < 'Köpfe'
-- !query schema
struct<(collate(Kypper, sv) < 'Köpfe' collate sv):boolean>
struct<(collate(Kypper, sv) < Köpfe):boolean>
-- !query output
true


-- !query
select 'Kypper' collate de > 'Köpfe'
-- !query schema
struct<(collate(Kypper, de) > 'Köpfe' collate de):boolean>
struct<(collate(Kypper, de) > Köpfe):boolean>
-- !query output
true


-- !query
select 'I' collate tr_ci = 'ı'
-- !query schema
struct<(collate(I, tr_ci) = 'ı' collate tr_CI):boolean>
struct<(collate(I, tr_ci) = ı):boolean>
-- !query output
true

Expand Down Expand Up @@ -1120,7 +1120,7 @@ kitten
-- !query
select elt(1, utf8_binary, 'word'), elt(1, utf8_lcase, 'word') from t5
-- !query schema
struct<elt(1, utf8_binary, word):string,elt(1, utf8_lcase, 'word' collate UTF8_LCASE):string collate UTF8_LCASE>
struct<elt(1, utf8_binary, word):string,elt(1, utf8_lcase, word):string collate UTF8_LCASE>
-- !query output
Hello, world! Nice day. Hello, world! Nice day.
Something else. Nothing here. Something else. Nothing here.
Expand Down Expand Up @@ -2549,7 +2549,7 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
"inputType" : "\"STRING COLLATE UNICODE_AI\"",
"paramIndex" : "first",
"requiredType" : "\"STRING\"",
"sqlExpr" : "\"replace(collate(utf8_binary, unicode_ai), collate(utf8_lcase, unicode_ai), 'abc' collate UNICODE_AI)\""
"sqlExpr" : "\"replace(collate(utf8_binary, unicode_ai), collate(utf8_lcase, unicode_ai), abc)\""
},
"queryContext" : [ {
"objectType" : "",
Expand Down Expand Up @@ -3413,7 +3413,7 @@ ksitTing
-- !query
select overlay(utf8_binary, 'a', 2), overlay(utf8_lcase, 'a', 2) from t5
-- !query schema
struct<overlay(utf8_binary, a, 2, -1):string,overlay(utf8_lcase, 'a' collate UTF8_LCASE, 2, -1):string collate UTF8_LCASE>
struct<overlay(utf8_binary, a, 2, -1):string,overlay(utf8_lcase, a, 2, -1):string collate UTF8_LCASE>
-- !query output
Hallo, world! Nice day. Hallo, world! Nice day.
Saark SaL
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1209,10 +1209,9 @@ class CollationSQLExpressionsSuite
condition = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
sqlState = Some("42K09"),
parameters = Map(
"sqlExpr" -> ("\"str_to_map('a:1,b:2,c:3' collate " + s"${t.collation}, " +
"'?' collate " + s"${t.collation}, '?' collate ${t.collation})" + "\""),
"sqlExpr" -> "\"str_to_map(a:1,b:2,c:3, ?, ?)\"",
"paramIndex" -> "first",
"inputSql" -> ("\"'a:1,b:2,c:3' collate " + s"${t.collation}" + "\""),
"inputSql" -> "\"a:1,b:2,c:3\"",
"inputType" -> ("\"STRING COLLATE " + s"${t.collation}" + "\""),
"requiredType" -> "\"STRING\""),
context = ExpectedContext(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -451,8 +451,7 @@ class CollationSQLRegexpSuite
condition = "DATATYPE_MISMATCH.UNEXPECTED_INPUT_TYPE",
parameters = Map(
"sqlExpr" ->
("\"regexp_replace(collate(ABCDE, UNICODE_CI), '.c.' collate UNICODE_CI," +
" 'FFF' collate UNICODE_CI, 1)\""),
("\"regexp_replace(collate(ABCDE, UNICODE_CI), .c., FFF, 1)\""),
"paramIndex" -> "first",
"inputSql" -> "\"collate(ABCDE, UNICODE_CI)\"",
"inputType" -> "\"STRING COLLATE UNICODE_CI\"",
Expand Down
Loading

0 comments on commit 6d9578b

Please sign in to comment.