From c0b826a44a9bccf4f71593e86abdf92d74b98faa Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 16 Dec 2024 05:19:43 +0900 Subject: [PATCH 1/6] Add quote builtin function. --- .../apache/spark/unsafe/types/UTF8String.java | 8 ++++ python/pyspark/sql/functions/builtin.py | 30 +++++++++++++++ .../org/apache/spark/sql/functions.scala | 9 +++++ .../catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/stringExpressions.scala | 37 +++++++++++++++++++ .../nonansi/string-functions.sql.out | 21 +++++++++++ .../analyzer-results/string-functions.sql.out | 21 +++++++++++ .../sql-tests/inputs/string-functions.sql | 5 +++ .../results/nonansi/string-functions.sql.out | 24 ++++++++++++ .../results/string-functions.sql.out | 24 ++++++++++++ .../spark/sql/StringFunctionsSuite.scala | 17 +++++++++ 11 files changed, 197 insertions(+) diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index caf8461b0b5d6..fda0539d1669a 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -2160,6 +2160,14 @@ public UTF8String soundex() { return UTF8String.fromBytes(sx); } + public UTF8String quote() { + final String qtChar = "'"; + final String qtCharRep = "\\\\'"; + + String sp = toString().replaceAll(qtChar, qtCharRep); + return fromString(qtChar + sp + qtChar); + } + @Override public void writeExternal(ObjectOutput out) throws IOException { byte[] bytes = getBytes(); diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index 4b4c164055eaf..446d37b2789bc 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -17100,6 +17100,36 @@ def collation(col: "ColumnOrName") -> Column: return _invoke_function_over_columns("collation", col) +@_try_remote_functions +def quote(col: "ColumnOrName") -> Column: + r"""Returns `str` enclosed by single quotes and each instance of single quote in it is preceded by a backslash. + + .. versionadded:: 4.0.0 + + Parameters + ---------- + col : :class:`~pyspark.sql.Column` or column name + target column to be quoted. + + Returns + ------- + :class:`~pyspark.sql.Column` + quoted string + + Examples + -------- + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame(["Don't"], "STRING") + >>> df.select("*", sf.quote("value")).show() + +-----+------------+ + |value|quote(value)| + +-----+------------+ + |Don't| 'Don\'t'| + +-----+------------+ + """ + return _invoke_function_over_columns("quote", col) + + # ---------------------- Collection functions ------------------------------ diff --git a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala index 9f509fa843a2b..96e47ed768f97 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala @@ -5074,6 +5074,15 @@ object functions { */ def right(str: Column, len: Column): Column = Column.fn("right", str, len) + /** + * Returns `str` enclosed by single quotes and + * each instance of single quote in it is preceded by a backslash. + * + * @group string_funcs + * @since 4.0.0 + */ + def quote(str: Column): Column = Column.fn("quote", str) + ////////////////////////////////////////////////////////////////////////////////////////////// // DateTime functions ////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index d9e9f49ce065e..b70ad911ce6c3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -613,6 +613,7 @@ object FunctionRegistry { expression[MakeValidUTF8]("make_valid_utf8"), expression[ValidateUTF8]("validate_utf8"), expression[TryValidateUTF8]("try_validate_utf8"), + expression[Quote]("quote"), // url functions expression[UrlEncode]("url_encode"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index efd7e5c07de40..228c92c04c335 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -3723,3 +3723,40 @@ case class Luhncheck(input: Expression) extends RuntimeReplaceable with Implicit override protected def withNewChildrenInternal( newChildren: IndexedSeq[Expression]): Expression = copy(newChildren(0)) } + +/** + * A function that prepends a backslash to each instance of single quote + * in the given string and encloses the result by single quotes. + */ +@ExpressionDescription( + usage = """ + _FUNC_(str) - Returns `str` enclosed by single quotes and + each instance of single quote in it is preceded by a backslash. + """, + examples = """ + Examples: + > SELECT _FUNC_('Don\'t'); + 'Don\'t' + """, + since = "4.0.0", + group = "string_funcs") +case class Quote(input: Expression) extends RuntimeReplaceable with ImplicitCastInputTypes + with UnaryLike[Expression] { + override def nullIntolerant: Boolean = true + + override lazy val replacement: Expression = Invoke(input, "quote", input.dataType) + + override def inputTypes: Seq[AbstractDataType] = { + Seq(StringTypeWithCollation(supportsTrimCollation = true)) + } + + override def nodeName: String = "quote" + + override def nullable: Boolean = true + + override def child: Expression = input + + override protected def withNewChildInternal(newChild: Expression): Quote = { + copy(input = newChild) + } +} diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/string-functions.sql.out index 98664dedf820c..ee4ad922fa8a5 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/nonansi/string-functions.sql.out @@ -1796,3 +1796,24 @@ select try_validate_utf8(x'80') -- !query analysis Project [try_validate_utf8(cast(0x80 as string)) AS try_validate_utf8(X'80')#x] +- OneRowRelation + + +-- !query +select quote('Spark') +-- !query analysis +Project [quote(Spark) AS quote(Spark)#x] ++- OneRowRelation + + +-- !query +select quote("Don't") +-- !query analysis +Project [quote(Don't) AS quote(Don't)#x] ++- OneRowRelation + + +-- !query +select quote(NULL) +-- !query analysis +Project [quote(cast(null as string)) AS quote(NULL)#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/string-functions.sql.out index 98664dedf820c..ee4ad922fa8a5 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/string-functions.sql.out @@ -1796,3 +1796,24 @@ select try_validate_utf8(x'80') -- !query analysis Project [try_validate_utf8(cast(0x80 as string)) AS try_validate_utf8(X'80')#x] +- OneRowRelation + + +-- !query +select quote('Spark') +-- !query analysis +Project [quote(Spark) AS quote(Spark)#x] ++- OneRowRelation + + +-- !query +select quote("Don't") +-- !query analysis +Project [quote(Don't) AS quote(Don't)#x] ++- OneRowRelation + + +-- !query +select quote(NULL) +-- !query analysis +Project [quote(cast(null as string)) AS quote(NULL)#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql index c108f7c76f764..878fa992f81b4 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/string-functions.sql @@ -306,3 +306,8 @@ select validate_utf8(x'80'); select try_validate_utf8(''); select try_validate_utf8('abc'); select try_validate_utf8(x'80'); + +-- quote +select quote('Spark'); +select quote("Don't"); +select quote(NULL); \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/nonansi/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/nonansi/string-functions.sql.out index 3f9f24f817f2c..bb4d615deb292 100644 --- a/sql/core/src/test/resources/sql-tests/results/nonansi/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/nonansi/string-functions.sql.out @@ -2277,3 +2277,27 @@ select try_validate_utf8(x'80') struct -- !query output NULL + + +-- !query +select quote('Spark') +-- !query schema +struct +-- !query output +'Spark' + + +-- !query +select quote("Don't") +-- !query schema +struct +-- !query output +'Don\'t' + + +-- !query +select quote(NULL) +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out index 706673606625b..7bbef71854d2c 100644 --- a/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/string-functions.sql.out @@ -2341,3 +2341,27 @@ select try_validate_utf8(x'80') struct -- !query output NULL + + +-- !query +select quote('Spark') +-- !query schema +struct +-- !query output +'Spark' + + +-- !query +select quote("Don't") +-- !query schema +struct +-- !query output +'Don\'t' + + +-- !query +select quote(NULL) +-- !query schema +struct +-- !query output +NULL diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index 2e91d60e4ba04..ebba2e540abd2 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -1452,4 +1452,21 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession { Seq(Row("abc", "def"))) } } + + test("SPARK-50582: string quote function") { + val df = Seq(("Don't")).toDF("value") + + checkAnswer( + df.select(quote($"value")), + Row("'Don\\'t'")) + + checkAnswer( + df.selectExpr("quote('Spark')"), + Row("'Spark'") + ) + + checkAnswer( + df.selectExpr("quote(NULL)"), + Row(null)) + } } From 519abdb4065794898c6d86e48497f6b70710a40a Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 16 Dec 2024 15:24:42 +0900 Subject: [PATCH 2/6] Fix for tests. --- python/pyspark/sql/connect/functions/builtin.py | 7 +++++++ .../src/main/scala/org/apache/spark/sql/functions.scala | 4 ++-- .../resources/sql-functions/sql-expression-schema.md | 9 +++++---- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/python/pyspark/sql/connect/functions/builtin.py b/python/pyspark/sql/connect/functions/builtin.py index f52cdffb84b7c..0a6349b3cf792 100644 --- a/python/pyspark/sql/connect/functions/builtin.py +++ b/python/pyspark/sql/connect/functions/builtin.py @@ -3040,6 +3040,13 @@ def collation(col: "ColumnOrName") -> Column: collation.__doc__ = pysparkfuncs.collation.__doc__ +def quote(col: "ColumnOrName") -> Column: + return _invoke_function_over_columns("quote", col) + + +quote.__doc__ = pysparkfuncs.quote.__doc__ + + # Date/Timestamp functions diff --git a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala index 96e47ed768f97..56a5f029befbb 100644 --- a/sql/api/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/api/src/main/scala/org/apache/spark/sql/functions.scala @@ -5075,8 +5075,8 @@ object functions { def right(str: Column, len: Column): Column = Column.fn("right", str, len) /** - * Returns `str` enclosed by single quotes and - * each instance of single quote in it is preceded by a backslash. + * Returns `str` enclosed by single quotes and each instance of single quote in it is preceded + * by a backslash. * * @group string_funcs * @since 4.0.0 diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 39cefdaa892b2..da081bfdaee4a 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -165,8 +165,8 @@ | org.apache.spark.sql.catalyst.expressions.If | if | SELECT if(1 < 2, 'a', 'b') | struct<(IF((1 < 2), a, b)):string> | | org.apache.spark.sql.catalyst.expressions.In | in | SELECT 1 in(1, 2, 3) | struct<(1 IN (1, 2, 3)):boolean> | | org.apache.spark.sql.catalyst.expressions.InitCap | initcap | SELECT initcap('sPark sql') | struct | -| org.apache.spark.sql.catalyst.expressions.Inline | inline | SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) | struct | -| org.apache.spark.sql.catalyst.expressions.Inline | inline_outer | SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))) | struct | +| org.apache.spark.sql.catalyst.expressions.InlineExpressionBuilder | inline | SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) | struct | +| org.apache.spark.sql.catalyst.expressions.InlineExpressionBuilder | inline_outer | SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))) | struct | | org.apache.spark.sql.catalyst.expressions.InputFileBlockLength | input_file_block_length | SELECT input_file_block_length() | struct | | org.apache.spark.sql.catalyst.expressions.InputFileBlockStart | input_file_block_start | SELECT input_file_block_start() | struct | | org.apache.spark.sql.catalyst.expressions.InputFileName | input_file_name | SELECT input_file_name() | struct | @@ -253,11 +253,12 @@ | org.apache.spark.sql.catalyst.expressions.PercentRank | percent_rank | SELECT a, b, percent_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.Pi | pi | SELECT pi() | struct | | org.apache.spark.sql.catalyst.expressions.Pmod | pmod | SELECT pmod(10, 3) | struct | -| org.apache.spark.sql.catalyst.expressions.PosExplode | posexplode | SELECT posexplode(array(10,20)) | struct | -| org.apache.spark.sql.catalyst.expressions.PosExplode | posexplode_outer | SELECT posexplode_outer(array(10,20)) | struct | +| org.apache.spark.sql.catalyst.expressions.PosExplodeExpressionBuilder | posexplode | SELECT posexplode(array(10,20)) | struct | +| org.apache.spark.sql.catalyst.expressions.PosExplodeExpressionBuilder | posexplode_outer | SELECT posexplode_outer(array(10,20)) | struct | | org.apache.spark.sql.catalyst.expressions.Pow | pow | SELECT pow(2, 3) | struct | | org.apache.spark.sql.catalyst.expressions.Pow | power | SELECT power(2, 3) | struct | | org.apache.spark.sql.catalyst.expressions.Quarter | quarter | SELECT quarter('2016-08-31') | struct | +| org.apache.spark.sql.catalyst.expressions.Quote | quote | SELECT quote('Don\'t') | struct | | org.apache.spark.sql.catalyst.expressions.RLike | regexp | SELECT regexp('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct | | org.apache.spark.sql.catalyst.expressions.RLike | regexp_like | SELECT regexp_like('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct | | org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT rlike('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct | From abd65e00d4b461d7c9a3763c011a4f34e700b903 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Mon, 16 Dec 2024 16:41:17 +0900 Subject: [PATCH 3/6] Fix for flake8. --- python/pyspark/sql/functions/builtin.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/pyspark/sql/functions/builtin.py b/python/pyspark/sql/functions/builtin.py index 446d37b2789bc..57c2cd51552ab 100644 --- a/python/pyspark/sql/functions/builtin.py +++ b/python/pyspark/sql/functions/builtin.py @@ -17102,7 +17102,8 @@ def collation(col: "ColumnOrName") -> Column: @_try_remote_functions def quote(col: "ColumnOrName") -> Column: - r"""Returns `str` enclosed by single quotes and each instance of single quote in it is preceded by a backslash. + r"""Returns `str` enclosed by single quotes and each instance of + single quote in it is preceded by a backslash. .. versionadded:: 4.0.0 From e630f4e958c6c468e36779e400196fa95082cf3e Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Tue, 21 Jan 2025 16:35:50 +0900 Subject: [PATCH 4/6] Remove irrelevant changes --- .../test/resources/sql-functions/sql-expression-schema.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index da081bfdaee4a..3a9bb407808c1 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -165,8 +165,8 @@ | org.apache.spark.sql.catalyst.expressions.If | if | SELECT if(1 < 2, 'a', 'b') | struct<(IF((1 < 2), a, b)):string> | | org.apache.spark.sql.catalyst.expressions.In | in | SELECT 1 in(1, 2, 3) | struct<(1 IN (1, 2, 3)):boolean> | | org.apache.spark.sql.catalyst.expressions.InitCap | initcap | SELECT initcap('sPark sql') | struct | -| org.apache.spark.sql.catalyst.expressions.InlineExpressionBuilder | inline | SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) | struct | -| org.apache.spark.sql.catalyst.expressions.InlineExpressionBuilder | inline_outer | SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))) | struct | +| org.apache.spark.sql.catalyst.expressions.Inline | inline | SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) | struct | +| org.apache.spark.sql.catalyst.expressions.Inline | inline_outer | SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))) | struct | | org.apache.spark.sql.catalyst.expressions.InputFileBlockLength | input_file_block_length | SELECT input_file_block_length() | struct | | org.apache.spark.sql.catalyst.expressions.InputFileBlockStart | input_file_block_start | SELECT input_file_block_start() | struct | | org.apache.spark.sql.catalyst.expressions.InputFileName | input_file_name | SELECT input_file_name() | struct | @@ -253,8 +253,8 @@ | org.apache.spark.sql.catalyst.expressions.PercentRank | percent_rank | SELECT a, b, percent_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct | | org.apache.spark.sql.catalyst.expressions.Pi | pi | SELECT pi() | struct | | org.apache.spark.sql.catalyst.expressions.Pmod | pmod | SELECT pmod(10, 3) | struct | -| org.apache.spark.sql.catalyst.expressions.PosExplodeExpressionBuilder | posexplode | SELECT posexplode(array(10,20)) | struct | -| org.apache.spark.sql.catalyst.expressions.PosExplodeExpressionBuilder | posexplode_outer | SELECT posexplode_outer(array(10,20)) | struct | +| org.apache.spark.sql.catalyst.expressions.PosExplode | posexplode | SELECT posexplode(array(10,20)) | struct | +| org.apache.spark.sql.catalyst.expressions.PosExplode | posexplode_outer | SELECT posexplode_outer(array(10,20)) | struct | | org.apache.spark.sql.catalyst.expressions.Pow | pow | SELECT pow(2, 3) | struct | | org.apache.spark.sql.catalyst.expressions.Pow | power | SELECT power(2, 3) | struct | | org.apache.spark.sql.catalyst.expressions.Quarter | quarter | SELECT quarter('2016-08-31') | struct | From 230a967b8959609925b74e3013af9b887848f5e0 Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Thu, 23 Jan 2025 22:36:27 +0900 Subject: [PATCH 5/6] Move quote function to ExpressionUtils, and use StaticInvoke instead of Invoke --- .../apache/spark/unsafe/types/UTF8String.java | 8 ------- .../expressions/ExpressionImplUtils.java | 8 +++++++ .../expressions/stringExpressions.scala | 22 +++++++++++++------ .../spark/sql/StringFunctionsSuite.scala | 17 -------------- 4 files changed, 23 insertions(+), 32 deletions(-) diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index fda0539d1669a..caf8461b0b5d6 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -2160,14 +2160,6 @@ public UTF8String soundex() { return UTF8String.fromBytes(sx); } - public UTF8String quote() { - final String qtChar = "'"; - final String qtCharRep = "\\\\'"; - - String sp = toString().replaceAll(qtChar, qtCharRep); - return fromString(qtChar + sp + qtChar); - } - @Override public void writeExternal(ObjectOutput out) throws IOException { byte[] bytes = getBytes(); diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java index 2fad36efe8cc1..8c42e5bf112c4 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionImplUtils.java @@ -334,4 +334,12 @@ public static UTF8String randStr(XORShiftRandom rng, int length) { } return UTF8String.fromBytes(bytes); } + + public static UTF8String quote(UTF8String str) { + final String qtChar = "'"; + final String qtCharRep = "\\\\'"; + + String sp = str.toString().replaceAll(qtChar, qtCharRep); + return UTF8String.fromString(qtChar + sp + qtChar); + } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 20e44d0420ee2..a9a337f741432 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -3724,11 +3724,9 @@ case class Luhncheck(input: Expression) extends RuntimeReplaceable with Implicit * A function that prepends a backslash to each instance of single quote * in the given string and encloses the result by single quotes. */ +// scalastyle:off line.size.limit @ExpressionDescription( - usage = """ - _FUNC_(str) - Returns `str` enclosed by single quotes and - each instance of single quote in it is preceded by a backslash. - """, + usage = "_FUNC_(str) - Returns `str` enclosed by single quotes and each instance of single quote in it is preceded by a backslash.", examples = """ Examples: > SELECT _FUNC_('Don\'t'); @@ -3736,11 +3734,21 @@ case class Luhncheck(input: Expression) extends RuntimeReplaceable with Implicit """, since = "4.0.0", group = "string_funcs") -case class Quote(input: Expression) extends RuntimeReplaceable with ImplicitCastInputTypes - with UnaryLike[Expression] { +// scalastyle:on line.size.limit +case class Quote(input: Expression) + extends UnaryExpression + with RuntimeReplaceable + with ImplicitCastInputTypes + with DefaultStringProducingExpression { + override def nullIntolerant: Boolean = true - override lazy val replacement: Expression = Invoke(input, "quote", input.dataType) + override lazy val replacement: Expression = StaticInvoke( + classOf[ExpressionImplUtils], + dataType, + "quote", + Seq(input), + inputTypes) override def inputTypes: Seq[AbstractDataType] = { Seq(StringTypeWithCollation(supportsTrimCollation = true)) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index ebba2e540abd2..2e91d60e4ba04 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -1452,21 +1452,4 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession { Seq(Row("abc", "def"))) } } - - test("SPARK-50582: string quote function") { - val df = Seq(("Don't")).toDF("value") - - checkAnswer( - df.select(quote($"value")), - Row("'Don\\'t'")) - - checkAnswer( - df.selectExpr("quote('Spark')"), - Row("'Spark'") - ) - - checkAnswer( - df.selectExpr("quote(NULL)"), - Row(null)) - } } From 56b0c0f8144a58efd93afabfb3370074e2cdba3e Mon Sep 17 00:00:00 2001 From: Kousuke Saruta Date: Fri, 24 Jan 2025 01:33:33 +0900 Subject: [PATCH 6/6] Fix for ExpressionInfoSuite --- .../spark/sql/catalyst/expressions/stringExpressions.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index a9a337f741432..b9ec6a907dc63 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -3741,8 +3741,6 @@ case class Quote(input: Expression) with ImplicitCastInputTypes with DefaultStringProducingExpression { - override def nullIntolerant: Boolean = true - override lazy val replacement: Expression = StaticInvoke( classOf[ExpressionImplUtils], dataType,