-
Notifications
You must be signed in to change notification settings - Fork 28.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-50582][SQL][PYTHON] Add quote builtin function #49191
base: master
Are you sure you want to change the base?
Changes from 3 commits
c0b826a
519abdb
abd65e0
5fbe663
e630f4e
6d9ab0b
230a967
56b0c0f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3723,3 +3723,40 @@ case class Luhncheck(input: Expression) extends RuntimeReplaceable with Implicit | |
override protected def withNewChildrenInternal( | ||
newChildren: IndexedSeq[Expression]): Expression = copy(newChildren(0)) | ||
} | ||
|
||
/** | ||
* Expression backing the SQL function `quote`: prepends a backslash to each instance of | ||
* single quote in the given string and encloses the result by single quotes. | ||
* | ||
* The expression is [[RuntimeReplaceable]]: evaluation is delegated to `replacement`, which | ||
* invokes a method named "quote" on the evaluated input. | ||
* | ||
* @param input the string expression to quote | ||
*/ | ||
// scalastyle:off line.size.limit | ||
@ExpressionDescription( | ||
usage = """ | ||
_FUNC_(str) - Returns `str` enclosed by single quotes and each instance of single quote in it is preceded by a backslash. | ||
""", | ||
examples = """ | ||
Examples: | ||
> SELECT _FUNC_('Don\'t'); | ||
'Don\'t' | ||
""", | ||
since = "4.0.0", | ||
group = "string_funcs") | ||
// scalastyle:on line.size.limit | ||
case class Quote(input: Expression) extends RuntimeReplaceable with ImplicitCastInputTypes | ||
with UnaryLike[Expression] { | ||
override def nullIntolerant: Boolean = true | ||
|
||
// The result type is taken from the input, so the output keeps the input's string type. | ||
override lazy val replacement: Expression = Invoke(input, "quote", input.dataType) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can't you use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You mean for instance, implementing |
||
|
||
// A single string argument; the accepted type is declared with trim-collation support enabled. | ||
override def inputTypes: Seq[AbstractDataType] = { | ||
Seq(StringTypeWithCollation(supportsTrimCollation = true)) | ||
} | ||
|
||
override def nodeName: String = "quote" | ||
|
||
override def nullable: Boolean = true | ||
|
||
override def child: Expression = input | ||
|
||
override protected def withNewChildInternal(newChild: Expression): Quote = { | ||
copy(input = newChild) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -165,8 +165,8 @@ | |
| org.apache.spark.sql.catalyst.expressions.If | if | SELECT if(1 < 2, 'a', 'b') | struct<(IF((1 < 2), a, b)):string> | | ||
| org.apache.spark.sql.catalyst.expressions.In | in | SELECT 1 in(1, 2, 3) | struct<(1 IN (1, 2, 3)):boolean> | | ||
| org.apache.spark.sql.catalyst.expressions.InitCap | initcap | SELECT initcap('sPark sql') | struct<initcap(sPark sql):string> | | ||
| org.apache.spark.sql.catalyst.expressions.Inline | inline | SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) | struct<col1:int,col2:string> | | ||
| org.apache.spark.sql.catalyst.expressions.Inline | inline_outer | SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))) | struct<col1:int,col2:string> | | ||
| org.apache.spark.sql.catalyst.expressions.InlineExpressionBuilder | inline | SELECT inline(array(struct(1, 'a'), struct(2, 'b'))) | struct<col1:int,col2:string> | | ||
| org.apache.spark.sql.catalyst.expressions.InlineExpressionBuilder | inline_outer | SELECT inline_outer(array(struct(1, 'a'), struct(2, 'b'))) | struct<col1:int,col2:string> | | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this related to this PR? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @dongjoon-hyun There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I agree with you. Let's handle them independently. |
||
| org.apache.spark.sql.catalyst.expressions.InputFileBlockLength | input_file_block_length | SELECT input_file_block_length() | struct<input_file_block_length():bigint> | | ||
| org.apache.spark.sql.catalyst.expressions.InputFileBlockStart | input_file_block_start | SELECT input_file_block_start() | struct<input_file_block_start():bigint> | | ||
| org.apache.spark.sql.catalyst.expressions.InputFileName | input_file_name | SELECT input_file_name() | struct<input_file_name():string> | | ||
|
@@ -253,11 +253,12 @@ | |
| org.apache.spark.sql.catalyst.expressions.PercentRank | percent_rank | SELECT a, b, percent_rank(b) OVER (PARTITION BY a ORDER BY b) FROM VALUES ('A1', 2), ('A1', 1), ('A2', 3), ('A1', 1) tab(a, b) | struct<a:string,b:int,PERCENT_RANK() OVER (PARTITION BY a ORDER BY b ASC NULLS FIRST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):double> | | ||
| org.apache.spark.sql.catalyst.expressions.Pi | pi | SELECT pi() | struct<PI():double> | | ||
| org.apache.spark.sql.catalyst.expressions.Pmod | pmod | SELECT pmod(10, 3) | struct<pmod(10, 3):int> | | ||
| org.apache.spark.sql.catalyst.expressions.PosExplode | posexplode | SELECT posexplode(array(10,20)) | struct<pos:int,col:int> | | ||
| org.apache.spark.sql.catalyst.expressions.PosExplode | posexplode_outer | SELECT posexplode_outer(array(10,20)) | struct<pos:int,col:int> | | ||
| org.apache.spark.sql.catalyst.expressions.PosExplodeExpressionBuilder | posexplode | SELECT posexplode(array(10,20)) | struct<pos:int,col:int> | | ||
| org.apache.spark.sql.catalyst.expressions.PosExplodeExpressionBuilder | posexplode_outer | SELECT posexplode_outer(array(10,20)) | struct<pos:int,col:int> | | ||
| org.apache.spark.sql.catalyst.expressions.Pow | pow | SELECT pow(2, 3) | struct<pow(2, 3):double> | | ||
| org.apache.spark.sql.catalyst.expressions.Pow | power | SELECT power(2, 3) | struct<POWER(2, 3):double> | | ||
| org.apache.spark.sql.catalyst.expressions.Quarter | quarter | SELECT quarter('2016-08-31') | struct<quarter(2016-08-31):int> | | ||
| org.apache.spark.sql.catalyst.expressions.Quote | quote | SELECT quote('Don\'t') | struct<quote(Don't):string> | | ||
| org.apache.spark.sql.catalyst.expressions.RLike | regexp | SELECT regexp('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<REGEXP(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> | | ||
| org.apache.spark.sql.catalyst.expressions.RLike | regexp_like | SELECT regexp_like('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<REGEXP_LIKE(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> | | ||
| org.apache.spark.sql.catalyst.expressions.RLike | rlike | SELECT rlike('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*') | struct<RLIKE(%SystemDrive%UsersJohn, %SystemDrive%\Users.*):boolean> | | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1452,4 +1452,21 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession { | |
Seq(Row("abc", "def"))) | ||
} | ||
} | ||
|
||
test("SPARK-50582: string quote function") { | ||
// Single-row DataFrame whose only value contains an embedded single quote. | ||
val source = Seq("Don't").toDF("value") | ||
|
||
// Column API: the embedded quote gets a leading backslash and the result is wrapped in quotes. | ||
checkAnswer(source.select(quote($"value")), Row("'Don\\'t'")) | ||
|
||
// SQL literal without any single quote: only the enclosing quotes are added. | ||
checkAnswer(source.selectExpr("quote('Spark')"), Row("'Spark'")) | ||
|
||
// NULL input yields NULL. | ||
checkAnswer(source.selectExpr("quote(NULL)"), Row(null)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is duplicates of checks in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I think |
||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be better to keep this as a single-line string, because otherwise the newline character shows up in the generated docs.