Skip to content

Commit

Permalink
[SPARK-50141][PYTHON] Make lpad and rpad accept Column type argum…
Browse files Browse the repository at this point in the history
…ents

### What changes were proposed in this pull request?
Make `lpad` and `rpad` accept Column type arguments

### Why are the changes needed?
The underlying expressions actually accept more data types than `str`.

### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?
added doctests

### Was this patch authored or co-authored using generative AI tooling?
no

Closes apache#48672 from zhengruifeng/lr_pad_col.

Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
  • Loading branch information
zhengruifeng committed Oct 28, 2024
1 parent 03e051b commit 51bd96d
Show file tree
Hide file tree
Showing 3 changed files with 116 additions and 32 deletions.
16 changes: 12 additions & 4 deletions python/pyspark/sql/connect/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2570,15 +2570,23 @@ def locate(substr: str, str: "ColumnOrName", pos: int = 1) -> Column:
locate.__doc__ = pysparkfuncs.locate.__doc__


def lpad(col: "ColumnOrName", len: int, pad: str) -> Column:
return _invoke_function("lpad", _to_col(col), lit(len), lit(pad))
def lpad(
col: "ColumnOrName",
len: Union[Column, int],
pad: Union[Column, str],
) -> Column:
return _invoke_function_over_columns("lpad", col, lit(len), lit(pad))


lpad.__doc__ = pysparkfuncs.lpad.__doc__


def rpad(col: "ColumnOrName", len: int, pad: str) -> Column:
return _invoke_function("rpad", _to_col(col), lit(len), lit(pad))
def rpad(
col: "ColumnOrName",
len: Union[Column, int],
pad: Union[Column, str],
) -> Column:
return _invoke_function_over_columns("rpad", col, lit(len), lit(pad))


rpad.__doc__ = pysparkfuncs.rpad.__doc__
Expand Down
102 changes: 82 additions & 20 deletions python/pyspark/sql/functions/builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -12556,7 +12556,11 @@ def locate(substr: str, str: "ColumnOrName", pos: int = 1) -> Column:


@_try_remote_functions
def lpad(col: "ColumnOrName", len: int, pad: str) -> Column:
def lpad(
col: "ColumnOrName",
len: Union[Column, int],
pad: Union[Column, str],
) -> Column:
"""
Left-pad the string column to width `len` with `pad`.

Expand All @@ -12567,31 +12571,62 @@ def lpad(col: "ColumnOrName", len: int, pad: str) -> Column:

Parameters
----------
col : :class:`~pyspark.sql.Column` or str
col : :class:`~pyspark.sql.Column` or column name
target column to work on.
len : int
len : :class:`~pyspark.sql.Column` or int
length of the final string.
pad : str

.. versionchanged:: 4.0.0
`len` now accepts column.

pad : :class:`~pyspark.sql.Column` or literal string
chars to prepend.

.. versionchanged:: 4.0.0
`pad` now accepts column.

Returns
-------
:class:`~pyspark.sql.Column`
left padded result.

Examples
--------
>>> df = spark.createDataFrame([('abcd',)], ['s',])
>>> df.select(lpad(df.s, 6, '#').alias('s')).collect()
[Row(s='##abcd')]
"""
from pyspark.sql.classic.column import _to_java_column
Example 1: Pad with a literal string

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([('abcd',), ('xyz',), ('12',)], ['s',])
>>> df.select("*", sf.lpad(df.s, 6, '#')).show()
+----+-------------+
| s|lpad(s, 6, #)|
+----+-------------+
|abcd| ##abcd|
| xyz| ###xyz|
| 12| ####12|
+----+-------------+

Example 2: Pad with a bytes column

return _invoke_function("lpad", _to_java_column(col), _enum_to_value(len), _enum_to_value(pad))
>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([('abcd',), ('xyz',), ('12',)], ['s',])
>>> df.select("*", sf.lpad(df.s, 6, sf.lit(b"\x75\x76"))).show()
+----+-------------------+
| s|lpad(s, 6, X'7576')|
+----+-------------------+
|abcd| uvabcd|
| xyz| uvuxyz|
| 12| uvuv12|
+----+-------------------+
"""
return _invoke_function_over_columns("lpad", col, lit(len), lit(pad))


@_try_remote_functions
def rpad(col: "ColumnOrName", len: int, pad: str) -> Column:
def rpad(
col: "ColumnOrName",
len: Union[Column, int],
pad: Union[Column, str],
) -> Column:
"""
Right-pad the string column to width `len` with `pad`.

Expand All @@ -12604,10 +12639,17 @@ def rpad(col: "ColumnOrName", len: int, pad: str) -> Column:
----------
col : :class:`~pyspark.sql.Column` or str
target column to work on.
len : int
len : :class:`~pyspark.sql.Column` or int
length of the final string.
pad : str
chars to append.

.. versionchanged:: 4.0.0
`len` now accepts column.

pad : :class:`~pyspark.sql.Column` or literal string
chars to append.

.. versionchanged:: 4.0.0
`pad` now accepts column.

Returns
-------
Expand All @@ -12616,13 +12658,33 @@ def rpad(col: "ColumnOrName", len: int, pad: str) -> Column:

Examples
--------
>>> df = spark.createDataFrame([('abcd',)], ['s',])
>>> df.select(rpad(df.s, 6, '#').alias('s')).collect()
[Row(s='abcd##')]
"""
from pyspark.sql.classic.column import _to_java_column
Example 1: Pad with a literal string

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([('abcd',), ('xyz',), ('12',)], ['s',])
>>> df.select("*", sf.rpad(df.s, 6, '#')).show()
+----+-------------+
| s|rpad(s, 6, #)|
+----+-------------+
|abcd| abcd##|
| xyz| xyz###|
| 12| 12####|
+----+-------------+

Example 2: Pad with a bytes column

return _invoke_function("rpad", _to_java_column(col), _enum_to_value(len), _enum_to_value(pad))
>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([('abcd',), ('xyz',), ('12',)], ['s',])
>>> df.select("*", sf.rpad(df.s, 6, sf.lit(b"\x75\x76"))).show()
+----+-------------------+
| s|rpad(s, 6, X'7576')|
+----+-------------------+
|abcd| abcduv|
| xyz| xyzuvu|
| 12| 12uvuv|
+----+-------------------+
"""
return _invoke_function_over_columns("rpad", col, lit(len), lit(pad))


@_try_remote_functions
Expand Down
30 changes: 22 additions & 8 deletions sql/api/src/main/scala/org/apache/spark/sql/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4075,8 +4075,7 @@ object functions {
* @group string_funcs
* @since 1.5.0
*/
def lpad(str: Column, len: Int, pad: String): Column =
Column.fn("lpad", str, lit(len), lit(pad))
def lpad(str: Column, len: Int, pad: String): Column = lpad(str, lit(len), lit(pad))

/**
* Left-pad the binary column with pad to a byte length of len. If the binary column is longer
Expand All @@ -4085,8 +4084,16 @@ object functions {
* @group string_funcs
* @since 3.3.0
*/
def lpad(str: Column, len: Int, pad: Array[Byte]): Column =
Column.fn("lpad", str, lit(len), lit(pad))
def lpad(str: Column, len: Int, pad: Array[Byte]): Column = lpad(str, lit(len), lit(pad))

/**
* Left-pad the string column with pad to a length of len. If the string column is longer than
* len, the return value is shortened to len characters.
*
* @group string_funcs
* @since 4.0.0
*/
def lpad(str: Column, len: Column, pad: Column): Column = Column.fn("lpad", str, len, pad)

/**
* Trim the spaces from left end for the specified string value.
Expand Down Expand Up @@ -4263,8 +4270,7 @@ object functions {
* @group string_funcs
* @since 1.5.0
*/
def rpad(str: Column, len: Int, pad: String): Column =
Column.fn("rpad", str, lit(len), lit(pad))
def rpad(str: Column, len: Int, pad: String): Column = rpad(str, lit(len), lit(pad))

/**
* Right-pad the binary column with pad to a byte length of len. If the binary column is longer
Expand All @@ -4273,8 +4279,16 @@ object functions {
* @group string_funcs
* @since 3.3.0
*/
def rpad(str: Column, len: Int, pad: Array[Byte]): Column =
Column.fn("rpad", str, lit(len), lit(pad))
def rpad(str: Column, len: Int, pad: Array[Byte]): Column = rpad(str, lit(len), lit(pad))

/**
* Right-pad the string column with pad to a length of len. If the string column is longer than
* len, the return value is shortened to len characters.
*
* @group string_funcs
* @since 4.0.0
*/
def rpad(str: Column, len: Column, pad: Column): Column = Column.fn("rpad", str, len, pad)

/**
* Repeats a string column n times, and returns it as a new string column.
Expand Down

0 comments on commit 51bd96d

Please sign in to comment.