Skip to content

Commit

Permalink
[SPARK-50968][PYTHON] Fix the usage of Column.__new__
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

Fixes the usage of `Column.__new__`.

### Why are the changes needed?

Currently, `Column.__init__` is explicitly invoked inside `Column.__new__`. This causes `__init__` to run twice, because Python automatically calls `__init__` after `__new__` returns; the explicit call inside `__new__` is therefore redundant and should be removed.

```py
>>> class A:
...   def __new__(cls, *args, **kwargs):
...     print(f"__NEW__: {args}, {kwargs}")
...     obj = object.__new__(cls)
...     obj.__init__(*args, **kwargs)
...     return obj
...   def __init__(self, *args, **kwargs):
...     print(f"__INIT__: {args}, {kwargs}")
...
>>> A(1,2,3, k=4)
__NEW__: (1, 2, 3), {'k': 4}
__INIT__: (1, 2, 3), {'k': 4}
__INIT__: (1, 2, 3), {'k': 4}
<__main__.A object at 0x102ccab90>

>>> class B:
...   def __new__(cls, *args, **kwargs):
...     print(f"__NEW__: {args}, {kwargs}")
...     return object.__new__(cls)
...   def __init__(self, *args, **kwargs):
...     print(f"__INIT__: {args}, {kwargs}")
...
>>> B(1,2,3, k=4)
__NEW__: (1, 2, 3), {'k': 4}
__INIT__: (1, 2, 3), {'k': 4}
<__main__.B object at 0x102b2b970>
```

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

The existing tests should pass.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #49631 from ueshin/issues/SPARK-50968/column_new.

Authored-by: Takuya Ueshin <[email protected]>
Signed-off-by: Takuya Ueshin <[email protected]>
  • Loading branch information
ueshin committed Jan 24, 2025
1 parent 2d74c3d commit 7c316f7
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 28 deletions.
19 changes: 14 additions & 5 deletions python/pyspark/pandas/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

from pyspark.sql import functions as F, Column as PySparkColumn
from pyspark.sql.types import BooleanType, LongType, DataType
from pyspark.sql.utils import is_remote
from pyspark.errors import AnalysisException
from pyspark import pandas as ps # noqa: F401
from pyspark.pandas._typing import Label, Name, Scalar
Expand Down Expand Up @@ -534,11 +535,19 @@ def __getitem__(self, key: Any) -> Union["Series", "DataFrame"]:
sdf = sdf.limit(sdf.count() + limit)
sdf = sdf.drop(NATURAL_ORDER_COLUMN_NAME)
except AnalysisException:
raise KeyError(
"[{}] don't exist in columns".format(
[col._jc.toString() for col in data_spark_columns]
)
)
if is_remote():
from pyspark.sql.connect.column import Column as ConnectColumn

cols_as_str = [
cast(ConnectColumn, col)._expr.__repr__() for col in data_spark_columns
]
else:
from pyspark.sql.classic.column import Column as ClassicColumn

cols_as_str = [
cast(ClassicColumn, col)._jc.toString() for col in data_spark_columns
]
raise KeyError("[{}] don't exist in columns".format(cols_as_str))

internal = InternalFrame(
spark_frame=sdf,
Expand Down
12 changes: 12 additions & 0 deletions python/pyspark/pandas/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -957,6 +957,18 @@ def spark_column_equals(left: Column, right: Column) -> bool:
)
return repr(left).replace("`", "") == repr(right).replace("`", "")
else:
from pyspark.sql.classic.column import Column as ClassicColumn

if not isinstance(left, ClassicColumn):
raise PySparkTypeError(
errorClass="NOT_COLUMN",
messageParameters={"arg_name": "left", "arg_type": type(left).__name__},
)
if not isinstance(right, ClassicColumn):
raise PySparkTypeError(
errorClass="NOT_COLUMN",
messageParameters={"arg_name": "right", "arg_type": type(right).__name__},
)
return left._jc.equals(right._jc)


Expand Down
12 changes: 5 additions & 7 deletions python/pyspark/sql/classic/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,13 +177,11 @@ def _reverse_op(

@with_origin_to_class
class Column(ParentColumn):
def __new__(
cls,
jc: "JavaObject",
) -> "Column":
self = object.__new__(cls)
self.__init__(jc) # type: ignore[misc]
return self
def __new__(cls, *args: Any, **kwargs: Any) -> "Column":
return object.__new__(cls)

def __getnewargs__(self) -> Tuple[Any, ...]:
return (self._jc,)

def __init__(self, jc: "JavaObject") -> None:
self._jc = jc
Expand Down
11 changes: 2 additions & 9 deletions python/pyspark/sql/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
from pyspark.errors import PySparkValueError

if TYPE_CHECKING:
from py4j.java_gateway import JavaObject
from pyspark.sql._typing import LiteralType, DecimalLiteral, DateTimeLiteral
from pyspark.sql.window import WindowSpec

Expand Down Expand Up @@ -72,16 +71,10 @@ class Column(TableValuedFunctionArgument):
# HACK ALERT!! this is to reduce the backward compatibility concern, and returns
# Spark Classic Column by default. This is NOT an API, and NOT supposed to
# be directly invoked. DO NOT use this constructor.
def __new__(
cls,
jc: "JavaObject",
) -> "Column":
def __new__(cls, *args: Any, **kwargs: Any) -> "Column":
from pyspark.sql.classic.column import Column

return Column.__new__(Column, jc)

def __init__(self, jc: "JavaObject") -> None:
self._jc = jc
return Column.__new__(Column, *args, **kwargs)

# arithmetic operators
@dispatch_col_method
Expand Down
13 changes: 6 additions & 7 deletions python/pyspark/sql/connect/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
Any,
Union,
Optional,
Tuple,
)

from pyspark.sql.column import Column as ParentColumn
Expand Down Expand Up @@ -109,13 +110,11 @@ def _to_expr(v: Any) -> Expression:

@with_origin_to_class(["to_plan"])
class Column(ParentColumn):
def __new__(
cls,
expr: "Expression",
) -> "Column":
self = object.__new__(cls)
self.__init__(expr) # type: ignore[misc]
return self
def __new__(cls, *args: Any, **kwargs: Any) -> "Column":
return object.__new__(cls)

def __getnewargs__(self) -> Tuple[Any, ...]:
return (self._expr,)

def __init__(self, expr: "Expression") -> None:
if not isinstance(expr, Expression):
Expand Down

0 comments on commit 7c316f7

Please sign in to comment.