Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SNOW-1803811: Allow mixed-case field names for struct type columns #2640

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
- `catalog_sync`
- `storage_serialization_policy`
- Added support for nested data types to `DataFrame.print_schema`
- Added support for mixed case field names in struct type columns.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does this affect only structured types, or all StructTypes? If it is the latter, then it would be a BCR, no?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This only affects structured types because currently semi-structured objects don't become StructType columns, but instead get converted to MapType for some reason.

#### Bug Fixes

Expand Down
2 changes: 1 addition & 1 deletion src/snowflake/snowpark/_internal/type_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@ def convert_sp_to_sf_type(datatype: DataType) -> str:
if isinstance(datatype, StructType):
if datatype.structured:
fields = ", ".join(
f"{field.name} {convert_sp_to_sf_type(field.datatype)}"
f"{field.raw_name} {convert_sp_to_sf_type(field.datatype)}"
Copy link
Contributor

@sfc-gh-aling sfc-gh-aling Nov 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have the same concern as Afroz about this being a BCR. Previously the field names were upper-cased, but now they are not. Won't this break users who reference keys in the map object?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I know, nobody is using StructType columns yet, because they require structured types to be enabled for the account.

for field in datatype.fields
)
return f"OBJECT({fields})"
Expand Down
5 changes: 5 additions & 0 deletions src/snowflake/snowpark/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,7 @@ class ColumnIdentifier:
"""Represents a column identifier."""

def __init__(self, normalized_name: str) -> None:
self.raw_name = normalized_name
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this is for internal use only, then I would prefer marking it as private: `_raw_name`.

self.normalized_name = quote_name(normalized_name)

@property
Expand Down Expand Up @@ -478,6 +479,10 @@ def name(self) -> str:
"""Returns the column name."""
return self.column_identifier.name

@property
def raw_name(self) -> str:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as the public vs. private API comment.

return self.column_identifier.raw_name

@name.setter
def name(self, n: str) -> None:
self.column_identifier = ColumnIdentifier(n)
Expand Down
26 changes: 13 additions & 13 deletions tests/integ/scala/test_datatype_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@
_STRUCTURE_DATAFRAME_QUERY = """
select
object_construct('k1', 1) :: map(varchar, int) as map,
object_construct('A', 'foo', 'B', 0.05) :: object(A varchar, B float) as obj,
object_construct('A', 'foo', 'b', 0.05) :: object(A varchar, b float) as obj,
[1.0, 3.1, 4.5] :: array(float) as arr
"""

Expand All @@ -71,10 +71,10 @@ def _create_test_dataframe(s):
object_construct(lit("k1"), lit(1))
.cast(MapType(StringType(), IntegerType(), structured=True))
.alias("map"),
object_construct(lit("A"), lit("foo"), lit("B"), lit(0.05))
object_construct(lit("A"), lit("foo"), lit("b"), lit(0.05))
.cast(
StructType(
[StructField("A", StringType()), StructField("B", DoubleType())],
[StructField("A", StringType()), StructField("b", DoubleType())],
structured=True,
)
)
Expand Down Expand Up @@ -106,7 +106,7 @@ def _create_test_dataframe(s):
StructType(
[
StructField("A", StringType(16777216), nullable=True),
StructField("B", DoubleType(), nullable=True),
StructField('"b"', DoubleType(), nullable=True),
Copy link
Contributor

@sfc-gh-aling sfc-gh-aling Nov 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need the extra double quotes here?

],
structured=True,
),
Expand Down Expand Up @@ -386,7 +386,7 @@ def test_structured_dtypes_select(structured_type_session, examples):
flattened_df = df.select(
df.map["k1"].alias("value1"),
df.obj["A"].alias("a"),
col("obj")["B"].alias("b"),
col("obj")["b"].alias("b"),
df.arr[0].alias("value2"),
df.arr[1].alias("value3"),
col("arr")[2].alias("value4"),
Expand All @@ -395,7 +395,7 @@ def test_structured_dtypes_select(structured_type_session, examples):
[
StructField("VALUE1", LongType(), nullable=True),
StructField("A", StringType(16777216), nullable=True),
StructField("B", DoubleType(), nullable=True),
StructField("b", DoubleType(), nullable=True),
StructField("VALUE2", DoubleType(), nullable=True),
StructField("VALUE3", DoubleType(), nullable=True),
StructField("VALUE4", DoubleType(), nullable=True),
Expand Down Expand Up @@ -424,12 +424,12 @@ def test_structured_dtypes_pandas(structured_type_session, structured_type_suppo
if structured_type_support:
assert (
pdf.to_json()
== '{"MAP":{"0":[["k1",1.0]]},"OBJ":{"0":{"A":"foo","B":0.05}},"ARR":{"0":[1.0,3.1,4.5]}}'
== '{"MAP":{"0":[["k1",1.0]]},"OBJ":{"0":{"A":"foo","b":0.05}},"ARR":{"0":[1.0,3.1,4.5]}}'
)
else:
assert (
pdf.to_json()
== '{"MAP":{"0":"{\\n \\"k1\\": 1\\n}"},"OBJ":{"0":"{\\n \\"A\\": \\"foo\\",\\n \\"B\\": 5.000000000000000e-02\\n}"},"ARR":{"0":"[\\n 1.000000000000000e+00,\\n 3.100000000000000e+00,\\n 4.500000000000000e+00\\n]"}}'
== '{"MAP":{"0":"{\\n \\"k1\\": 1\\n}"},"OBJ":{"0":"{\\n \\"A\\": \\"foo\\",\\n \\"b\\": 5.000000000000000e-02\\n}"},"ARR":{"0":"[\\n 1.000000000000000e+00,\\n 3.100000000000000e+00,\\n 4.500000000000000e+00\\n]"}}'
)


Expand Down Expand Up @@ -467,7 +467,7 @@ def test_structured_dtypes_iceberg(
)
assert save_ddl[0][0] == (
f"create or replace ICEBERG TABLE {table_name.upper()} (\n\t"
"MAP MAP(STRING, LONG),\n\tOBJ OBJECT(A STRING, B DOUBLE),\n\tARR ARRAY(DOUBLE)\n)\n "
"MAP MAP(STRING, LONG),\n\tOBJ OBJECT(A STRING, b DOUBLE),\n\tARR ARRAY(DOUBLE)\n)\n "
"EXTERNAL_VOLUME = 'PYTHON_CONNECTOR_ICEBERG_EXVOL'\n CATALOG = 'SNOWFLAKE'\n "
"BASE_LOCATION = 'python_connector_merge_gate/';"
)
Expand Down Expand Up @@ -728,8 +728,8 @@ def test_structured_dtypes_iceberg_create_from_values(
_, __, expected_schema = STRUCTURED_TYPES_EXAMPLES[True]
table_name = f"snowpark_structured_dtypes_{uuid.uuid4().hex[:5]}"
data = [
({"x": 1}, {"A": "a", "B": 1}, [1, 1, 1]),
({"x": 2}, {"A": "b", "B": 2}, [2, 2, 2]),
({"x": 1}, {"A": "a", "b": 1}, [1, 1, 1]),
({"x": 2}, {"A": "b", "b": 2}, [2, 2, 2]),
]
try:
create_df = structured_type_session.create_dataframe(
Expand Down Expand Up @@ -940,6 +940,6 @@ def test_structured_type_print_schema(
" | |-- key: StringType()\n"
" | |-- value: ArrayType\n"
" | | |-- element: StructType\n"
' | | | |-- "FIELD1": StringType() (nullable = True)\n'
' | | | |-- "FIELD2": LongType() (nullable = True)\n'
' | | | |-- "Field1": StringType() (nullable = True)\n'
' | | | |-- "Field2": LongType() (nullable = True)\n'
)
Loading