diff --git a/CHANGELOG.md b/CHANGELOG.md index 4283d2cc5e..3ed84a9bc4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ - `storage_serialization_policy` - Added support for nested data types to `DataFrame.print_schema` - Added support for `level` parameter to `DataFrame.print_schema` +- Added support for mixed case field names in struct type columns. - Improved flexibility of `DataFrameReader` and `DataFrameWriter` API by adding support for the following: - Added `format` method to `DataFrameReader` and `DataFrameWriter` to specify file format when loading or unloading results. - Added `load` method to `DataFrameReader` to work in conjunction with `format`. diff --git a/src/snowflake/snowpark/_internal/type_utils.py b/src/snowflake/snowpark/_internal/type_utils.py index 497a1ee20e..98983f55a7 100644 --- a/src/snowflake/snowpark/_internal/type_utils.py +++ b/src/snowflake/snowpark/_internal/type_utils.py @@ -291,7 +291,7 @@ def convert_sp_to_sf_type(datatype: DataType) -> str: if isinstance(datatype, StructType): if datatype.structured: fields = ", ".join( - f"{field.name} {convert_sp_to_sf_type(field.datatype)}" + f"{field.raw_name} {convert_sp_to_sf_type(field.datatype)}" for field in datatype.fields ) return f"OBJECT({fields})" diff --git a/src/snowflake/snowpark/types.py b/src/snowflake/snowpark/types.py index 652c5ceef5..24699561f6 100644 --- a/src/snowflake/snowpark/types.py +++ b/src/snowflake/snowpark/types.py @@ -407,6 +407,7 @@ class ColumnIdentifier: """Represents a column identifier.""" def __init__(self, normalized_name: str) -> None: + self.raw_name = normalized_name self.normalized_name = quote_name(normalized_name) @property @@ -487,6 +488,10 @@ def name(self) -> str: """Returns the column name.""" return self.column_identifier.name + @property + def raw_name(self) -> str: + return self.column_identifier.raw_name + @name.setter def name(self, n: str) -> None: self.column_identifier = ColumnIdentifier(n) diff --git a/tests/integ/scala/test_datatype_suite.py b/tests/integ/scala/test_datatype_suite.py index df50a2abfb..367b47d372 100644 --- a/tests/integ/scala/test_datatype_suite.py +++ b/tests/integ/scala/test_datatype_suite.py @@ -60,7 +60,7 @@ _STRUCTURE_DATAFRAME_QUERY = """ select object_construct('k1', 1) :: map(varchar, int) as map, - object_construct('A', 'foo', 'B', 0.05) :: object(A varchar, B float) as obj, + object_construct('A', 'foo', 'b', 0.05) :: object(A varchar, b float) as obj, [1.0, 3.1, 4.5] :: array(float) as arr """ @@ -71,10 +71,10 @@ def _create_test_dataframe(s): object_construct(lit("k1"), lit(1)) .cast(MapType(StringType(), IntegerType(), structured=True)) .alias("map"), - object_construct(lit("A"), lit("foo"), lit("B"), lit(0.05)) + object_construct(lit("A"), lit("foo"), lit("b"), lit(0.05)) .cast( StructType( - [StructField("A", StringType()), StructField("B", DoubleType())], + [StructField("A", StringType()), StructField("b", DoubleType())], structured=True, ) ) @@ -106,7 +106,7 @@ def _create_test_dataframe(s): StructType( [ StructField("A", StringType(16777216), nullable=True), - StructField("B", DoubleType(), nullable=True), + StructField('"b"', DoubleType(), nullable=True), ], structured=True, ), @@ -386,7 +386,7 @@ def test_structured_dtypes_select(structured_type_session, examples): flattened_df = df.select( df.map["k1"].alias("value1"), df.obj["A"].alias("a"), - col("obj")["B"].alias("b"), + col("obj")["b"].alias("b"), df.arr[0].alias("value2"), df.arr[1].alias("value3"), col("arr")[2].alias("value4"), @@ -395,7 +395,7 @@ def test_structured_dtypes_select(structured_type_session, examples): [ StructField("VALUE1", LongType(), nullable=True), StructField("A", StringType(16777216), nullable=True), - StructField("B", DoubleType(), nullable=True), + StructField("b", DoubleType(), nullable=True), StructField("VALUE2", DoubleType(), nullable=True), StructField("VALUE3", DoubleType(), nullable=True), StructField("VALUE4", DoubleType(), nullable=True), @@ -424,12 +424,12 @@ def test_structured_dtypes_pandas(structured_type_session, structured_type_suppo if structured_type_support: assert ( pdf.to_json() - == '{"MAP":{"0":[["k1",1.0]]},"OBJ":{"0":{"A":"foo","B":0.05}},"ARR":{"0":[1.0,3.1,4.5]}}' + == '{"MAP":{"0":[["k1",1.0]]},"OBJ":{"0":{"A":"foo","b":0.05}},"ARR":{"0":[1.0,3.1,4.5]}}' ) else: assert ( pdf.to_json() - == '{"MAP":{"0":"{\\n \\"k1\\": 1\\n}"},"OBJ":{"0":"{\\n \\"A\\": \\"foo\\",\\n \\"B\\": 5.000000000000000e-02\\n}"},"ARR":{"0":"[\\n 1.000000000000000e+00,\\n 3.100000000000000e+00,\\n 4.500000000000000e+00\\n]"}}' + == '{"MAP":{"0":"{\\n \\"k1\\": 1\\n}"},"OBJ":{"0":"{\\n \\"A\\": \\"foo\\",\\n \\"b\\": 5.000000000000000e-02\\n}"},"ARR":{"0":"[\\n 1.000000000000000e+00,\\n 3.100000000000000e+00,\\n 4.500000000000000e+00\\n]"}}' ) @@ -467,7 +467,7 @@ def test_structured_dtypes_iceberg( ) assert save_ddl[0][0] == ( f"create or replace ICEBERG TABLE {table_name.upper()} (\n\t" - "MAP MAP(STRING, LONG),\n\tOBJ OBJECT(A STRING, B DOUBLE),\n\tARR ARRAY(DOUBLE)\n)\n " + "MAP MAP(STRING, LONG),\n\tOBJ OBJECT(A STRING, b DOUBLE),\n\tARR ARRAY(DOUBLE)\n)\n " "EXTERNAL_VOLUME = 'PYTHON_CONNECTOR_ICEBERG_EXVOL'\n CATALOG = 'SNOWFLAKE'\n " "BASE_LOCATION = 'python_connector_merge_gate/';" ) @@ -733,8 +733,8 @@ def test_structured_dtypes_iceberg_create_from_values( _, __, expected_schema = STRUCTURED_TYPES_EXAMPLES[True] table_name = f"snowpark_structured_dtypes_{uuid.uuid4().hex[:5]}" data = [ - ({"x": 1}, {"A": "a", "B": 1}, [1, 1, 1]), - ({"x": 2}, {"A": "b", "B": 2}, [2, 2, 2]), + ({"x": 1}, {"A": "a", "b": 1}, [1, 1, 1]), + ({"x": 2}, {"A": "b", "b": 2}, [2, 2, 2]), ] try: create_df = structured_type_session.create_dataframe( @@ -945,8 +945,8 @@ def test_structured_type_print_schema( " | |-- key: StringType()\n" " | |-- value: ArrayType\n" " | | |-- element: StructType\n" - ' | | | |-- "FIELD1": StringType() (nullable = True)\n' - ' | | | |-- "FIELD2": LongType() (nullable = True)\n' + ' | | | |-- "Field1": StringType() (nullable = True)\n' + ' | | | |-- "Field2": LongType() (nullable = True)\n' ) # Test that depth works as expected