-
Notifications
You must be signed in to change notification settings - Fork 112
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SNOW-1803811: Allow mixed-case field names for struct type columns #2640
base: main
Are you sure you want to change the base?
Changes from 4 commits
4ac428f
3c01bb1
3be6719
b5ac8cc
08e7f27
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -291,7 +291,7 @@ def convert_sp_to_sf_type(datatype: DataType) -> str: | |
if isinstance(datatype, StructType): | ||
if datatype.structured: | ||
fields = ", ".join( | ||
f"{field.name} {convert_sp_to_sf_type(field.datatype)}" | ||
f"{field.raw_name} {convert_sp_to_sf_type(field.datatype)}" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I kinda have the same problem as Afroz's on BCR.. previous we have name upper cased but now we do not, this won't break users when user references keys in the map object? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As far as I know nobody is using StructType columns yet due to them requiring structured types be enabled for their account. |
||
for field in datatype.fields | ||
) | ||
return f"OBJECT({fields})" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -398,6 +398,7 @@ class ColumnIdentifier: | |
"""Represents a column identifier.""" | ||
|
||
def __init__(self, normalized_name: str) -> None: | ||
self.raw_name = normalized_name | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If this is internal usage only then I prefer marking them as private |
||
self.normalized_name = quote_name(normalized_name) | ||
|
||
@property | ||
|
@@ -478,6 +479,10 @@ def name(self) -> str: | |
"""Returns the column name.""" | ||
return self.column_identifier.name | ||
|
||
@property | ||
def raw_name(self) -> str: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as the the public vs private api comment |
||
return self.column_identifier.raw_name | ||
|
||
@name.setter | ||
def name(self, n: str) -> None: | ||
self.column_identifier = ColumnIdentifier(n) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -60,7 +60,7 @@ | |
_STRUCTURE_DATAFRAME_QUERY = """ | ||
select | ||
object_construct('k1', 1) :: map(varchar, int) as map, | ||
object_construct('A', 'foo', 'B', 0.05) :: object(A varchar, B float) as obj, | ||
object_construct('A', 'foo', 'b', 0.05) :: object(A varchar, b float) as obj, | ||
[1.0, 3.1, 4.5] :: array(float) as arr | ||
""" | ||
|
||
|
@@ -71,10 +71,10 @@ def _create_test_dataframe(s): | |
object_construct(lit("k1"), lit(1)) | ||
.cast(MapType(StringType(), IntegerType(), structured=True)) | ||
.alias("map"), | ||
object_construct(lit("A"), lit("foo"), lit("B"), lit(0.05)) | ||
object_construct(lit("A"), lit("foo"), lit("b"), lit(0.05)) | ||
.cast( | ||
StructType( | ||
[StructField("A", StringType()), StructField("B", DoubleType())], | ||
[StructField("A", StringType()), StructField("b", DoubleType())], | ||
structured=True, | ||
) | ||
) | ||
|
@@ -106,7 +106,7 @@ def _create_test_dataframe(s): | |
StructType( | ||
[ | ||
StructField("A", StringType(16777216), nullable=True), | ||
StructField("B", DoubleType(), nullable=True), | ||
StructField('"b"', DoubleType(), nullable=True), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we need extra double quote here? |
||
], | ||
structured=True, | ||
), | ||
|
@@ -386,7 +386,7 @@ def test_structured_dtypes_select(structured_type_session, examples): | |
flattened_df = df.select( | ||
df.map["k1"].alias("value1"), | ||
df.obj["A"].alias("a"), | ||
col("obj")["B"].alias("b"), | ||
col("obj")["b"].alias("b"), | ||
df.arr[0].alias("value2"), | ||
df.arr[1].alias("value3"), | ||
col("arr")[2].alias("value4"), | ||
|
@@ -395,7 +395,7 @@ def test_structured_dtypes_select(structured_type_session, examples): | |
[ | ||
StructField("VALUE1", LongType(), nullable=True), | ||
StructField("A", StringType(16777216), nullable=True), | ||
StructField("B", DoubleType(), nullable=True), | ||
StructField("b", DoubleType(), nullable=True), | ||
StructField("VALUE2", DoubleType(), nullable=True), | ||
StructField("VALUE3", DoubleType(), nullable=True), | ||
StructField("VALUE4", DoubleType(), nullable=True), | ||
|
@@ -424,12 +424,12 @@ def test_structured_dtypes_pandas(structured_type_session, structured_type_suppo | |
if structured_type_support: | ||
assert ( | ||
pdf.to_json() | ||
== '{"MAP":{"0":[["k1",1.0]]},"OBJ":{"0":{"A":"foo","B":0.05}},"ARR":{"0":[1.0,3.1,4.5]}}' | ||
== '{"MAP":{"0":[["k1",1.0]]},"OBJ":{"0":{"A":"foo","b":0.05}},"ARR":{"0":[1.0,3.1,4.5]}}' | ||
) | ||
else: | ||
assert ( | ||
pdf.to_json() | ||
== '{"MAP":{"0":"{\\n \\"k1\\": 1\\n}"},"OBJ":{"0":"{\\n \\"A\\": \\"foo\\",\\n \\"B\\": 5.000000000000000e-02\\n}"},"ARR":{"0":"[\\n 1.000000000000000e+00,\\n 3.100000000000000e+00,\\n 4.500000000000000e+00\\n]"}}' | ||
== '{"MAP":{"0":"{\\n \\"k1\\": 1\\n}"},"OBJ":{"0":"{\\n \\"A\\": \\"foo\\",\\n \\"b\\": 5.000000000000000e-02\\n}"},"ARR":{"0":"[\\n 1.000000000000000e+00,\\n 3.100000000000000e+00,\\n 4.500000000000000e+00\\n]"}}' | ||
) | ||
|
||
|
||
|
@@ -467,7 +467,7 @@ def test_structured_dtypes_iceberg( | |
) | ||
assert save_ddl[0][0] == ( | ||
f"create or replace ICEBERG TABLE {table_name.upper()} (\n\t" | ||
"MAP MAP(STRING, LONG),\n\tOBJ OBJECT(A STRING, B DOUBLE),\n\tARR ARRAY(DOUBLE)\n)\n " | ||
"MAP MAP(STRING, LONG),\n\tOBJ OBJECT(A STRING, b DOUBLE),\n\tARR ARRAY(DOUBLE)\n)\n " | ||
"EXTERNAL_VOLUME = 'PYTHON_CONNECTOR_ICEBERG_EXVOL'\n CATALOG = 'SNOWFLAKE'\n " | ||
"BASE_LOCATION = 'python_connector_merge_gate/';" | ||
) | ||
|
@@ -728,8 +728,8 @@ def test_structured_dtypes_iceberg_create_from_values( | |
_, __, expected_schema = STRUCTURED_TYPES_EXAMPLES[True] | ||
table_name = f"snowpark_structured_dtypes_{uuid.uuid4().hex[:5]}" | ||
data = [ | ||
({"x": 1}, {"A": "a", "B": 1}, [1, 1, 1]), | ||
({"x": 2}, {"A": "b", "B": 2}, [2, 2, 2]), | ||
({"x": 1}, {"A": "a", "b": 1}, [1, 1, 1]), | ||
({"x": 2}, {"A": "b", "b": 2}, [2, 2, 2]), | ||
] | ||
try: | ||
create_df = structured_type_session.create_dataframe( | ||
|
@@ -940,6 +940,6 @@ def test_structured_type_print_schema( | |
" | |-- key: StringType()\n" | ||
" | |-- value: ArrayType\n" | ||
" | | |-- element: StructType\n" | ||
' | | | |-- "FIELD1": StringType() (nullable = True)\n' | ||
' | | | |-- "FIELD2": LongType() (nullable = True)\n' | ||
' | | | |-- "Field1": StringType() (nullable = True)\n' | ||
' | | | |-- "Field2": LongType() (nullable = True)\n' | ||
) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
does this only affected structured types or all
StructType
s? If it is the latter, then it would be a bcr, no?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This only affects structured types because currently semi-structured objects don't become StructType columns, but instead get converted to MapType for some reason.