From 90a100a7c3d554d1c674a90feaefdf42fd4c45ba Mon Sep 17 00:00:00 2001 From: dave-pollock <20267387+dave-pollock@users.noreply.github.com> Date: Wed, 1 Nov 2023 00:53:08 +1100 Subject: [PATCH] Fix SalesforceHook compatibility with Pandas 2.x (#35145) In pandas 2.0.0, pandas.np was removed. Airflow 2.7.0 switched to pandas 2.x; however, the SalesforceHook still has a reference to pandas.np, and is therefore broken in some cases after 2.7.0, resulting in an AttributeError if the _to_timestamp method is called. --- airflow/providers/salesforce/hooks/salesforce.py | 3 ++- tests/providers/salesforce/hooks/test_salesforce.py | 7 +++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/airflow/providers/salesforce/hooks/salesforce.py b/airflow/providers/salesforce/hooks/salesforce.py index bca3521226553..5bdbb6eaa815f 100644 --- a/airflow/providers/salesforce/hooks/salesforce.py +++ b/airflow/providers/salesforce/hooks/salesforce.py @@ -242,6 +242,7 @@ def _to_timestamp(cls, column: pd.Series) -> pd.Series: # between 0 and 10 are turned into timestamps # if the column cannot be converted, # just return the original column untouched + import numpy as np import pandas as pd try: @@ -259,7 +260,7 @@ def _to_timestamp(cls, column: pd.Series) -> pd.Series: try: converted.append(value.timestamp()) except (ValueError, AttributeError): - converted.append(pd.np.NaN) + converted.append(np.NaN) return pd.Series(converted, index=column.index) diff --git a/tests/providers/salesforce/hooks/test_salesforce.py b/tests/providers/salesforce/hooks/test_salesforce.py index b3792c612156f..0c1d9ede4c6f0 100644 --- a/tests/providers/salesforce/hooks/test_salesforce.py +++ b/tests/providers/salesforce/hooks/test_salesforce.py @@ -422,7 +422,9 @@ def test_write_object_to_file_ndjson_with_record_time(self, mock_data_frame, moc ) @patch( "pandas.DataFrame.from_records", - return_value=pd.DataFrame({"test": [1, 2, 3], "field_1": ["2019-01-01", "2019-01-02", "2019-01-03"]}), + 
return_value=pd.DataFrame( + {"test": [1, 2, 3, 4], "field_1": ["2019-01-01", "2019-01-02", "2019-01-03", "NaT"]} + ), ) def test_object_to_df_with_timestamp_conversion(self, mock_data_frame, mock_describe_object): obj_name = "obj_name" @@ -434,7 +436,8 @@ def test_object_to_df_with_timestamp_conversion(self, mock_data_frame, mock_desc mock_describe_object.assert_called_once_with(obj_name) pd.testing.assert_frame_equal( - data_frame, pd.DataFrame({"test": [1, 2, 3], "field_1": [1.546301e09, 1.546387e09, 1.546474e09]}) + data_frame, + pd.DataFrame({"test": [1, 2, 3, 4], "field_1": [1.546301e09, 1.546387e09, 1.546474e09, np.nan]}), ) @patch("airflow.providers.salesforce.hooks.salesforce.time.time", return_value=1.23)