From 3218765413e5bd6ab5c602a0ad19d5eb5f5902f0 Mon Sep 17 00:00:00 2001
From: Hassan Syyid
Date: Thu, 10 Oct 2024 00:37:29 +0200
Subject: [PATCH] HG-3499: parse empty strings as booleans

---
 tap_hubspot_beta/client_base.py | 42 ++++++++++++++++++++++++++++++++-
 tap_hubspot_beta/client_v1.py   |  1 +
 tap_hubspot_beta/client_v2.py   |  1 +
 tap_hubspot_beta/client_v3.py   |  3 +++
 4 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/tap_hubspot_beta/client_base.py b/tap_hubspot_beta/client_base.py
index f6e5d4b..1ea98f0 100644
--- a/tap_hubspot_beta/client_base.py
+++ b/tap_hubspot_beta/client_base.py
@@ -331,7 +331,47 @@ def stream_maps(self) -> List[StreamMap]:
                 )
             ]
         return self._stream_maps
-    
+
+    def process_row_types(self, row) -> Dict[str, Any]:
+        """Coerce boolean-typed schema fields of ``row`` to real booleans.
+
+        For each row field whose schema type includes "boolean": None and
+        "" become False, "true"/"false" (any letter case) become the
+        matching bool, and every other value is left untouched.
+
+        The row is modified in place and returned; a None row is returned
+        unchanged.
+        """
+        if row is None:
+            return row
+
+        schema = self.schema["properties"]
+        for field, value in row.items():
+            if field not in schema:
+                # Skip fields not found in the schema
+                continue
+
+            # "type" may be a plain string or a list in any order, e.g.
+            # ["null", "boolean"], so never rely on its first element.
+            field_type = schema[field].get("type", [])
+            if isinstance(field_type, str):
+                field_type = [field_type]
+            if "boolean" not in field_type:
+                continue
+
+            if value is None:
+                row[field] = False
+            elif isinstance(value, str):
+                # Only strings are parsed; bools and numbers have no
+                # .lower() and are kept as they are.
+                lowered = value.lower()
+                if lowered == "true":
+                    row[field] = True
+                elif lowered in ("", "false"):
+                    row[field] = False
+
+        return row
+
     def is_first_sync(self):
         if self.stream_state.get("replication_key"):
             return False
diff --git a/tap_hubspot_beta/client_v1.py b/tap_hubspot_beta/client_v1.py
index b579c46..aeeb3a2 100644
--- a/tap_hubspot_beta/client_v1.py
+++ b/tap_hubspot_beta/client_v1.py
@@ -73,4 +73,5 @@ def post_process(self, row: dict, context: Optional[dict]) -> dict:
                 dt_field = datetime.fromtimestamp(int(row[field]) / 1000)
                 dt_field = dt_field.replace(tzinfo=None)
                 row[field] = dt_field.isoformat()
+        row = self.process_row_types(row)
         return row
diff --git a/tap_hubspot_beta/client_v2.py b/tap_hubspot_beta/client_v2.py
index ef4059d..fffd14f 100644
--- a/tap_hubspot_beta/client_v2.py
+++ b/tap_hubspot_beta/client_v2.py
@@ -44,6 +44,7 @@ def post_process(self, row: dict, context: Optional[dict]) -> dict:
         row["updatedAt"] = row["hs_lastmodifieddate"]
         row["createdAt"] = row["createdate"]
         row["archived"] = False
+        row = self.process_row_types(row)
         return row
 
     def request_records(self, context: Optional[dict]) -> Iterable[dict]:
diff --git a/tap_hubspot_beta/client_v3.py b/tap_hubspot_beta/client_v3.py
index 1d6585d..81559f4 100644
--- a/tap_hubspot_beta/client_v3.py
+++ b/tap_hubspot_beta/client_v3.py
@@ -116,6 +116,7 @@ def post_process(self, row: dict, context: Optional[dict]) -> dict:
             del row["properties"]
         # store archived value in _hg_archived
         row["_hg_archived"] = False
+        row = self.process_row_types(row)
         return row
 
     def _sync_records(  # noqa C901  # too complex
@@ -241,6 +242,7 @@ def post_process(self, row: dict, context: Optional[dict]) -> dict:
             for name, value in row["properties"].items():
                 row[name] = value
             del row["properties"]
+        row = self.process_row_types(row)
         return row
 
 
@@ -296,6 +298,7 @@ def post_process(self, row: dict, context: Optional[dict]) -> dict:
             for name, value in row["properties"].items():
                 row[name] = value
             del row["properties"]
+        row = self.process_row_types(row)
         return row
 
 class hubspotHistoryV3Stream(hubspotV3Stream):