From 865509bdb9941fd252c76adde784d17adfad3623 Mon Sep 17 00:00:00 2001 From: Vinicius Mesel <4984147+vmesel@users.noreply.github.com> Date: Mon, 27 Nov 2023 10:50:04 -0300 Subject: [PATCH 1/7] Adds new backoff generator and logging --- tap_hubspot_beta/client_base.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tap_hubspot_beta/client_base.py b/tap_hubspot_beta/client_base.py index d64411a..5c715c9 100644 --- a/tap_hubspot_beta/client_base.py +++ b/tap_hubspot_beta/client_base.py @@ -137,6 +137,26 @@ def selected_properties(self): selected_properties.append(key[-1]) return selected_properties + def log_rate_limit(self, resp): + """ + Prints out the content for the rate limits headers in the response. + """ + for header in [ + 'x-hubspot-ratelimit-interval-milliseconds', + 'x-hubspot-ratelimit-max', + 'x-hubspot-ratelimit-remaining', + 'x-hubspot-ratelimit-secondly', + 'x-hubspot-ratelimit-secondly-remaining', + ]: + self.logger.info("Header: {}, value: {}".format( + header, + resp.headers.get(header) + )) + self.logger.info("429 response from path: {} - {}".format( + resp.url, + resp.content + )) + def validate_response(self, response: requests.Response) -> None: """Validate HTTP response.""" if 500 <= response.status_code < 600 or response.status_code in [429, 401, 104]: @@ -146,6 +166,10 @@ def validate_response(self, response: requests.Response) -> None: ) raise RetriableAPIError(msg) + if 429 == response.status_code: + self.log_rate_limit(response) + raise RetriableAPIError(f"429 Too Many Requests, response {response.text}") + elif 400 <= response.status_code < 500: msg = ( f"{response.status_code} Client Error: " @@ -248,6 +272,9 @@ def finalize_state_progress_markers(stream_or_partition_state: dict) -> Optional return finalize_state_progress_markers(state) + def backoff_wait_generator(self): + return backoff.expo(base=3, factor=3) + def request_decorator(self, func): """Instantiate a decorator for handling request failures.""" decorator = backoff.on_exception( From 3f227e6c427985355f0b884d70934ba68f306aa9 Mon Sep 17 00:00:00 2001 From: Vinicius Mesel <4984147+vmesel@users.noreply.github.com> Date: Mon, 27 Nov 2023 15:57:30 -0300 Subject: [PATCH 2/7] Removes 429 from the first validation --- tap_hubspot_beta/client_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot_beta/client_base.py b/tap_hubspot_beta/client_base.py index 5c715c9..8340573 100644 --- a/tap_hubspot_beta/client_base.py +++ b/tap_hubspot_beta/client_base.py @@ -159,7 +159,7 @@ def log_rate_limit(self, resp): def validate_response(self, response: requests.Response) -> None: """Validate HTTP response.""" - if 500 <= response.status_code < 600 or response.status_code in [429, 401, 104]: + if 500 <= response.status_code < 600 or response.status_code in [401, 104]: msg = ( f"{response.status_code} Server Error: " f"{response.reason} for path: {self.path}" From d695bb064b6ac84a39f575d096aa6e30abcfdbbe Mon Sep 17 00:00:00 2001 From: Vinicius Mesel <4984147+vmesel@users.noreply.github.com> Date: Tue, 28 Nov 2023 10:30:13 -0300 Subject: [PATCH 3/7] Changes backoff algorithm to constant backoff --- tap_hubspot_beta/client_base.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tap_hubspot_beta/client_base.py b/tap_hubspot_beta/client_base.py index 8340573..7478780 100644 --- a/tap_hubspot_beta/client_base.py +++ b/tap_hubspot_beta/client_base.py @@ -273,7 +273,11 @@ def finalize_state_progress_markers(stream_or_partition_state: dict) -> Optional finalize_state_progress_markers(state) def backoff_wait_generator(self): - return backoff.expo(base=3, factor=3) + return backoff.constant(interval=15) + + @property + def backoff_max_tries(self): + return 10 def request_decorator(self, func): """Instantiate a decorator for handling request failures.""" From f76d2cdfe703ff957dc5711d6f0bd617f11c58ec Mon Sep 17 00:00:00 2001 From: Keyna Rafael <95432445+keyn4@users.noreply.github.com> Date: Thu, 11 Apr 2024 15:01:52 -0500 Subject: [PATCH 4/7] fix list_id typing (#47) --- tap_hubspot_beta/streams.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tap_hubspot_beta/streams.py b/tap_hubspot_beta/streams.py index 3c8ab17..cb893c4 100644 --- a/tap_hubspot_beta/streams.py +++ b/tap_hubspot_beta/streams.py @@ -1095,7 +1095,7 @@ class ListMembershipV3Stream(hubspotV3Stream): schema = th.PropertiesList( th.Property("results", th.CustomType({"type": ["array", "string"]})), - th.Property("list_id", th.IntegerType), + th.Property("list_id", th.StringType), ).to_dict() def post_process(self, row, context): From 7d786066a6a588b46b04122d5a01f1faab7e92c9 Mon Sep 17 00:00:00 2001 From: Keyna Rafael <95432445+keyn4@users.noreply.github.com> Date: Fri, 12 Apr 2024 12:34:33 -0500 Subject: [PATCH 5/7] fix listId typing in lists_v3 (#48) --- tap_hubspot_beta/streams.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/tap_hubspot_beta/streams.py b/tap_hubspot_beta/streams.py index cb893c4..316d7ef 100644 --- a/tap_hubspot_beta/streams.py +++ b/tap_hubspot_beta/streams.py @@ -1054,17 +1054,17 @@ def replication_key(self): return "updatedAt" schema = th.PropertiesList( - th.Property("listId", th.NumberType()), - th.Property("listVersion", th.NumberType()), - th.Property("createdAt", th.DateTimeType()), - th.Property("updatedAt", th.DateTimeType()), - th.Property("filtersUpdateAt", th.DateTimeType()), - th.Property("processingStatus", th.StringType()), - th.Property("createdById", th.NumberType()), - th.Property("updatedById", th.NumberType()), - th.Property("processingType", th.StringType()), - th.Property("objectTypeId", th.StringType()), - th.Property("name", th.StringType()), + th.Property("listId", th.StringType), + th.Property("listVersion", th.NumberType), + th.Property("createdAt", th.DateTimeType), + th.Property("updatedAt", th.DateTimeType), + th.Property("filtersUpdateAt", th.DateTimeType), + th.Property("processingStatus", th.StringType), + th.Property("createdById", th.NumberType), + th.Property("updatedById", th.NumberType), + th.Property("processingType", th.StringType), + th.Property("objectTypeId", th.StringType), + th.Property("name", th.StringType), th.Property("additionalProperties", th.CustomType({"type": ["object", "string"]})), ).to_dict() From 48f08c9acb8ac7a8f528f07134f685b1f2d87c64 Mon Sep 17 00:00:00 2001 From: Keyna Rafael <95432445+keyn4@users.noreply.github.com> Date: Wed, 17 Apr 2024 10:30:13 -0500 Subject: [PATCH 6/7] improve validate response logs, print request curl if request fails (#50) --- pyproject.toml | 1 + tap_hubspot_beta/client_base.py | 15 +++++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 35af3fe..6d60dea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ python = "<3.11,>=3.7.1" requests = "^2.25.1" singer-sdk = "^0.4.4" "backports.cached-property" = "^1.0.1" +curlify = "^2.2.1" [tool.poetry.dev-dependencies] pytest = "^6.2.5" diff --git a/tap_hubspot_beta/client_base.py b/tap_hubspot_beta/client_base.py index 7478780..7b3d49f 100644 --- a/tap_hubspot_beta/client_base.py +++ b/tap_hubspot_beta/client_base.py @@ -1,6 +1,7 @@ """REST client handling, including hubspotStream base class.""" import copy import logging +import curlify import requests import backoff @@ -159,23 +160,29 @@ def log_rate_limit(self, resp): def validate_response(self, response: requests.Response) -> None: """Validate HTTP response.""" - if 500 <= response.status_code < 600 or response.status_code in [401, 104]: + if 500 <= response.status_code < 600 or response.status_code in [400, 401, 104]: msg = ( f"{response.status_code} Server Error: " f"{response.reason} for path: {self.path}" ) - raise RetriableAPIError(msg) + curl_command = curlify.to_curl(response.request) + logging.error(f"Response code: {response.status_code}, info: {response.text}") + logging.error(f"CURL command for failed request: {curl_command}") + raise RetriableAPIError(f"Msg {msg}, response {response.text}") if 429 == response.status_code: self.log_rate_limit(response) raise RetriableAPIError(f"429 Too Many Requests, response {response.text}") - elif 400 <= response.status_code < 500: + elif 400 < response.status_code < 500: msg = ( f"{response.status_code} Client Error: " f"{response.reason} for path: {self.path}" ) - raise FatalAPIError(msg) + curl_command = curlify.to_curl(response.request) + logging.error(f"Response code: {response.status_code}, info: {response.text}") + logging.error(f"CURL command for failed request: {curl_command}") + raise FatalAPIError(RetriableAPIError(f"Msg {msg}, response {response.text}")) @staticmethod def extract_type(field): From 481bbcd753ba0c4f513bf9d6ec2f691267ae19d4 Mon Sep 17 00:00:00 2001 From: Keyna Rafael <95432445+keyn4@users.noreply.github.com> Date: Wed, 8 May 2024 09:58:49 -0500 Subject: [PATCH 7/7] fix createdById and updatedById typing (#54) --- tap_hubspot_beta/streams.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tap_hubspot_beta/streams.py b/tap_hubspot_beta/streams.py index 316d7ef..50f0bd6 100644 --- a/tap_hubspot_beta/streams.py +++ b/tap_hubspot_beta/streams.py @@ -1060,8 +1060,8 @@ def replication_key(self): th.Property("updatedAt", th.DateTimeType), th.Property("filtersUpdateAt", th.DateTimeType), th.Property("processingStatus", th.StringType), - th.Property("createdById", th.NumberType), - th.Property("updatedById", th.NumberType), + th.Property("createdById", th.StringType), + th.Property("updatedById", th.StringType), th.Property("processingType", th.StringType), th.Property("objectTypeId", th.StringType), th.Property("name", th.StringType),