Skip to content

Commit

Permalink
[DAR-2161][External] Exponential backoff for external storage registr…
Browse files Browse the repository at this point in the history
…ation (#856)

* Added tenacity library for backoff

* Exponential backoff for external storage registration

* Corrected chunk_size

* Update backoff parameters

* Feedback

* Move retrying to API call

* Remove unused imports

* Undid testing changes

* Poetry lock

* Test case
  • Loading branch information
JBWilkie authored Jun 14, 2024
1 parent 0ea03b4 commit 38b1583
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 3 deletions.
23 changes: 23 additions & 0 deletions darwin/backend_v2.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from urllib import parse

from requests.exceptions import HTTPError
from requests.models import Response
from tenacity import RetryCallState, retry, stop_after_attempt, wait_exponential_jitter

from darwin.datatypes import ItemId


Expand All @@ -17,6 +21,19 @@ def wrapper(self, *args, **kwargs) -> Callable:
return wrapper


def log_rate_limit_exceeded(retry_state: RetryCallState):
    """
    Print a notice that the rate limit was hit and how long until the next retry.

    The wait duration is read from ``retry_state.next_action.sleep`` and shown
    with two decimal places.
    """
    print(
        "Rate limit exceeded. "
        f"Retrying in {retry_state.next_action.sleep:.2f} seconds..."
    )


def retry_if_status_code_429(retry_state: RetryCallState):
    """
    Decide whether the last attempt should be retried because of rate limiting.

    Returns ``True`` only when the attempt raised an ``HTTPError`` whose attached
    response has status code 429. Any other outcome (success, a different
    exception, or an ``HTTPError`` without a response) returns ``False``.
    """
    outcome = retry_state.outcome
    if outcome is None:
        # No recorded result for this attempt: nothing to base a retry on.
        return False

    exception = outcome.exception()
    if not isinstance(exception, HTTPError):
        return False

    # An HTTPError may be raised without a response attached; reading
    # ``status_code`` on None would raise AttributeError and hide the real error.
    response: Optional[Response] = exception.response
    return response is not None and response.status_code == 429


class BackendV2:
def __init__(self, client: "Client", default_team): # noqa F821
self._client = client
Expand Down Expand Up @@ -238,6 +255,12 @@ def import_annotation(
f"v2/teams/{team_slug}/items/{item_id}/import", payload=payload
)

@retry(
wait=wait_exponential_jitter(initial=60, max=300),
stop=stop_after_attempt(10),
retry=retry_if_status_code_429,
before_sleep=log_rate_limit_exceeded,
)
@inject_default_team_slug
def register_items(self, payload: Dict[str, Any], team_slug: str) -> None:
"""
Expand Down
19 changes: 17 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ tqdm = "^4.64.1"
types-pyyaml = "^6.0.12.9"
types-requests = "^2.28.11.8"
upolygon = "0.1.11"
tenacity = "^8.3.0"

[tool.poetry.extras]
dev = ["black", "isort", "flake8", "mypy", "debugpy", "responses", "pytest", "flake8-pyproject", "pytest-rerunfailures", "ruff", "validate-pyproject"]
Expand Down
28 changes: 28 additions & 0 deletions tests/darwin/backend_v2_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from unittest.mock import Mock, call, patch

import pytest
from requests.exceptions import HTTPError
from requests.models import Response
from tenacity import RetryError

from darwin.backend_v2 import BackendV2


class TestBackendV2:
    """Tests for ``BackendV2`` retry handling."""

    @patch("time.sleep", return_value=None)
    def test_register_items_retries_on_429(self, mock_sleep):
        """``register_items`` raises ``RetryError`` after 10 attempts that all hit HTTP 429."""
        # ``time.sleep`` is patched to return immediately, so no real waiting
        # happens between attempts.
        rate_limited = Mock(spec=Response)
        rate_limited.status_code = 429

        client = Mock()
        client._post_raw.side_effect = HTTPError(response=rate_limited)

        body = {"key": "value"}
        with pytest.raises(RetryError):
            BackendV2(client, "team_slug").register_items(body)

        # Every attempt must have posted the same payload to the same endpoint.
        assert client._post_raw.call_count == 10
        assert client._post_raw.call_args_list == [
            call("/v2/teams/team_slug/items/register_existing", body)
        ] * 10
4 changes: 3 additions & 1 deletion tests/darwin/dataset/remote_dataset_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -1065,7 +1065,7 @@ def test_register_files_with_blocked_items(self, remote_dataset: RemoteDatasetV2
},
status=200,
)
remote_dataset.register(
result = remote_dataset.register(
ObjectStore(
name="test",
prefix="test_prefix",
Expand All @@ -1076,3 +1076,5 @@ def test_register_files_with_blocked_items(self, remote_dataset: RemoteDatasetV2
{"item1": ["test.jpg"]},
multi_slotted=True,
)
assert len(result["registered"]) == 0
assert len(result["blocked"]) == 1

0 comments on commit 38b1583

Please sign in to comment.