Skip to content

Commit

Permalink
Merge pull request #22 from aurelio-labs/simonas/usage-updates
Browse files: browse the repository at this point in the history
chore: retry updates
  • Loading branch information
simjak authored Nov 13, 2024
2 parents ba48ec2 + dbb8688 commit d6df1ca
Show file tree
Hide file tree
Showing 9 changed files with 62 additions and 172 deletions.
91 changes: 44 additions & 47 deletions aurelio_sdk/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def chunk(
timeout=timeout,
)
if response.status_code == 200:
return ChunkResponse(**response.json())
return ChunkResponse(**response.json()) # Success
elif response.status_code == 429:
raise ApiRateLimitError(
status_code=response.status_code,
Expand Down Expand Up @@ -211,8 +211,6 @@ def extract_file(
fields={**fields, "file": (filename, file)}
)

document_id = None
response = None
session_timeout = wait + 1 if wait > 0 else None

for attempt in range(1, retries + 1):
Expand All @@ -230,7 +228,7 @@ def extract_file(

if response.status_code == 200:
extract_response = ExtractResponse(**response.json())
document_id = extract_response.document.id
break # Success
elif response.status_code == 429:
raise ApiRateLimitError(
status_code=response.status_code,
Expand Down Expand Up @@ -259,23 +257,6 @@ def extract_file(
status_code=response.status_code,
base_url=self.base_url,
)
if wait == 0:
return extract_response

# If the document is already processed or polling is disabled,
# return the response
if (
extract_response.status in ["completed", "failed"]
or polling_interval <= 0
):
return extract_response

# Wait for the document to complete processing
return self.wait_for(
document_id=document_id,
wait=wait,
polling_interval=polling_interval,
)
except ApiRateLimitError as e:
raise e
except requests.exceptions.Timeout:
Expand All @@ -293,9 +274,26 @@ def extract_file(
else:
logger.debug(f"Retrying due to exception (attempt {attempt}): {e}")
continue # Retry
raise ApiError(
message=f"Failed to get response after {retries} retries",
base_url=self.base_url,

if extract_response is None:
raise ApiError(
message=f"Failed to receive a valid response after {retries} retries",
base_url=self.base_url,
)

if wait == 0:
return extract_response

# If the document is already processed or polling is disabled,
# return the response
if extract_response.status in ["completed", "failed"] or polling_interval <= 0:
return extract_response

# Wait for the document to complete processing
return self.wait_for(
document_id=extract_response.document.id,
wait=wait,
polling_interval=polling_interval,
)

def extract_url(
Expand Down Expand Up @@ -341,8 +339,6 @@ def extract_url(
initial_wait = WAIT_TIME_BEFORE_POLLING if polling_interval > 0 else wait
data["wait"] = initial_wait

document_id = None
response = None
session_timeout = wait + 1 if wait > 0 else None

for attempt in range(1, retries + 1):
Expand All @@ -353,7 +349,8 @@ def extract_url(

if response.status_code == 200:
extract_response = ExtractResponse(**response.json())
document_id = extract_response.document.id
break # Success

elif response.status_code == 429:
raise ApiRateLimitError(
status_code=response.status_code,
Expand Down Expand Up @@ -383,23 +380,6 @@ def extract_url(
base_url=self.base_url,
)

if wait == 0:
return extract_response

# If the document is already processed or polling is disabled,
# return the response
if (
extract_response.status in ["completed", "failed"]
or polling_interval <= 0
):
return extract_response

# Wait for the document to complete processing
return self.wait_for(
document_id=document_id,
wait=wait,
polling_interval=polling_interval,
)
except ApiRateLimitError as e:
raise e
except requests.exceptions.Timeout:
Expand All @@ -420,9 +400,26 @@ def extract_url(
else:
logger.debug(f"Retrying due to exception (attempt {attempt}): {e}")
continue # Retry
raise ApiError(
message=f"Failed to get response after {retries} retries",
base_url=self.base_url,

if extract_response is None:
raise ApiError(
message=f"Failed to receive a valid response after {retries} retries",
base_url=self.base_url,
)

if wait == 0:
return extract_response

# If the document is already processed or polling is disabled,
# return the response
if extract_response.status in ["completed", "failed"] or polling_interval <= 0:
return extract_response

# Wait for the document to complete processing
return self.wait_for(
document_id=extract_response.document.id,
wait=wait,
polling_interval=polling_interval,
)

def get_document(
Expand Down
3 changes: 3 additions & 0 deletions aurelio_sdk/client_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ async def extract_file(
filename=filename,
content_type=file_payload.content_type,
)
# Handles file bytes
else:
logger.debug("Uploading file bytes")
try:
Expand All @@ -274,9 +275,11 @@ async def extract_file(
async with session.post(
client_url, data=data, headers=self.headers
) as response:
logger.debug("Calling API")
if response.status == 200:
extract_response = ExtractResponse(**await response.json())
document_id = extract_response.document.id
break # Success
elif response.status == 429:
raise ApiRateLimitError(
status_code=response.status,
Expand Down
4 changes: 2 additions & 2 deletions examples/02_extract.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
"client = AurelioClient(\n",
" api_key=os.environ[\"AURELIO_API_KEY\"],\n",
" debug=True,\n",
" base_url=\"http://localhost:8001\",\n",
" base_url=\"http://localhost:8000\",\n",
")\n",
"\n",
"print(client.base_url)\n"
Expand Down Expand Up @@ -57,7 +57,7 @@
"response_pdf_file: ExtractResponse = client.extract_file(\n",
" file_path=file_path, quality=\"low\", chunk=True, wait=-1, polling_interval=5\n",
")\n",
"print(response_pdf_file)"
"response_pdf_file"
]
},
{
Expand Down
Loading

0 comments on commit d6df1ca

Please sign in to comment.