Merge pull request #22 from aurelio-labs/simonas/usage-updates

chore: retry updates
aurelio-labs · Nov 13, 2024 · d6df1ca
2 parents ba48ec2 + dbb8688
commit d6df1ca
Show file tree

Hide file tree

Showing 9 changed files with 62 additions and 172 deletions.
diff --git a/aurelio_sdk/client.py b/aurelio_sdk/client.py
@@ -112,7 +112,7 @@ def chunk(
                     timeout=timeout,
                 )
                 if response.status_code == 200:
-                    return ChunkResponse(**response.json())
+                    return ChunkResponse(**response.json())  # Success
                 elif response.status_code == 429:
                     raise ApiRateLimitError(
                         status_code=response.status_code,
@@ -211,8 +211,6 @@ def extract_file(
                 fields={**fields, "file": (filename, file)}
             )
 
-        document_id = None
-        response = None
         session_timeout = wait + 1 if wait > 0 else None
 
         for attempt in range(1, retries + 1):
@@ -230,7 +228,7 @@ def extract_file(
 
                 if response.status_code == 200:
                     extract_response = ExtractResponse(**response.json())
-                    document_id = extract_response.document.id
+                    break  # Success
                 elif response.status_code == 429:
                     raise ApiRateLimitError(
                         status_code=response.status_code,
@@ -259,23 +257,6 @@ def extract_file(
                         status_code=response.status_code,
                         base_url=self.base_url,
                     )
-                if wait == 0:
-                    return extract_response
-
-                # If the document is already processed or polling is disabled,
-                # return the response
-                if (
-                    extract_response.status in ["completed", "failed"]
-                    or polling_interval <= 0
-                ):
-                    return extract_response
-
-                # Wait for the document to complete processing
-                return self.wait_for(
-                    document_id=document_id,
-                    wait=wait,
-                    polling_interval=polling_interval,
-                )
             except ApiRateLimitError as e:
                 raise e
             except requests.exceptions.Timeout:
@@ -293,9 +274,26 @@ def extract_file(
                 else:
                     logger.debug(f"Retrying due to exception (attempt {attempt}): {e}")
                     continue  # Retry
-        raise ApiError(
-            message=f"Failed to get response after {retries} retries",
-            base_url=self.base_url,
+
+        if extract_response is None:
+            raise ApiError(
+                message=f"Failed to receive a valid response after {retries} retries",
+                base_url=self.base_url,
+            )
+
+        if wait == 0:
+            return extract_response
+
+        # If the document is already processed or polling is disabled,
+        # return the response
+        if extract_response.status in ["completed", "failed"] or polling_interval <= 0:
+            return extract_response
+
+        # Wait for the document to complete processing
+        return self.wait_for(
+            document_id=extract_response.document.id,
+            wait=wait,
+            polling_interval=polling_interval,
         )
 
     def extract_url(
@@ -341,8 +339,6 @@ def extract_url(
         initial_wait = WAIT_TIME_BEFORE_POLLING if polling_interval > 0 else wait
         data["wait"] = initial_wait
 
-        document_id = None
-        response = None
         session_timeout = wait + 1 if wait > 0 else None
 
         for attempt in range(1, retries + 1):
@@ -353,7 +349,8 @@ def extract_url(
 
                 if response.status_code == 200:
                     extract_response = ExtractResponse(**response.json())
-                    document_id = extract_response.document.id
+                    break  # Success
+
                 elif response.status_code == 429:
                     raise ApiRateLimitError(
                         status_code=response.status_code,
@@ -383,23 +380,6 @@ def extract_url(
                         base_url=self.base_url,
                     )
 
-                if wait == 0:
-                    return extract_response
-
-                # If the document is already processed or polling is disabled,
-                # return the response
-                if (
-                    extract_response.status in ["completed", "failed"]
-                    or polling_interval <= 0
-                ):
-                    return extract_response
-
-                # Wait for the document to complete processing
-                return self.wait_for(
-                    document_id=document_id,
-                    wait=wait,
-                    polling_interval=polling_interval,
-                )
             except ApiRateLimitError as e:
                 raise e
             except requests.exceptions.Timeout:
@@ -420,9 +400,26 @@ def extract_url(
                 else:
                     logger.debug(f"Retrying due to exception (attempt {attempt}): {e}")
                     continue  # Retry
-        raise ApiError(
-            message=f"Failed to get response after {retries} retries",
-            base_url=self.base_url,
+
+        if extract_response is None:
+            raise ApiError(
+                message=f"Failed to receive a valid response after {retries} retries",
+                base_url=self.base_url,
+            )
+
+        if wait == 0:
+            return extract_response
+
+        # If the document is already processed or polling is disabled,
+        # return the response
+        if extract_response.status in ["completed", "failed"] or polling_interval <= 0:
+            return extract_response
+
+        # Wait for the document to complete processing
+        return self.wait_for(
+            document_id=extract_response.document.id,
+            wait=wait,
+            polling_interval=polling_interval,
         )
 
     def get_document(

diff --git a/aurelio_sdk/client_async.py b/aurelio_sdk/client_async.py
@@ -252,6 +252,7 @@ async def extract_file(
                         filename=filename,
                         content_type=file_payload.content_type,
                     )
+                # Handles file bytes
                 else:
                     logger.debug("Uploading file bytes")
                     try:
@@ -274,9 +275,11 @@ async def extract_file(
                     async with session.post(
                         client_url, data=data, headers=self.headers
                     ) as response:
+                        logger.debug("Calling API")
                         if response.status == 200:
                             extract_response = ExtractResponse(**await response.json())
                             document_id = extract_response.document.id
+                            break  # Success
                         elif response.status == 429:
                             raise ApiRateLimitError(
                                 status_code=response.status,

diff --git a/examples/02_extract.ipynb b/examples/02_extract.ipynb
@@ -23,7 +23,7 @@
     "client = AurelioClient(\n",
     "    api_key=os.environ[\"AURELIO_API_KEY\"],\n",
     "    debug=True,\n",
-    "    base_url=\"http://localhost:8001\",\n",
+    "    base_url=\"http://localhost:8000\",\n",
     ")\n",
     "\n",
     "print(client.base_url)\n"
@@ -57,7 +57,7 @@
     "response_pdf_file: ExtractResponse = client.extract_file(\n",
     "    file_path=file_path, quality=\"low\", chunk=True, wait=-1, polling_interval=5\n",
     ")\n",
-    "print(response_pdf_file)"
+    "response_pdf_file"
    ]
   },
   {