diff --git a/aurelio_sdk/client.py b/aurelio_sdk/client.py index 81fa1d4..084f8c1 100644 --- a/aurelio_sdk/client.py +++ b/aurelio_sdk/client.py @@ -112,7 +112,7 @@ def chunk( timeout=timeout, ) if response.status_code == 200: - return ChunkResponse(**response.json()) + return ChunkResponse(**response.json()) # Success elif response.status_code == 429: raise ApiRateLimitError( status_code=response.status_code, @@ -211,8 +211,6 @@ def extract_file( fields={**fields, "file": (filename, file)} ) - document_id = None - response = None session_timeout = wait + 1 if wait > 0 else None for attempt in range(1, retries + 1): @@ -230,7 +228,7 @@ def extract_file( if response.status_code == 200: extract_response = ExtractResponse(**response.json()) - document_id = extract_response.document.id + break # Success elif response.status_code == 429: raise ApiRateLimitError( status_code=response.status_code, @@ -259,23 +257,6 @@ def extract_file( status_code=response.status_code, base_url=self.base_url, ) - if wait == 0: - return extract_response - - # If the document is already processed or polling is disabled, - # return the response - if ( - extract_response.status in ["completed", "failed"] - or polling_interval <= 0 - ): - return extract_response - - # Wait for the document to complete processing - return self.wait_for( - document_id=document_id, - wait=wait, - polling_interval=polling_interval, - ) except ApiRateLimitError as e: raise e except requests.exceptions.Timeout: @@ -293,9 +274,26 @@ def extract_file( else: logger.debug(f"Retrying due to exception (attempt {attempt}): {e}") continue # Retry - raise ApiError( - message=f"Failed to get response after {retries} retries", - base_url=self.base_url, + + if extract_response is None: + raise ApiError( + message=f"Failed to receive a valid response after {retries} retries", + base_url=self.base_url, + ) + + if wait == 0: + return extract_response + + # If the document is already processed or polling is disabled, + # return the response + if extract_response.status in ["completed", "failed"] or polling_interval <= 0: + return extract_response + + # Wait for the document to complete processing + return self.wait_for( + document_id=extract_response.document.id, + wait=wait, + polling_interval=polling_interval, ) def extract_url( @@ -341,8 +339,6 @@ def extract_url( initial_wait = WAIT_TIME_BEFORE_POLLING if polling_interval > 0 else wait data["wait"] = initial_wait - document_id = None - response = None session_timeout = wait + 1 if wait > 0 else None for attempt in range(1, retries + 1): @@ -353,7 +349,8 @@ def extract_url( if response.status_code == 200: extract_response = ExtractResponse(**response.json()) - document_id = extract_response.document.id + break # Success + elif response.status_code == 429: raise ApiRateLimitError( status_code=response.status_code, @@ -383,23 +380,6 @@ def extract_url( base_url=self.base_url, ) - if wait == 0: - return extract_response - - # If the document is already processed or polling is disabled, - # return the response - if ( - extract_response.status in ["completed", "failed"] - or polling_interval <= 0 - ): - return extract_response - - # Wait for the document to complete processing - return self.wait_for( - document_id=document_id, - wait=wait, - polling_interval=polling_interval, - ) except ApiRateLimitError as e: raise e except requests.exceptions.Timeout: @@ -420,9 +400,26 @@ def extract_url( else: logger.debug(f"Retrying due to exception (attempt {attempt}): {e}") continue # Retry - raise ApiError( - message=f"Failed to get response after {retries} retries", - base_url=self.base_url, + + if extract_response is None: + raise ApiError( + message=f"Failed to receive a valid response after {retries} retries", + base_url=self.base_url, + ) + + if wait == 0: + return extract_response + + # If the document is already processed or polling is disabled, + # return the response + if extract_response.status in ["completed", "failed"] or polling_interval <= 0: + return extract_response + + # Wait for the document to complete processing + return self.wait_for( + document_id=extract_response.document.id, + wait=wait, + polling_interval=polling_interval, ) def get_document( diff --git a/aurelio_sdk/client_async.py b/aurelio_sdk/client_async.py index ef88a46..28cc6f4 100644 --- a/aurelio_sdk/client_async.py +++ b/aurelio_sdk/client_async.py @@ -252,6 +252,7 @@ async def extract_file( filename=filename, content_type=file_payload.content_type, ) + # Handles file bytes else: logger.debug("Uploading file bytes") try: @@ -274,9 +275,11 @@ async def extract_file( async with session.post( client_url, data=data, headers=self.headers ) as response: + logger.debug("Calling API") if response.status == 200: extract_response = ExtractResponse(**await response.json()) document_id = extract_response.document.id + break # Success elif response.status == 429: raise ApiRateLimitError( status_code=response.status, diff --git a/examples/02_extract.ipynb b/examples/02_extract.ipynb index b12b72a..4dfa46e 100644 --- a/examples/02_extract.ipynb +++ b/examples/02_extract.ipynb @@ -23,7 +23,7 @@ "client = AurelioClient(\n", " api_key=os.environ[\"AURELIO_API_KEY\"],\n", " debug=True,\n", - " base_url=\"http://localhost:8001\",\n", + " base_url=\"http://localhost:8000\",\n", ")\n", "\n", "print(client.base_url)\n" @@ -57,7 +57,7 @@ "response_pdf_file: ExtractResponse = client.extract_file(\n", " file_path=file_path, quality=\"low\", chunk=True, wait=-1, polling_interval=5\n", ")\n", - "print(response_pdf_file)" + "response_pdf_file" ] }, { diff --git a/examples/03_extract_async.ipynb b/examples/03_extract_async.ipynb index 13a9513..dc4310a 100644 --- a/examples/03_extract_async.ipynb +++ b/examples/03_extract_async.ipynb @@ -8,7 +8,7 @@ "source": [ "import sys\n", "\n", - "sys.path.append(\"..\")" + "sys.path.append(\"..\")\n" ] }, { @@ -20,7 +20,6 @@ "from aurelio_sdk import AsyncAurelioClient\n", "import os\n", "\n", - "\n", "base_url = \"http://localhost:8000\"\n", "client = AsyncAurelioClient(api_key=os.environ[\"AURELIO_API_KEY\"], base_url=base_url, debug=True)" ] @@ -49,12 +48,13 @@ "\n", "from aurelio_sdk import ExtractResponse\n", "\n", - "file_path = \"data/pdf/adaptive_semantic_search.pdf\"\n", + "# file_path = \"data/pdf/adaptive_semantic_search.pdf\"\n", + "file_path = \"data/pdf/test_pdf.pdf\"\n", "\n", - "# NOTE: wait=-1 means wait till completion, polling_interval=15 means polling every 15 seconds for status till completion\n", + "# NOTE: wait=-1 means wait till completion, polling_interval=5 means polling every 5 seconds for status till completion\n", "\n", "response_pdf_file: ExtractResponse = await client.extract_file(\n", - " file_path=file_path, quality=\"high\", chunk=True, wait=-1, polling_interval=15\n", + " file_path=file_path, quality=\"low\", chunk=False, wait=-1, polling_interval=5\n", ")\n", "\n", "response_pdf_file" @@ -90,46 +90,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\u001b[36m[AurelioSDK] [DEBUG] -- 2024-11-12 13:13:16 - at client_async.py:434 in extract_url(): Retrying due to exception (attempt 1): Cannot connect to host localhost:8001 ssl:default [Multiple exceptions: [Errno 61] Connect call failed ('::1', 8001, 0, 0), [Errno 61] Connect call failed ('127.0.0.1', 8001)]\u001b[0m\n", - "\u001b[36m[AurelioSDK] [DEBUG] -- 2024-11-12 13:13:16 - at client_async.py:434 in extract_url(): Retrying due to exception (attempt 2): Cannot connect to host localhost:8001 ssl:default [Multiple exceptions: [Errno 61] Connect call failed ('127.0.0.1', 8001), [Errno 61] Connect call failed ('::1', 8001, 0, 0)]\u001b[0m\n" - ] - }, - { - "ename": "ApiError", - "evalue": "[AurelioSDK] API request failed: Failed to get response after 3 retries: Cannot connect to host localhost:8001 ssl:default [Multiple exceptions: [Errno 61] Connect call failed ('::1', 8001, 0, 0), [Errno 61] Connect call failed ('127.0.0.1', 8001)]. Base API URL: http://localhost:8001.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mConnectionRefusedError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/customers/aurelio/aurelio-sdk/.venv/lib/python3.12/site-packages/aiohttp/connector.py:1091\u001b[0m, in \u001b[0;36mTCPConnector._wrap_create_connection\u001b[0;34m(self, addr_infos, req, timeout, client_error, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mwith\u001b[39;00m ceil_timeout(\n\u001b[1;32m 1089\u001b[0m timeout\u001b[38;5;241m.\u001b[39msock_connect, ceil_threshold\u001b[38;5;241m=\u001b[39mtimeout\u001b[38;5;241m.\u001b[39mceil_threshold\n\u001b[1;32m 1090\u001b[0m ):\n\u001b[0;32m-> 1091\u001b[0m sock \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m aiohappyeyeballs\u001b[38;5;241m.\u001b[39mstart_connection(\n\u001b[1;32m 1092\u001b[0m addr_infos\u001b[38;5;241m=\u001b[39maddr_infos,\n\u001b[1;32m 1093\u001b[0m local_addr_infos\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_local_addr_infos,\n\u001b[1;32m 1094\u001b[0m happy_eyeballs_delay\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_happy_eyeballs_delay,\n\u001b[1;32m 1095\u001b[0m interleave\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_interleave,\n\u001b[1;32m 1096\u001b[0m loop\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_loop,\n\u001b[1;32m 1097\u001b[0m )\n\u001b[1;32m 1098\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_loop\u001b[38;5;241m.\u001b[39mcreate_connection(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs, sock\u001b[38;5;241m=\u001b[39msock)\n", - "File \u001b[0;32m~/customers/aurelio/aurelio-sdk/.venv/lib/python3.12/site-packages/aiohappyeyeballs/impl.py:123\u001b[0m, in \u001b[0;36mstart_connection\u001b[0;34m(addr_infos, local_addr_infos, happy_eyeballs_delay, interleave, loop)\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mall\u001b[39m(\n\u001b[1;32m 120\u001b[0m \u001b[38;5;28misinstance\u001b[39m(exc, \u001b[38;5;167;01mOSError\u001b[39;00m) \u001b[38;5;129;01mand\u001b[39;00m exc\u001b[38;5;241m.\u001b[39merrno \u001b[38;5;241m==\u001b[39m first_errno\n\u001b[1;32m 121\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m exc \u001b[38;5;129;01min\u001b[39;00m all_exceptions\n\u001b[1;32m 122\u001b[0m ):\n\u001b[0;32m--> 123\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mOSError\u001b[39;00m(first_errno, msg)\n\u001b[1;32m 124\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(first_exception, \u001b[38;5;167;01mRuntimeError\u001b[39;00m) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mall\u001b[39m(\n\u001b[1;32m 125\u001b[0m \u001b[38;5;28misinstance\u001b[39m(exc, \u001b[38;5;167;01mRuntimeError\u001b[39;00m) \u001b[38;5;28;01mfor\u001b[39;00m exc \u001b[38;5;129;01min\u001b[39;00m all_exceptions\n\u001b[1;32m 126\u001b[0m ):\n", - "\u001b[0;31mConnectionRefusedError\u001b[0m: [Errno 61] Multiple exceptions: [Errno 61] Connect call failed ('::1', 8001, 0, 0), [Errno 61] Connect call failed ('127.0.0.1', 8001)", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mClientConnectorError\u001b[0m Traceback (most recent call last)", - "File \u001b[0;32m~/customers/aurelio/aurelio-sdk/examples/../aurelio_sdk/client_async.py:385\u001b[0m, in \u001b[0;36mAsyncAurelioClient.extract_url\u001b[0;34m(self, url, quality, chunk, wait, polling_interval, retries)\u001b[0m\n\u001b[1;32m 384\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 385\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mwith\u001b[39;00m session\u001b[38;5;241m.\u001b[39mpost(\n\u001b[1;32m 386\u001b[0m client_url, data\u001b[38;5;241m=\u001b[39mdata, headers\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mheaders\n\u001b[1;32m 387\u001b[0m ) \u001b[38;5;28;01mas\u001b[39;00m response:\n\u001b[1;32m 388\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m200\u001b[39m:\n", - "File \u001b[0;32m~/customers/aurelio/aurelio-sdk/.venv/lib/python3.12/site-packages/aiohttp/client.py:1359\u001b[0m, in \u001b[0;36m_BaseRequestContextManager.__aenter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1358\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__aenter__\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m _RetType:\n\u001b[0;32m-> 1359\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_resp: _RetType \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_coro\n\u001b[1;32m 1360\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_resp\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__aenter__\u001b[39m()\n", - "File \u001b[0;32m~/customers/aurelio/aurelio-sdk/.venv/lib/python3.12/site-packages/aiohttp/client.py:663\u001b[0m, in \u001b[0;36mClientSession._request\u001b[0;34m(self, method, str_or_url, params, data, json, cookies, headers, skip_auto_headers, auth, allow_redirects, max_redirects, compress, chunked, expect100, raise_for_status, read_until_eof, proxy, proxy_auth, timeout, verify_ssl, fingerprint, ssl_context, ssl, server_hostname, proxy_headers, trace_request_ctx, read_bufsize, auto_decompress, max_line_size, max_field_size)\u001b[0m\n\u001b[1;32m 659\u001b[0m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mwith\u001b[39;00m ceil_timeout(\n\u001b[1;32m 660\u001b[0m real_timeout\u001b[38;5;241m.\u001b[39mconnect,\n\u001b[1;32m 661\u001b[0m ceil_threshold\u001b[38;5;241m=\u001b[39mreal_timeout\u001b[38;5;241m.\u001b[39mceil_threshold,\n\u001b[1;32m 662\u001b[0m ):\n\u001b[0;32m--> 663\u001b[0m conn \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_connector\u001b[38;5;241m.\u001b[39mconnect(\n\u001b[1;32m 664\u001b[0m req, traces\u001b[38;5;241m=\u001b[39mtraces, timeout\u001b[38;5;241m=\u001b[39mreal_timeout\n\u001b[1;32m 665\u001b[0m )\n\u001b[1;32m 666\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mTimeoutError \u001b[38;5;28;01mas\u001b[39;00m exc:\n", - "File \u001b[0;32m~/customers/aurelio/aurelio-sdk/.venv/lib/python3.12/site-packages/aiohttp/connector.py:563\u001b[0m, in \u001b[0;36mBaseConnector.connect\u001b[0;34m(self, req, traces, timeout)\u001b[0m\n\u001b[1;32m 562\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 563\u001b[0m proto \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_create_connection(req, traces, timeout)\n\u001b[1;32m 564\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_closed:\n", - "File \u001b[0;32m~/customers/aurelio/aurelio-sdk/.venv/lib/python3.12/site-packages/aiohttp/connector.py:1032\u001b[0m, in \u001b[0;36mTCPConnector._create_connection\u001b[0;34m(self, req, traces, timeout)\u001b[0m\n\u001b[1;32m 1031\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1032\u001b[0m _, proto \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_create_direct_connection(req, traces, timeout)\n\u001b[1;32m 1034\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m proto\n", - "File \u001b[0;32m~/customers/aurelio/aurelio-sdk/.venv/lib/python3.12/site-packages/aiohttp/connector.py:1366\u001b[0m, in \u001b[0;36mTCPConnector._create_direct_connection\u001b[0;34m(self, req, traces, timeout, client_error)\u001b[0m\n\u001b[1;32m 1365\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m last_exc \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m-> 1366\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m last_exc\n", - "File \u001b[0;32m~/customers/aurelio/aurelio-sdk/.venv/lib/python3.12/site-packages/aiohttp/connector.py:1335\u001b[0m, in \u001b[0;36mTCPConnector._create_direct_connection\u001b[0;34m(self, req, traces, timeout, client_error)\u001b[0m\n\u001b[1;32m 1334\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1335\u001b[0m transp, proto \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_wrap_create_connection(\n\u001b[1;32m 1336\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_factory,\n\u001b[1;32m 1337\u001b[0m timeout\u001b[38;5;241m=\u001b[39mtimeout,\n\u001b[1;32m 1338\u001b[0m ssl\u001b[38;5;241m=\u001b[39msslcontext,\n\u001b[1;32m 1339\u001b[0m addr_infos\u001b[38;5;241m=\u001b[39maddr_infos,\n\u001b[1;32m 1340\u001b[0m server_hostname\u001b[38;5;241m=\u001b[39mserver_hostname,\n\u001b[1;32m 1341\u001b[0m req\u001b[38;5;241m=\u001b[39mreq,\n\u001b[1;32m 1342\u001b[0m client_error\u001b[38;5;241m=\u001b[39mclient_error,\n\u001b[1;32m 1343\u001b[0m )\n\u001b[1;32m 1344\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ClientConnectorError, asyncio\u001b[38;5;241m.\u001b[39mTimeoutError) \u001b[38;5;28;01mas\u001b[39;00m exc:\n", - "File \u001b[0;32m~/customers/aurelio/aurelio-sdk/.venv/lib/python3.12/site-packages/aiohttp/connector.py:1106\u001b[0m, in \u001b[0;36mTCPConnector._wrap_create_connection\u001b[0;34m(self, addr_infos, req, timeout, client_error, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1105\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n\u001b[0;32m-> 1106\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m client_error(req\u001b[38;5;241m.\u001b[39mconnection_key, exc) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mexc\u001b[39;00m\n", - "\u001b[0;31mClientConnectorError\u001b[0m: Cannot connect to host localhost:8001 ssl:default [Multiple exceptions: [Errno 61] Connect call failed ('::1', 8001, 0, 0), [Errno 61] Connect call failed ('127.0.0.1', 8001)]", - "\nThe above exception was the direct cause of the following exception:\n", - "\u001b[0;31mApiError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[3], line 6\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;66;03m# From URL\u001b[39;00m\n\u001b[1;32m 5\u001b[0m url \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhttps://arxiv.org/pdf/2408.15291\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 6\u001b[0m response_pdf_url: ExtractResponse \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m client\u001b[38;5;241m.\u001b[39mextract_url(\n\u001b[1;32m 7\u001b[0m url\u001b[38;5;241m=\u001b[39murl, quality\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlow\u001b[39m\u001b[38;5;124m\"\u001b[39m, chunk\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, wait\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m, polling_interval\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m5\u001b[39m\n\u001b[1;32m 8\u001b[0m )\n\u001b[1;32m 9\u001b[0m response_pdf_url\n", - "File \u001b[0;32m~/customers/aurelio/aurelio-sdk/examples/../aurelio_sdk/client_async.py:429\u001b[0m, in \u001b[0;36mAsyncAurelioClient.extract_url\u001b[0;34m(self, url, quality, chunk, wait, polling_interval, retries)\u001b[0m\n\u001b[1;32m 427\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 428\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m attempt \u001b[38;5;241m==\u001b[39m retries:\n\u001b[0;32m--> 429\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m ApiError(\n\u001b[1;32m 430\u001b[0m message\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFailed to get response after \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mretries\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m retries: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mstr\u001b[39m(e)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 431\u001b[0m base_url\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbase_url,\n\u001b[1;32m 432\u001b[0m ) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m 433\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 434\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\n\u001b[1;32m 435\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRetrying due to exception (attempt \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mattempt\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m): \u001b[39m\u001b[38;5;132;01m{\u001b[39;00me\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 436\u001b[0m )\n", - "\u001b[0;31mApiError\u001b[0m: [AurelioSDK] API request failed: Failed to get response after 3 retries: Cannot connect to host localhost:8001 ssl:default [Multiple exceptions: [Errno 61] Connect call failed ('::1', 8001, 0, 0), [Errno 61] Connect call failed ('127.0.0.1', 8001)]. Base API URL: http://localhost:8001." - ] - } - ], + "outputs": [], "source": [ "# From URL PDF\n", "from aurelio_sdk import ExtractResponse\n", diff --git a/examples/data/pdf/test_pdf.pdf b/examples/data/pdf/test_pdf.pdf new file mode 100644 index 0000000..e799411 Binary files /dev/null and b/examples/data/pdf/test_pdf.pdf differ diff --git a/pyproject.toml b/pyproject.toml index f25d061..e83c6ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "aurelio-sdk" -version = "0.0.13" +version = "0.0.14" description = "Aurelio Platform SDK" authors = ["Team "] readme = "README.md" @@ -8,7 +8,7 @@ packages = [{ include = "aurelio_sdk" }] [project] name = "aurelio-sdk" -version = "0.0.13" +version = "0.0.14" license = { file = "LICENSE" } [tool.poetry.dependencies] diff --git a/tests/client/test_extract.py b/tests/client/test_extract.py index 319de7b..fcf97f3 100644 --- a/tests/client/test_extract.py +++ b/tests/client/test_extract.py @@ -92,7 +92,7 @@ def test_extract_pdf_file_no_chunks(client: AurelioClient): assert dict_response["status"] == "completed" # Usage - # assert dict_response["usage"]["tokens"] is None # TODO: Should be None if chunk is False + assert dict_response["usage"]["tokens"] is None assert dict_response["usage"]["pages"] == 7 assert dict_response["usage"]["seconds"] is None diff --git a/tests/client_async/test_async_client.py b/tests/client_async/test_async_client.py index eb8f0e5..b8da4d7 100644 --- a/tests/client_async/test_async_client.py +++ b/tests/client_async/test_async_client.py @@ -1,14 +1,11 @@ # Tests for AsyncAurelioClient -import asyncio import os -from pathlib import Path import pytest -from aioresponses import aioresponses from dotenv import load_dotenv from aurelio_sdk.client_async import AsyncAurelioClient -from aurelio_sdk.exceptions import ApiError, ApiRateLimitError +from aurelio_sdk.exceptions import ApiError from aurelio_sdk.schema import ChunkingOptions load_dotenv() @@ -25,7 +22,7 @@ def client() -> AsyncAurelioClient: @pytest.fixture def no_api_key_env(): - """Temporarily remove API key from environment""" + # Temporarily remove API key from environment original_key = os.environ.get("AURELIO_API_KEY") os.environ["AURELIO_API_KEY"] = "" yield @@ -61,73 +58,3 @@ async def test_async_client_empty_base_url(): client = AsyncAurelioClient(api_key="test_api_key", base_url="") assert client.api_key == "test_api_key" assert client.base_url == "https://api.aurelio.ai" - - -# SJ - as a placeholder for now -# @pytest.mark.asyncio -# async def test_async_client_rate_limit_error(client: AsyncAurelioClient): -# with pytest.raises(ApiRateLimitError): -# if client.base_url not in [ -# "https://api.aurelio.ai", -# "https://staging.api.aurelio.ai", -# ]: -# # Rate limits are available only in the cloud environments -# # This is for local testing -# client = AsyncAurelioClient( -# api_key=os.environ["AURELIO_API_KEY_PRODUCTION"], -# base_url=os.environ["BASE_URL_PRODUCTION"], -# ) - -# file_path = Path(__file__).parent.parent / "data" / "test_pdf.pdf" -# tasks: set[asyncio.Task] = set() -# for _ in range(30): -# tasks.add( -# asyncio.create_task( -# client.extract_file( -# file_path=file_path, -# quality="low", -# chunk=False, -# wait=-1, -# polling_interval=2, -# ) -# ) -# ) -# try: -# done, pending = await asyncio.wait( -# tasks, return_when=asyncio.FIRST_EXCEPTION -# ) -# for task in done: -# exception = task.exception() -# if isinstance(exception, ApiRateLimitError): -# tasks.remove(task) -# raise exception # Re-raise to be caught by pytest.raises -# elif exception: -# tasks.remove(task) -# raise exception -# else: -# tasks.remove(task) -# finally: -# for task in pending: -# task.cancel() -# # Await canceled tasks to suppress CancelledError -# await asyncio.gather(*pending, return_exceptions=True) - - -# @pytest.mark.asyncio -# async def test_async_client_retry_on_server_error(client): -# """Test that the client retries on 5xx server errors""" -# with aioresponses() as mocked: -# # Mock 3 consecutive 500 errors -# for _ in range(3): -# mocked.post( -# f"{client.base_url}/v1/extract/url", -# status=500, -# body="Internal Server Error", -# ) - -# with pytest.raises(ApiError) as exc_info: -# await client.extract_url( -# url="https://123.com", quality="low", chunk=True, wait=-1 -# ) - -# assert "Internal Server Error" in str(exc_info.value) diff --git a/tests/client_async/test_async_extract.py b/tests/client_async/test_async_extract.py index 9dfd4b3..6ad7507 100644 --- a/tests/client_async/test_async_extract.py +++ b/tests/client_async/test_async_extract.py @@ -93,7 +93,7 @@ async def test_extract_pdf_file_no_chunks(client: AsyncAurelioClient): assert dict_response["status"] == "completed" # Usage - # assert dict_response["usage"]["tokens"] is None #TODO: this should be None, if chunk is False + assert dict_response["usage"]["tokens"] is None assert dict_response["usage"]["pages"] == 7 assert dict_response["usage"]["seconds"] is None