Skip to content
This repository has been archived by the owner on Jul 5, 2024. It is now read-only.

Commit

Permalink
proper flaresolverr handling
Browse files Browse the repository at this point in the history
  • Loading branch information
Jules-WinnfieldX committed Mar 7, 2024
1 parent 5589a99 commit 606f669
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 23 deletions.
2 changes: 1 addition & 1 deletion cyberdrop_dl/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "5.1.91"
__version__ = "5.1.92"
36 changes: 15 additions & 21 deletions cyberdrop_dl/clients/scraper_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from functools import wraps
from typing import TYPE_CHECKING, Dict, Optional

from aiohttp import ClientSession, ClientResponse
from aiohttp import ClientSession
from bs4 import BeautifulSoup
from multidict import CIMultiDictProxy
from yarl import URL
Expand Down Expand Up @@ -60,7 +60,7 @@ async def on_request_end(session, trace_config_ctx, params):
self.trace_configs.append(trace_config)

@limiter
async def flaresolverr(self, domain: str, url: URL, client_session: ClientSession) -> ClientResponse:
async def flaresolverr(self, domain: str, url: URL, client_session: ClientSession) -> str:
"""Returns the page content (response text) of the given URL as resolved through FlareSolverr"""
if not self.client_manager.flaresolverr:
raise ScrapeFailure(status="DDOS-Guard", message="FlareSolverr is not configured")
Expand All @@ -69,12 +69,13 @@ async def flaresolverr(self, domain: str, url: URL, client_session: ClientSessio
data = {"cmd": "request.get", "url": str(url), "maxTimeout": 60000}

async with client_session.post(f"http://{self.client_manager.flaresolverr}/v1", headers=headers, ssl=self.client_manager.ssl_context,
proxy=self.client_manager.proxy, data=data) as response:
try:
await self.client_manager.check_http_status(response)
except DDOSGuardFailure:
proxy=self.client_manager.proxy, json=data) as response:
json_obj = await response.json()
status = json_obj.get("status")
if status != "ok":
raise ScrapeFailure(status="DDOS-Guard", message="Failed to resolve URL with flaresolverr")
return response

return json_obj.get("solution").get("response")

@limiter
async def get_BS4(self, domain: str, url: URL, client_session: ClientSession) -> BeautifulSoup:
Expand All @@ -84,7 +85,8 @@ async def get_BS4(self, domain: str, url: URL, client_session: ClientSession) ->
try:
await self.client_manager.check_http_status(response)
except DDOSGuardFailure:
response = await self.flaresolverr(domain, url)
response_text = await self.flaresolverr(domain, url)
return BeautifulSoup(response_text, 'html.parser')
content_type = response.headers.get('Content-Type')
assert content_type is not None
if not any(s in content_type.lower() for s in ("html", "text")):
Expand All @@ -97,10 +99,7 @@ async def get_BS4_and_return_URL(self, domain: str, url: URL, client_session: Cl
"""Returns a BeautifulSoup object and response URL from the given URL"""
async with client_session.get(url, headers=self._headers, ssl=self.client_manager.ssl_context,
proxy=self.client_manager.proxy) as response:
try:
await self.client_manager.check_http_status(response)
except DDOSGuardFailure:
response = await self.flaresolverr(domain, url)
await self.client_manager.check_http_status(response)
content_type = response.headers.get('Content-Type')
assert content_type is not None
if not any(s in content_type.lower() for s in ("html", "text")):
Expand All @@ -115,10 +114,7 @@ async def get_json(self, domain: str, url: URL, params: Optional[Dict] = None, h

async with client_session.get(url, headers=headers, ssl=self.client_manager.ssl_context,
proxy=self.client_manager.proxy, params=params) as response:
try:
await self.client_manager.check_http_status(response)
except DDOSGuardFailure:
response = await self.flaresolverr(domain, url)
await self.client_manager.check_http_status(response)
content_type = response.headers.get('Content-Type')
assert content_type is not None
if 'json' not in content_type.lower():
Expand All @@ -133,7 +129,8 @@ async def get_text(self, domain: str, url: URL, client_session: ClientSession) -
try:
await self.client_manager.check_http_status(response)
except DDOSGuardFailure:
response = await self.flaresolverr(domain, url)
response_text = await self.flaresolverr(domain, url)
return response_text
text = await response.text()
return text

Expand All @@ -142,10 +139,7 @@ async def post_data(self, domain: str, url: URL, client_session: ClientSession,
"""Returns a JSON object from the given URL when posting data"""
async with client_session.post(url, headers=self._headers, ssl=self.client_manager.ssl_context,
proxy=self.client_manager.proxy, data=data) as response:
try:
await self.client_manager.check_http_status(response)
except DDOSGuardFailure:
response = await self.flaresolverr(domain, url)
await self.client_manager.check_http_status(response)
if req_resp:
return json.loads(await response.content.read())
else:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cyberdrop-dl"
version = "5.1.91"
version = "5.1.92"
description = "Bulk downloader for multiple file hosts"
authors = ["Jules Winnfield <[email protected]>"]
readme = "README.md"
Expand Down

0 comments on commit 606f669

Please sign in to comment.