Skip to content
This repository has been archived by the owner on Jul 5, 2024. It is now read-only.

Commit

Permalink
Integrate FlareSolverr into the rest of the scraper functions.
Browse files Browse the repository at this point in the history
  • Loading branch information
Jules-WinnfieldX committed Mar 7, 2024
1 parent 442c405 commit f9bf039
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 6 deletions.
2 changes: 1 addition & 1 deletion cyberdrop_dl/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "5.1.87"
__version__ = "5.1.88"
20 changes: 16 additions & 4 deletions cyberdrop_dl/clients/scraper_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,10 @@ async def get_BS4_and_return_URL(self, domain: str, url: URL, client_session: Cl
"""Returns a BeautifulSoup object and response URL from the given URL"""
async with client_session.get(url, headers=self._headers, ssl=self.client_manager.ssl_context,
proxy=self.client_manager.proxy) as response:
await self.client_manager.check_http_status(response)
try:
await self.client_manager.check_http_status(response)
except DDOSGuardFailure:
response = await self.flaresolverr(domain, url, client_session)
content_type = response.headers.get('Content-Type')
assert content_type is not None
if not any(s in content_type.lower() for s in ("html", "text")):
Expand All @@ -112,7 +115,10 @@ async def get_json(self, domain: str, url: URL, params: Optional[Dict] = None, h

async with client_session.get(url, headers=headers, ssl=self.client_manager.ssl_context,
proxy=self.client_manager.proxy, params=params) as response:
await self.client_manager.check_http_status(response)
try:
await self.client_manager.check_http_status(response)
except DDOSGuardFailure:
response = await self.flaresolverr(domain, url, client_session)
content_type = response.headers.get('Content-Type')
assert content_type is not None
if 'json' not in content_type.lower():
Expand All @@ -124,16 +130,22 @@ async def get_text(self, domain: str, url: URL, client_session: ClientSession) -
"""Returns a text object from the given URL"""
async with client_session.get(url, headers=self._headers, ssl=self.client_manager.ssl_context,
proxy=self.client_manager.proxy) as response:
try:
await self.client_manager.check_http_status(response)
except DDOSGuardFailure:
response = await self.flaresolverr(domain, url, client_session)
text = await response.text()
await self.client_manager.check_http_status(response)
return text

@limiter
async def post_data(self, domain: str, url: URL, client_session: ClientSession, data: Dict, req_resp: bool = True) -> Dict:
"""Returns a JSON object from the given URL when posting data"""
async with client_session.post(url, headers=self._headers, ssl=self.client_manager.ssl_context,
proxy=self.client_manager.proxy, data=data) as response:
await self.client_manager.check_http_status(response)
try:
await self.client_manager.check_http_status(response)
except DDOSGuardFailure:
response = await self.flaresolverr(domain, url, client_session)
if req_resp:
return json.loads(await response.content.read())
else:
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cyberdrop-dl"
version = "5.1.87"
version = "5.1.88"
description = "Bulk downloader for multiple file hosts"
authors = ["Jules Winnfield <[email protected]>"]
readme = "README.md"
Expand Down

0 comments on commit f9bf039

Please sign in to comment.