diff --git a/cyberdrop_dl/__init__.py b/cyberdrop_dl/__init__.py
index a2a00c1c5..9c9b4d188 100644
--- a/cyberdrop_dl/__init__.py
+++ b/cyberdrop_dl/__init__.py
@@ -1 +1 @@
-__version__ = "5.1.85"
\ No newline at end of file
+__version__ = "5.1.86"
\ No newline at end of file
diff --git a/cyberdrop_dl/clients/errors.py b/cyberdrop_dl/clients/errors.py
index 3e9987d96..a056bfcc5 100644
--- a/cyberdrop_dl/clients/errors.py
+++ b/cyberdrop_dl/clients/errors.py
@@ -10,6 +10,16 @@ class NoExtensionFailure(Exception):
     def __init__(self, *, message: str = "Extension missing for file"):
         self.message = message
         super().__init__(self.message)
+
+
+class DDOSGuardFailure(Exception):
+    """This error will be thrown when DDoS-Guard is detected"""
+
+    def __init__(self, status: int, message: str = "DDoS-Guard detected"):
+        self.status = status
+        self.message = message
+        # Initialise the base exception once so str(error) keeps the message
+        super().__init__(self.message)
 
 
 class DownloadFailure(Exception):
diff --git a/cyberdrop_dl/clients/scraper_client.py b/cyberdrop_dl/clients/scraper_client.py
index b7f5aa5c6..147cf65c2 100644
--- a/cyberdrop_dl/clients/scraper_client.py
+++ b/cyberdrop_dl/clients/scraper_client.py
@@ -7,12 +7,12 @@
 from functools import wraps
 from typing import TYPE_CHECKING, Dict, Optional
 
-from aiohttp import ClientSession
+from aiohttp import ClientSession, ClientResponse
 from bs4 import BeautifulSoup
 from multidict import CIMultiDictProxy
 from yarl import URL
 
-from cyberdrop_dl.clients.errors import InvalidContentTypeFailure
+from cyberdrop_dl.clients.errors import InvalidContentTypeFailure, DDOSGuardFailure, ScrapeFailure
 from cyberdrop_dl.utils.utilities import log
 
 if TYPE_CHECKING:
@@ -58,13 +58,39 @@ async def on_request_end(session, trace_config_ctx, params):
         trace_config.on_request_start.append(on_request_start)
         trace_config.on_request_end.append(on_request_end)
         self.trace_configs.append(trace_config)
+
+    @limiter
+    async def flaresolverr(self, domain: str, url: URL, client_session: ClientSession) -> str:
+        """Returns the HTML of the given URL, fetched through the configured FlareSolverr instance"""
+        if not self.client_manager.flaresolverr:
+            raise ScrapeFailure(status="DDOS-Guard", message="FlareSolverr is not configured")
+
+        # The request goes to FlareSolverr's own API; the blocked page travels in the JSON payload
+        endpoint = f"http://{self.client_manager.flaresolverr}/v1"
+        headers = {**self._headers, "Content-Type": "application/json"}
+        data = {"cmd": "request.get", "url": str(url), "maxTimeout": 60000}
+
+        # Read the body inside the context manager, the connection is released on exit
+        async with client_session.post(endpoint, headers=headers, ssl=self.client_manager.ssl_context,
+                                       proxy=self.client_manager.proxy, json=data) as response:
+            json_obj = await response.json()
+
+        solution = json_obj.get("solution") or {}
+        if json_obj.get("status") != "ok" or "response" not in solution:
+            raise ScrapeFailure(status="DDOS-Guard", message="Failed to resolve URL with flaresolverr")
+        return solution["response"]
 
     @limiter
     async def get_BS4(self, domain: str, url: URL, client_session: ClientSession) -> BeautifulSoup:
         """Returns a BeautifulSoup object from the given URL"""
         async with client_session.get(url, headers=self._headers, ssl=self.client_manager.ssl_context,
                                       proxy=self.client_manager.proxy) as response:
-            await self.client_manager.check_http_status(response)
+            try:
+                await self.client_manager.check_http_status(response)
+            except DDOSGuardFailure:
+                # FlareSolverr hands back the solved page's HTML, so parse that instead of this response
+                html = await self.flaresolverr(domain, url, client_session)
+                return BeautifulSoup(html, 'html.parser')
             content_type = response.headers.get('Content-Type')
             assert content_type is not None
             if not any(s in content_type.lower() for s in ("html", "text")):
diff --git a/cyberdrop_dl/managers/args_manager.py b/cyberdrop_dl/managers/args_manager.py
index 24750a07e..6405ecab0 100644
--- a/cyberdrop_dl/managers/args_manager.py
+++ b/cyberdrop_dl/managers/args_manager.py
@@ -10,6 +10,7 @@ def __init__(self):
         self.parsed_args = {}
 
         self.proxy = ""
+        self.flaresolverr = ""
 
         self.all_configs = False
         self.retry = False
@@ -67,6 +68,8 @@ def startup(self) -> None:
 
         if self.parsed_args['proxy']:
             self.proxy = self.parsed_args['proxy']
+        if self.parsed_args['flaresolverr']:
+            self.flaresolverr = self.parsed_args['flaresolverr']
 
         self.other_links = self.parsed_args['links']
 
diff --git a/cyberdrop_dl/managers/client_manager.py b/cyberdrop_dl/managers/client_manager.py
index 0e556cbb1..22cffd309 100644
--- a/cyberdrop_dl/managers/client_manager.py
+++ b/cyberdrop_dl/managers/client_manager.py
@@ -11,7 +11,7 @@
 from aiolimiter import AsyncLimiter
 
 from cyberdrop_dl.clients.download_client import DownloadClient
-from cyberdrop_dl.clients.errors import DownloadFailure
+from cyberdrop_dl.clients.errors import DownloadFailure, DDOSGuardFailure
 from cyberdrop_dl.clients.scraper_client import ScraperClient
 from cyberdrop_dl.utils.utilities import CustomHTTPStatus
 
@@ -35,6 +35,7 @@ def __init__(self, manager: Manager):
         self.ssl_context = ssl.create_default_context(cafile=certifi.where()) if self.verify_ssl else False
         self.cookies = aiohttp.CookieJar(quote_cookie=False)
         self.proxy = manager.config_manager.global_settings_data['General']['proxy'] if not manager.args_manager.proxy else manager.args_manager.proxy
+        self.flaresolverr = manager.config_manager.global_settings_data['General']['flaresolverr'] if not manager.args_manager.flaresolverr else manager.args_manager.flaresolverr
 
         self.domain_rate_limits = {
             "bunkrr": AsyncLimiter(5, 1),
@@ -91,7 +92,7 @@ async def check_http_status(self, response: ClientResponse, download: bool = Fal
 
         response_text = await response.text()
         if "DDoS-Guard" in response_text:
-            raise DownloadFailure(status="DDOS-Guard", message="DDoS-Guard detected")
+            raise DDOSGuardFailure(status="DDOS-Guard", message="DDoS-Guard detected")
 
         if not headers.get('Content-Type'):
             raise DownloadFailure(status=CustomHTTPStatus.IM_A_TEAPOT, message="No content-type in response header")
diff --git a/cyberdrop_dl/utils/args/args.py b/cyberdrop_dl/utils/args/args.py
index 45f3a382e..874d67e96 100644
--- a/cyberdrop_dl/utils/args/args.py
+++ b/cyberdrop_dl/utils/args/args.py
@@ -11,6 +11,7 @@ def parse_args() -> argparse.Namespace:
     general.add_argument("-V", "--version", action="version", version=f"%(prog)s {VERSION}")
     general.add_argument("--config", type=str, help="name of config to load", default="")
     general.add_argument("--proxy", type=str, help="manually specify proxy string", default="")
+    general.add_argument("--flaresolverr", type=str, help="IP:PORT for flaresolverr", default="")
     general.add_argument("--no-ui", action="store_true", help="Disables the UI/Progress view entirely", default=False)
     general.add_argument("--download", action="store_true", help="Skip the UI and go straight to downloading", default=False)
     general.add_argument("--download-all-configs", action="store_true", help="Skip the UI and go straight to downloading (runs all configs sequentially)", default=False)
diff --git a/cyberdrop_dl/utils/args/config_definitions.py b/cyberdrop_dl/utils/args/config_definitions.py
index 7947b29ca..a9e00ecc3 100644
--- a/cyberdrop_dl/utils/args/config_definitions.py
+++ b/cyberdrop_dl/utils/args/config_definitions.py
@@ -124,6 +124,7 @@
         "allow_insecure_connections": False,
         "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0",
         "proxy": "",
+        "flaresolverr": "",
         "max_file_name_length": 95,
         "max_folder_name_length": 60,
         "required_free_space": 5,
diff --git a/pyproject.toml b/pyproject.toml
index 72f7655bf..fed19d1be 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "cyberdrop-dl"
-version = "5.1.85"
+version = "5.1.86"
 description = "Bulk downloader for multiple file hosts"
 authors = ["Jules Winnfield "]
 readme = "README.md"