From f9bf039e82703baa5310d9dbe35bdf5b2e52a7c5 Mon Sep 17 00:00:00 2001 From: Jules-WinnfieldX Date: Wed, 6 Mar 2024 20:16:27 -0700 Subject: [PATCH] integrate flaresolverr into the rest of the scraper functions. --- cyberdrop_dl/__init__.py | 2 +- cyberdrop_dl/clients/scraper_client.py | 20 ++++++++++++++++---- pyproject.toml | 2 +- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/cyberdrop_dl/__init__.py b/cyberdrop_dl/__init__.py index 55f4770f5..444568572 100644 --- a/cyberdrop_dl/__init__.py +++ b/cyberdrop_dl/__init__.py @@ -1 +1 @@ -__version__ = "5.1.87" \ No newline at end of file +__version__ = "5.1.88" \ No newline at end of file diff --git a/cyberdrop_dl/clients/scraper_client.py b/cyberdrop_dl/clients/scraper_client.py index 8d8c7d3e6..c2e7e442f 100644 --- a/cyberdrop_dl/clients/scraper_client.py +++ b/cyberdrop_dl/clients/scraper_client.py @@ -97,7 +97,10 @@ async def get_BS4_and_return_URL(self, domain: str, url: URL, client_session: Cl """Returns a BeautifulSoup object and response URL from the given URL""" async with client_session.get(url, headers=self._headers, ssl=self.client_manager.ssl_context, proxy=self.client_manager.proxy) as response: - await self.client_manager.check_http_status(response) + try: + await self.client_manager.check_http_status(response) + except DDOSGuardFailure: + response = await self.flaresolverr(domain, url, client_session) content_type = response.headers.get('Content-Type') assert content_type is not None if not any(s in content_type.lower() for s in ("html", "text")): @@ -112,7 +115,10 @@ async def get_json(self, domain: str, url: URL, params: Optional[Dict] = None, h async with client_session.get(url, headers=headers, ssl=self.client_manager.ssl_context, proxy=self.client_manager.proxy, params=params) as response: - await self.client_manager.check_http_status(response) + try: + await self.client_manager.check_http_status(response) + except DDOSGuardFailure: + response = await self.flaresolverr(domain, url, client_session) content_type = response.headers.get('Content-Type') assert content_type is not None if 'json' not in content_type.lower(): @@ -124,8 +130,11 @@ async def get_text(self, domain: str, url: URL, client_session: ClientSession) - """Returns a text object from the given URL""" async with client_session.get(url, headers=self._headers, ssl=self.client_manager.ssl_context, proxy=self.client_manager.proxy) as response: + try: + await self.client_manager.check_http_status(response) + except DDOSGuardFailure: + response = await self.flaresolverr(domain, url, client_session) text = await response.text() - await self.client_manager.check_http_status(response) return text @limiter @@ -133,7 +142,10 @@ async def post_data(self, domain: str, url: URL, client_session: ClientSession, """Returns a JSON object from the given URL when posting data""" async with client_session.post(url, headers=self._headers, ssl=self.client_manager.ssl_context, proxy=self.client_manager.proxy, data=data) as response: - await self.client_manager.check_http_status(response) + try: + await self.client_manager.check_http_status(response) + except DDOSGuardFailure: + response = await self.flaresolverr(domain, url, client_session) if req_resp: return json.loads(await response.content.read()) else: diff --git a/pyproject.toml b/pyproject.toml index c1807dfaa..b0cd64066 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "cyberdrop-dl" -version = "5.1.87" +version = "5.1.88" description = "Bulk downloader for multiple file hosts" authors = ["Jules Winnfield "] readme = "README.md"