This repository has been archived by the owner on Jul 5, 2024. It is now read-only.

Commit

Change clients to use proxy for every request (why wasn't it before?)
Jules-WinnfieldX committed Nov 21, 2023
1 parent af7dc9e commit df29884
Showing 5 changed files with 30 additions and 26 deletions.
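
For context on the change itself: aiohttp's request methods take the proxy per call (the proxy= keyword), so storing the proxy once on the Client and forwarding it from every get/post/head request, as this commit does, replaces threading a separate proxy argument through the download helpers. Below is a minimal sketch of that pattern, assuming only aiohttp; the ProxiedSession class, its method names, and the example URL are illustrative and not part of this repository.

import asyncio
from typing import Optional

import aiohttp


class ProxiedSession:
    """Illustrative sketch: store the proxy once, pass it on every request."""

    def __init__(self, proxy: Optional[str] = None) -> None:
        # e.g. "http://127.0.0.1:8080" (hypothetical value); None means connect directly
        self.proxy = proxy
        self.session: Optional[aiohttp.ClientSession] = None

    async def __aenter__(self) -> "ProxiedSession":
        self.session = aiohttp.ClientSession()
        return self

    async def __aexit__(self, *exc) -> None:
        if self.session is not None:
            await self.session.close()

    async def get_text(self, url: str) -> str:
        # aiohttp's ClientSession.get() accepts the proxy per request,
        # so every helper forwards self.proxy instead of taking its own argument.
        assert self.session is not None
        async with self.session.get(url, proxy=self.proxy) as response:
            return await response.text()


async def main() -> None:
    async with ProxiedSession(proxy=None) as client:
        print(len(await client.get_text("https://example.com")))


if __name__ == "__main__":
    asyncio.run(main())
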
2 changes: 1 addition & 1 deletion cyberdrop_dl/__init__.py
@@ -1 +1 @@
__version__ = "4.2.225"
__version__ = "4.2.226"
44 changes: 24 additions & 20 deletions cyberdrop_dl/client/client.py
@@ -44,14 +44,15 @@ async def wrapper(self, *args, **kwargs):
 class Client:
     """Creates a 'client' that can be referenced by scraping or download sessions"""
     def __init__(self, ratelimit: int, throttle: float, secure: bool, connect_timeout: int, read_timeout: int,
-                 user_agent: str):
+                 user_agent: str, proxy: str):
         self.connect_timeout = connect_timeout
         self.read_timeout = read_timeout
         self.ratelimit = ratelimit
         self.throttle = throttle
         self.simultaneous_session_limit = asyncio.Semaphore(50)
         self.user_agent = user_agent
         self.verify_ssl = secure
+        self.proxy = proxy
         self.ssl_context = ssl.create_default_context(cafile=certifi.where()) if secure else False
         self.cookies = aiohttp.CookieJar(quote_cookie=False)

@@ -69,7 +70,7 @@ def __init__(self, client: Client) -> None:

     @scrape_limit
     async def get_BS4(self, url: URL) -> BeautifulSoup:
-        async with self.client_session.get(url, ssl=self.client.ssl_context) as response:
+        async with self.client_session.get(url, ssl=self.client.ssl_context, proxy=self.client.proxy) as response:
             content_type = response.headers.get('Content-Type')
             assert content_type is not None
             if not any(s in content_type.lower() for s in ("html", "text")):
@@ -79,55 +80,59 @@ async def get_BS4(self, url: URL) -> BeautifulSoup:

     @scrape_limit
     async def get_BS4_and_url(self, url: URL) -> Tuple[BeautifulSoup, URL]:
-        async with self.client_session.get(url, ssl=self.client.ssl_context) as response:
+        async with self.client_session.get(url, ssl=self.client.ssl_context, proxy=self.client.proxy) as response:
             text = await response.text()
             soup = BeautifulSoup(text, 'html.parser')
             return soup, URL(response.url)

     @scrape_limit
     async def get_json(self, url: URL, params: Optional[Dict] = None, headers_inc: Optional[Dict] = None) -> Dict:
         headers = {**self.headers, **headers_inc} if headers_inc else self.headers
-        async with self.client_session.get(url, ssl=self.client.ssl_context, params=params, headers=headers) as response:
+        async with self.client_session.get(url, ssl=self.client.ssl_context, params=params, headers=headers,
+                                           proxy=self.client.proxy) as response:
             return json.loads(await response.content.read())

     @scrape_limit
     async def get_json_with_headers(self, url: URL, params: Optional[Dict] = None,
                                     headers_inc: Optional[Dict] = None) -> tuple[Any, CIMultiDictProxy[str]]:
         headers = {**self.headers, **headers_inc} if headers_inc else self.headers
-        async with self.client_session.get(url, ssl=self.client.ssl_context, params=params, headers=headers) as response:
+        async with self.client_session.get(url, ssl=self.client.ssl_context, params=params, headers=headers,
+                                           proxy=self.client.proxy) as response:
             content = await response.content.read()
             return json.loads(content), response.headers

     @scrape_limit
     async def get_text(self, url: URL) -> str:
-        async with self.client_session.get(url, ssl=self.client.ssl_context) as response:
+        async with self.client_session.get(url, ssl=self.client.ssl_context, proxy=self.client.proxy) as response:
             return await response.text()

     @scrape_limit
     async def post(self, url: URL, data: Dict) -> Dict:
-        async with self.client_session.post(url, data=data, headers=self.headers, ssl=self.client.ssl_context) as response:
+        async with self.client_session.post(url, data=data, headers=self.headers, ssl=self.client.ssl_context,
+                                            proxy=self.client.proxy) as response:
             return json.loads(await response.content.read())

     @scrape_limit
     async def post_with_auth(self, url: URL, data: Dict, auth: aiohttp.BasicAuth) -> Dict:
-        async with self.client_session.post(url, data=data, headers=self.headers, ssl=self.client.ssl_context, auth=auth) as response:
+        async with self.client_session.post(url, data=data, headers=self.headers, ssl=self.client.ssl_context, auth=auth,
+                                            proxy=self.client.proxy) as response:
             return json.loads(await response.content.read())

     @scrape_limit
     async def get_no_resp(self, url: URL, headers: Dict) -> None:
-        async with self.client_session.get(url, headers=headers, ssl=self.client.ssl_context):
+        async with self.client_session.get(url, headers=headers, ssl=self.client.ssl_context, proxy=self.client.proxy):
             pass

     @scrape_limit
     async def post_data_no_resp(self, url: URL, data: Dict) -> None:
-        async with self.client_session.post(url, data=data, headers=self.headers, ssl=self.client.ssl_context):
+        async with self.client_session.post(url, data=data, headers=self.headers, ssl=self.client.ssl_context, proxy=self.client.proxy):
             pass

     @scrape_limit
     async def head(self, url: URL, headers_inc: Dict, allow_redirects=True) -> tuple[CIMultiDictProxy[str], URL]:
         headers = {**self.headers, **headers_inc} if headers_inc else self.headers
         async with self.client_session.head(url, headers=headers, ssl=self.client.ssl_context, raise_for_status=False,
-                                            allow_redirects=allow_redirects) as response:
+                                            allow_redirects=allow_redirects, proxy=self.client.proxy) as response:
             return response.headers, response.url

     async def exit_handler(self) -> None:
@@ -162,7 +167,7 @@ async def _append_content(self, file: Path, content: aiohttp.StreamReader,
             else:
                 update_progress(len(chunk))

-    async def _download(self, media: MediaItem, current_throttle: float, proxy: str, headers: Dict,
+    async def _download(self, media: MediaItem, current_throttle: float, headers: Dict,
                         save_content: Callable[[aiohttp.StreamReader], Coroutine[Any, Any, None]], file: Path) -> None:
         headers['Referer'] = str(media.referer)
         headers['user-agent'] = self.client.user_agent
@@ -171,7 +176,7 @@ async def _download(self, media: MediaItem, current_throttle: float, proxy: str,
         await self._throttle(current_throttle, media.url.host)

         async with self.client_session.get(media.url, headers=headers, ssl=self.client.ssl_context,
-                                            raise_for_status=True, proxy=proxy) as resp:
+                                            raise_for_status=True, proxy=self.client.proxy) as resp:
             content_type = resp.headers.get('Content-Type')
             if not content_type:
                 raise DownloadFailure(status=CustomHTTPStatus.IM_A_TEAPOT, message="No content-type in response header")
@@ -210,34 +215,33 @@ async def _throttle(self, delay: float, host: str) -> None:
             await asyncio.sleep(remaining)

     async def download_file(self, Progress_Master: ProgressMaster, media: MediaItem, file: Path,
-                            current_throttle: float, resume_point: int, proxy: str, headers: Dict,
-                            file_task: TaskID) -> None:
+                            current_throttle: float, resume_point: int, headers: Dict, file_task: TaskID) -> None:

         async def save_content(content: aiohttp.StreamReader) -> None:
             await Progress_Master.FileProgress.advance_file(file_task, resume_point)
             await self._append_content(file, content, functools.partial(Progress_Master.FileProgress.advance_file, file_task))

-        await self._download(media, current_throttle, proxy, headers, save_content, file)
+        await self._download(media, current_throttle, headers, save_content, file)

     async def old_download_file(self, media: MediaItem, file: Path, current_throttle: float, resume_point: int,
-                                proxy: str, headers: Dict, size: int) -> None:
+                                headers: Dict, size: int) -> None:

         async def save_content(content: aiohttp.StreamReader) -> None:
             task_description = adjust_title(f"{media.url.host}: {media.filename}")
             with tqdm(total=size + resume_point, unit_scale=True, unit='B', leave=False,
                       initial=resume_point, desc=task_description) as progress:
                 await self._append_content(file, content, lambda chunk_len: progress.update(chunk_len))

-        await self._download(media, current_throttle, proxy, headers, save_content, file)
+        await self._download(media, current_throttle, headers, save_content, file)

-    async def get_filesize(self, url: URL, referer: str, current_throttle: float, headers: Dict, proxy: str) -> int:
+    async def get_filesize(self, url: URL, referer: str, current_throttle: float, headers: Dict) -> int:
         headers['Referer'] = referer
         headers['user-agent'] = self.client.user_agent

         assert url.host is not None
         await self._throttle(current_throttle, url.host)
         async with self.client_session.get(url, headers=headers, ssl=self.client.ssl_context,
-                                            raise_for_status=False, proxy=proxy) as resp:
+                                            raise_for_status=False, proxy=self.client.proxy) as resp:
             if resp.status > 206:
                 if "Server" in resp.headers:
                     if resp.headers["Server"] == "ddos-guard":
4 changes: 2 additions & 2 deletions cyberdrop_dl/downloader/downloaders.py
@@ -236,7 +236,7 @@ async def download_file(self, album: str, media: MediaItem, url_path: str, album

         if not await self.CDL_Helper.SQL_Helper.sql_check_old_existing(url_path) and download_bool:
             await self.download_session.download_file(self.Progress_Master, media, partial_file, self.throttle,
-                                                       resume_point, self.CDL_Helper.proxy, headers, file_task)
+                                                       resume_point, headers, file_task)
             partial_file.rename(complete_file)

         await self.CDL_Helper.SQL_Helper.mark_complete(url_path, original_filename)
@@ -328,7 +328,7 @@ async def check_file_exists(self, complete_file: Path, partial_file: Path, media
         while True:
             if not expected_size:
                 expected_size = await self.download_session.get_filesize(media.url, str(media.referer),
-                                                                          current_throttle, headers, self.CDL_Helper.proxy)
+                                                                          current_throttle, headers)
             if not complete_file.exists() and not partial_file.exists():
                 break

4 changes: 2 additions & 2 deletions cyberdrop_dl/downloader/old_downloaders.py
@@ -197,7 +197,7 @@ async def download_file(self, album: str, media: MediaItem, url_path: str) -> No

         if not await self.SQL_Helper.sql_check_old_existing(url_path) and download_bool:
             await self.download_session.old_download_file(media, partial_file, current_throttle, resume_point,
-                                                           self.proxy, headers, expected_size)
+                                                           headers, expected_size)
             partial_file.rename(complete_file)

         await self.SQL_Helper.mark_complete(url_path, original_filename)
@@ -288,7 +288,7 @@ async def check_file_exists(self, complete_file: Path, partial_file: Path, media
         while True:
             if not expected_size:
                 expected_size = await self.download_session.get_filesize(media.url, str(media.referer),
-                                                                          current_throttle, headers, self.proxy)
+                                                                          current_throttle, headers)

             if not complete_file.exists() and not partial_file.exists():
                 break
2 changes: 1 addition & 1 deletion cyberdrop_dl/main.py
@@ -301,7 +301,7 @@ async def director(args: Dict, links: List) -> None:
     links = await consolidate_links(args, links)
     client = Client(args['Ratelimiting']['ratelimit'], args['Ratelimiting']['throttle'],
                     args['Runtime']['allow_insecure_connections'], args["Ratelimiting"]["connection_timeout"],
-                    args["Ratelimiting"]["read_timeout"], args['Runtime']['user_agent'])
+                    args["Ratelimiting"]["read_timeout"], args['Runtime']['user_agent'], args['Runtime']['proxy'])
     SQL_Helper = SQLHelper(args['Ignore']['ignore_history'], args['Ignore']['ignore_cache'], args['Files']['db_file'])
     Scraper = ScrapeMapper(args, client, SQL_Helper, False, error_writer, cache_manager)

