diff --git a/cyberdrop_dl/clients/download_client.py b/cyberdrop_dl/clients/download_client.py index c8303c217..797094cd3 100644 --- a/cyberdrop_dl/clients/download_client.py +++ b/cyberdrop_dl/clients/download_client.py @@ -12,7 +12,7 @@ import aiohttp from aiohttp import ClientSession -from cyberdrop_dl.clients.errors import DownloadError, InvalidContentTypeError +from cyberdrop_dl.clients.errors import DownloadError, InsufficientFreeSpaceError, InvalidContentTypeError from cyberdrop_dl.utils.constants import DEBUG_VAR, FILE_FORMATS from cyberdrop_dl.utils.logger import log @@ -105,13 +105,13 @@ async def _download( domain == "pixeldrain" and self.manager.config_manager.authentication_data["PixelDrain"]["pixeldrain_api_key"] ): - download_headers["Authorization"] = await self.manager.download_manager.basic_auth( + download_headers["Authorization"] = self.manager.download_manager.basic_auth( "Cyberdrop-DL", self.manager.config_manager.authentication_data["PixelDrain"]["pixeldrain_api_key"], ) downloaded_filename = await self.manager.db_manager.history_table.get_downloaded_filename(domain, media_item) - download_dir = await self.get_download_dir(media_item) + download_dir = self.get_download_dir(media_item) media_item.partial_file = download_dir / f"{downloaded_filename}.part" resume_point = 0 @@ -138,13 +138,13 @@ async def _download( if not isinstance(media_item.complete_file, Path): proceed, skip = await self.get_final_file_info(media_item, domain) await self.mark_incomplete(media_item, domain) - await self.client_manager.check_bunkr_maint(resp.headers) + self.client_manager.check_bunkr_maint(resp.headers) if skip: - await self.manager.progress_manager.download_progress.add_skipped() + self.manager.progress_manager.download_progress.add_skipped() return False if not proceed: log(f"Skipping {media_item.url} as it has already been downloaded", 10) - await self.manager.progress_manager.download_progress.add_previously_completed(False) + self.manager.progress_manager.download_progress.add_previously_completed(False) await self.process_completed(media_item, domain) await self.handle_media_item_completion(media_item, downloaded=False) @@ -161,13 +161,13 @@ async def _download( if resp.status != HTTPStatus.PARTIAL_CONTENT and media_item.partial_file.is_file(): media_item.partial_file.unlink() - media_item.task_id = await self.manager.progress_manager.file_progress.add_task( + media_item.task_id = self.manager.progress_manager.file_progress.add_task( f"({domain.upper()}) {media_item.filename}", media_item.filesize + resume_point, ) if media_item.partial_file.is_file(): resume_point = media_item.partial_file.stat().st_size - await self.manager.progress_manager.file_progress.advance_file(media_item.task_id, resume_point) + self.manager.progress_manager.file_progress.advance_file(media_item.task_id, resume_point) await save_content(resp.content) return True @@ -179,8 +179,8 @@ async def _append_content( update_progress: partial, ) -> None: """Appends content to a file.""" - if not await self.client_manager.manager.download_manager.check_free_space(media_item.download_folder): - raise DownloadError(status="Insufficient Free Space", message="Not enough free space") + if not self.client_manager.manager.download_manager.check_free_space(media_item.download_folder): + raise InsufficientFreeSpaceError(origin=media_item) media_item.partial_file.parent.mkdir(parents=True, exist_ok=True) if not media_item.partial_file.is_file(): @@ -190,7 +190,7 @@ async def _append_content( await 
self.client_manager.check_bucket(chunk) await asyncio.sleep(0) await f.write(chunk) - await update_progress(len(chunk)) + update_progress(len(chunk)) if not content.total_bytes and not media_item.partial_file.stat().st_size: media_item.partial_file.unlink() raise DownloadError(status=HTTPStatus.INTERNAL_SERVER_ERROR, message="File is empty") @@ -199,7 +199,7 @@ async def download_file(self, manager: Manager, domain: str, media_item: MediaIt """Starts a file.""" if self.manager.config_manager.settings_data["Download_Options"]["skip_download_mark_completed"]: log(f"Download Skip {media_item.url} due to mark completed option", 10) - await self.manager.progress_manager.download_progress.add_skipped() + self.manager.progress_manager.download_progress.add_skipped() # set completed path await self.mark_incomplete(media_item, domain) await self.process_completed(media_item, domain) @@ -235,7 +235,7 @@ async def mark_completed(self, domain: str, media_item: MediaItem) -> None: async def add_file_size(self, domain: str, media_item: MediaItem) -> None: if not isinstance(media_item.complete_file, Path): - media_item.complete_file = await self.get_file_location(media_item) + media_item.complete_file = self.get_file_location(media_item) if media_item.complete_file.exists(): await self.manager.db_manager.history_table.add_filesize(domain, media_item) @@ -255,7 +255,7 @@ async def handle_media_item_completion(self, media_item: MediaItem, downloaded: """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - async def get_download_dir(self, media_item: MediaItem) -> Path: + def get_download_dir(self, media_item: MediaItem) -> Path: """Returns the download directory for the media item.""" download_folder = media_item.download_folder if self.manager.args_manager.retry_any: @@ -267,13 +267,13 @@ async def get_download_dir(self, media_item: MediaItem) -> Path: media_item.download_folder = download_folder return download_folder - async def get_file_location(self, media_item: MediaItem) -> Path: - download_dir = await self.get_download_dir(media_item) + def get_file_location(self, media_item: MediaItem) -> Path: + download_dir = self.get_download_dir(media_item) return download_dir / media_item.filename async def get_final_file_info(self, media_item: MediaItem, domain: str) -> tuple[bool, bool]: """Complicated checker for if a file already exists, and was already downloaded.""" - media_item.complete_file = await self.get_file_location(media_item) + media_item.complete_file = self.get_file_location(media_item) media_item.partial_file = media_item.complete_file.with_suffix(media_item.complete_file.suffix + ".part") expected_size = media_item.filesize if isinstance(media_item.filesize, int) else None @@ -297,7 +297,7 @@ async def get_final_file_info(self, media_item: MediaItem, domain: str) -> tuple while True: if expected_size: - file_size_check = await self.check_filesize_limits(media_item) + file_size_check = self.check_filesize_limits(media_item) if not file_size_check: log(f"Download Skip {media_item.url} due to filesize restrictions", 10) proceed = False @@ -370,20 +370,15 @@ async def iterate_filename(self, complete_file: Path, media_item: MediaItem) -> break return complete_file, partial_file - async def check_filesize_limits(self, media: MediaItem) -> bool: + def check_filesize_limits(self, media: MediaItem) -> bool: """Checks if the file size is within the limits.""" - max_video_filesize = 
self.manager.config_manager.settings_data["File_Size_Limits"][ - "maximum_video_size" - ] or float("inf") - min_video_filesize = self.manager.config_manager.settings_data["File_Size_Limits"]["minimum_video_size"] - max_image_filesize = self.manager.config_manager.settings_data["File_Size_Limits"][ - "maximum_image_size" - ] or float("inf") - min_image_filesize = self.manager.config_manager.settings_data["File_Size_Limits"]["minimum_image_size"] - max_other_filesize = self.manager.config_manager.settings_data["File_Size_Limits"][ - "maximum_other_size" - ] or float("inf") - min_other_filesize = self.manager.config_manager.settings_data["File_Size_Limits"]["minimum_other_size"] + file_size_limits = self.manager.config_manager.settings_data["File_Size_Limits"] + max_video_filesize = file_size_limits["maximum_video_size"] or float("inf") + min_video_filesize = file_size_limits["minimum_video_size"] + max_image_filesize = file_size_limits["maximum_image_size"] or float("inf") + min_image_filesize = file_size_limits["minimum_image_size"] + max_other_filesize = file_size_limits["maximum_other_size"] or float("inf") + min_other_filesize = file_size_limits["minimum_other_size"] if media.ext in FILE_FORMATS["Images"]: proceed = min_image_filesize < media.filesize < max_image_filesize diff --git a/cyberdrop_dl/clients/hash_client.py b/cyberdrop_dl/clients/hash_client.py index d0db33b45..d8c1cf121 100644 --- a/cyberdrop_dl/clients/hash_client.py +++ b/cyberdrop_dl/clients/hash_client.py @@ -69,7 +69,7 @@ async def hash_item(self, file: Path | str, original_filename: str, referer: URL return None if self.hashes[key]: return self.hashes[key] - await self.manager.progress_manager.hash_progress.update_currently_hashing(file) + self.manager.progress_manager.hash_progress.update_currently_hashing(file) hash = await self.manager.db_manager.hash_table.get_file_hash_exists(file) try: if not hash: @@ -80,9 +80,9 @@ async def hash_item(self, file: Path | str, original_filename: str, referer: URL original_filename, referer, ) - await self.manager.progress_manager.hash_progress.add_new_completed_hash() + self.manager.progress_manager.hash_progress.add_new_completed_hash() else: - await self.manager.progress_manager.hash_progress.add_prev_hash() + self.manager.progress_manager.hash_progress.add_prev_hash() await self.manager.db_manager.hash_table.insert_or_update_hash_db( hash, file, @@ -102,12 +102,12 @@ async def hash_item_during_download(self, media_item: MediaItem) -> None: log(f"After hash processing failed: {media_item.complete_file} with error {e}", 40, exc_info=True) async def cleanup_dupes(self) -> None: - async with self.manager.live_manager.get_hash_live(): + with self.manager.live_manager.get_hash_live(): if not self.manager.config_manager.global_settings_data["Dupe_Cleanup_Options"]["delete_after_download"]: return file_hashes_dict = await self.get_file_hashes_dict() async with self.manager.live_manager.get_remove_file_via_hash_live(): - final_candiates_dict = await self.get_candiate_per_group(file_hashes_dict) + final_candiates_dict = self.get_candiate_per_group(file_hashes_dict) await self.final_dupe_cleanup(final_candiates_dict) async def final_dupe_cleanup(self, final_dict: dict[str, dict]) -> None: @@ -135,7 +135,7 @@ async def final_dupe_cleanup(self, final_dict: dict[str, dict]) -> None: try: self.send2trash(ele) log(f"Sent prev download: {ele!s} to trash with hash {hash}", 10) - await self.manager.progress_manager.hash_progress.add_removed_prev_file() + 
self.manager.progress_manager.hash_progress.add_removed_prev_file() except OSError: continue # keep a previous downloads @@ -146,7 +146,7 @@ async def final_dupe_cleanup(self, final_dict: dict[str, dict]) -> None: try: self.send2trash(ele) log(f"Sent prev download: {ele!s} to trash with hash {hash}", 10) - await self.manager.progress_manager.hash_progress.add_removed_prev_file() + self.manager.progress_manager.hash_progress.add_removed_prev_file() except OSError: continue # delete current download @@ -155,7 +155,7 @@ async def final_dupe_cleanup(self, final_dict: dict[str, dict]) -> None: if selected_file.exists(): self.send2trash(selected_file) log(f"Sent new download:{selected_file} to trash with hash {hash}", 10) - await self.manager.progress_manager.hash_progress.add_removed_file() + self.manager.progress_manager.hash_progress.add_removed_file() except OSError: pass @@ -174,7 +174,7 @@ async def get_file_hashes_dict(self) -> dict: log(f"After hash processing failed: {item} with error {e}", 40, exc_info=True) return hashes_dict - async def get_candiate_per_group(self, hashes_dict: dict[str, dict[int, list[Path]]]) -> dict: + def get_candiate_per_group(self, hashes_dict: dict[str, dict[int, list[Path]]]) -> dict: # remove downloaded files, so each group only has the one previously downloaded file or the first downloaded file for hash, size_dict in hashes_dict.items(): for size, files in size_dict.items(): @@ -189,7 +189,7 @@ async def get_candiate_per_group(self, hashes_dict: dict[str, dict[int, list[Pat try: self.send2trash(file) log(f"Sent new download : {file} to trash with hash {hash}", 10) - await self.manager.progress_manager.hash_progress.add_removed_file() + self.manager.progress_manager.hash_progress.add_removed_file() except OSError: pass diff --git a/cyberdrop_dl/downloader/downloader.py b/cyberdrop_dl/downloader/downloader.py index ceb4fde4c..4d62daf15 100644 --- a/cyberdrop_dl/downloader/downloader.py +++ b/cyberdrop_dl/downloader/downloader.py @@ -34,7 +34,7 @@ async def wrapper(self: Downloader, *args, **kwargs) -> None: try: return await func(self, *args, **kwargs) except DownloadError as e: - await self.attempt_task_removal(media_item) + self.attempt_task_removal(media_item) max_attempts = self.manager.config_manager.global_settings_data["Rate_Limiting_Options"][ "download_attempts" @@ -50,13 +50,13 @@ async def wrapper(self: Downloader, *args, **kwargs) -> None: log(f"{self.log_prefix} failed: {media_item.url} {log_message}", 40) if media_item.current_attempt >= max_attempts: - await self.manager.progress_manager.download_stats_progress.add_failure(e.ui_message) + self.manager.progress_manager.download_stats_progress.add_failure(e.ui_message) await self.manager.log_manager.write_download_error_log( media_item.url, e.message, media_item.referer, ) - await self.manager.progress_manager.download_progress.add_failed() + self.manager.progress_manager.download_progress.add_failed() break retrying_message = f"Retrying {self.log_prefix.lower()}: {media_item.url} ,retry attempt: {media_item.current_attempt + 1}" @@ -85,10 +85,10 @@ async def wrapper(self: Downloader, *args, **kwargs) -> None: failed_message = f"{self.log_prefix} failed: {media_item.url} with error: {log_message}" log(failed_message, 40, exc_info=exc_info) - await self.attempt_task_removal(media_item) + self.attempt_task_removal(media_item) await self.manager.log_manager.write_download_error_log(media_item.url, log_message_short, origin) - await 
self.manager.progress_manager.download_stats_progress.add_failure(ui_message) - await self.manager.progress_manager.download_progress.add_failed() + self.manager.progress_manager.download_stats_progress.add_failure(ui_message) + self.manager.progress_manager.download_progress.add_failed() break return wrapper @@ -111,10 +111,10 @@ def __init__(self, manager: Manager, domain: str) -> None: self._current_attempt_filesize = {} self.log_prefix = "Download attempt (unsupported domain)" if domain == "no_crawler" else "Download" - async def startup(self) -> None: + def startup(self) -> None: """Starts the downloader.""" self.client = self.manager.client_manager.downloader_session - self._semaphore = asyncio.Semaphore(await self.manager.download_manager.get_download_limit(self.domain)) + self._semaphore = asyncio.Semaphore(self.manager.download_manager.get_download_limit(self.domain)) self.manager.path_manager.download_dir.mkdir(parents=True, exist_ok=True) if self.manager.config_manager.settings_data["Sorting"]["sort_downloads"]: @@ -129,7 +129,7 @@ async def run(self, media_item: MediaItem) -> None: self.waiting_items -= 1 if media_item.url.path not in self.processed_items: self.processed_items.append(media_item.url.path) - await self.manager.progress_manager.download_progress.update_total() + self.manager.progress_manager.download_progress.update_total() log(f"{self.log_prefix} starting: {media_item.url}", 20) async with self.manager.client_manager.download_session_limit: @@ -140,8 +140,8 @@ async def run(self, media_item: MediaItem) -> None: await self.download(media_item) except Exception as e: log(f"{self.log_prefix} failed: {media_item.url} with error {e}", 40, exc_info=True) - await self.manager.progress_manager.download_stats_progress.add_failure("Unknown") - await self.manager.progress_manager.download_progress.add_failed() + self.manager.progress_manager.download_stats_progress.add_failure("Unknown") + self.manager.progress_manager.download_progress.add_failed() else: log(f"{self.log_prefix} finished: {media_item.url}", 20) finally: @@ -150,14 +150,14 @@ async def run(self, media_item: MediaItem) -> None: """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - async def check_file_can_download(self, media_item: MediaItem) -> None: + def check_file_can_download(self, media_item: MediaItem) -> None: """Checks if the file can be downloaded.""" - if not await self.manager.download_manager.check_free_space(media_item.download_folder): + if not self.manager.download_manager.check_free_space(media_item.download_folder): raise InsufficientFreeSpaceError(origin=media_item) - if not await self.manager.download_manager.check_allowed_filetype(media_item): + if not self.manager.download_manager.check_allowed_filetype(media_item): raise RestrictedFiletypeError(origin=media_item) - async def set_file_datetime(self, media_item: MediaItem, complete_file: Path) -> None: + def set_file_datetime(self, media_item: MediaItem, complete_file: Path) -> None: """Sets the file's datetime.""" if self.manager.config_manager.settings_data["Download_Options"]["disable_file_timestamps"]: return @@ -169,11 +169,11 @@ async def set_file_datetime(self, media_item: MediaItem, complete_file: Path) -> accessed=media_item.datetime, ) - async def attempt_task_removal(self, media_item: MediaItem) -> None: + def attempt_task_removal(self, media_item: MediaItem) -> None: """Attempts to remove the task from the progress bar.""" if not isinstance(media_item.task_id, 
Field): with contextlib.suppress(ValueError): - await self.manager.progress_manager.file_progress.remove_file(media_item.task_id) + self.manager.progress_manager.file_progress.remove_file(media_item.task_id) media_item.task_id = field(init=False) """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" @@ -186,26 +186,26 @@ async def download(self, media_item: MediaItem) -> None: if not isinstance(media_item.current_attempt, int): media_item.current_attempt = 1 - await self.check_file_can_download(media_item) + self.check_file_can_download(media_item) downloaded = await self.client.download_file(self.manager, self.domain, media_item) if downloaded: Path.chmod(media_item.complete_file, 0o666) - await self.set_file_datetime(media_item, media_item.complete_file) - await self.attempt_task_removal(media_item) - await self.manager.progress_manager.download_progress.add_completed() + self.set_file_datetime(media_item, media_item.complete_file) + self.attempt_task_removal(media_item) + self.manager.progress_manager.download_progress.add_completed() except RestrictedFiletypeError: - await self.manager.progress_manager.download_progress.add_skipped() - await self.attempt_task_removal(media_item) + self.manager.progress_manager.download_progress.add_skipped() + self.attempt_task_removal(media_item) except (DownloadError, aiohttp.ClientResponseError) as e: ui_message = getattr(e, "ui_message", e.status) log_message_short = log_message = f"{e.status} - {e.message}" log(f"{self.log_prefix} failed: {media_item.url} with error: {log_message}", 40) await self.manager.log_manager.write_download_error_log(media_item.url, log_message_short, origin) - await self.manager.progress_manager.download_stats_progress.add_failure(ui_message) - await self.manager.progress_manager.download_progress.add_failed() - await self.attempt_task_removal(media_item) + self.manager.progress_manager.download_stats_progress.add_failure(ui_message) + self.manager.progress_manager.download_progress.add_failed() + self.attempt_task_removal(media_item) except ( aiohttp.ClientPayloadError, diff --git a/cyberdrop_dl/main.py b/cyberdrop_dl/main.py index efaeac8cc..86f9cb50b 100644 --- a/cyberdrop_dl/main.py +++ b/cyberdrop_dl/main.py @@ -173,7 +173,7 @@ async def director(manager: Manager) -> None: log_spacer(20) log("Starting CDL...\n", 20) - async with manager.live_manager.get_main_live(stop=True): + with manager.live_manager.get_main_live(stop=True): await runtime(manager) await post_runtime(manager) @@ -209,9 +209,9 @@ def main() -> None: with contextlib.suppress(Exception): print_to_console("\nTrying to Exit...") asyncio.run(manager.close()) - except Exception: + except Exception as e: log_with_color( - "An error occurred, please report this to the developer:", + f"An error occurred, please report this to the developer: {e}", "bold red", 50, show_in_stats=False, diff --git a/cyberdrop_dl/managers/client_manager.py b/cyberdrop_dl/managers/client_manager.py index ed5084983..e9e126857 100644 --- a/cyberdrop_dl/managers/client_manager.py +++ b/cyberdrop_dl/managers/client_manager.py @@ -141,7 +141,7 @@ async def check_http_status( raise DownloadError(status=status, message=message, origin=origin) @staticmethod - async def check_bunkr_maint(headers: dict): + def check_bunkr_maint(headers: dict): if headers.get("Content-Length") == "322509" and headers.get("Content-Type") == "video/mp4": raise DownloadError(status="Bunkr Maintenance", message="Bunkr under maintenance") diff --git 
a/cyberdrop_dl/managers/console_manager.py b/cyberdrop_dl/managers/console_manager.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/cyberdrop_dl/managers/download_manager.py b/cyberdrop_dl/managers/download_manager.py index 727fddceb..72032179a 100644 --- a/cyberdrop_dl/managers/download_manager.py +++ b/cyberdrop_dl/managers/download_manager.py @@ -58,7 +58,7 @@ def __init__(self, manager: Manager) -> None: "xxxbunker": 2, } - async def get_download_limit(self, key: str) -> int: + def get_download_limit(self, key: str) -> int: """Returns the download limit for a domain.""" if key in self.download_limits: instances = self.download_limits[key] @@ -75,12 +75,12 @@ async def get_download_limit(self, key: str) -> int: ) @staticmethod - async def basic_auth(username: str, password: str) -> str: + def basic_auth(username: str, password: str) -> str: """Returns a basic auth token.""" token = b64encode(f"{username}:{password}".encode()).decode("ascii") return f"Basic {token}" - async def check_free_space(self, folder: Path | None = None) -> bool: + def check_free_space(self, folder: Path | None = None) -> bool: """Checks if there is enough free space on the drive to continue operating.""" if not folder: folder = self.manager.path_manager.download_dir @@ -96,26 +96,17 @@ async def check_free_space(self, folder: Path | None = None) -> bool: free_space_gb = free_space / 1024**3 return free_space_gb >= self.manager.config_manager.global_settings_data["General"]["required_free_space"] - async def check_allowed_filetype(self, media_item: MediaItem) -> bool: + def check_allowed_filetype(self, media_item: MediaItem) -> bool: """Checks if the file type is allowed to download.""" - if ( - media_item.ext in FILE_FORMATS["Images"] - and self.manager.config_manager.settings_data["Ignore_Options"]["exclude_images"] - ): + ignore_options = self.manager.config_manager.settings_data["Ignore_Options"] + valid_extensions = FILE_FORMATS["Images"] | FILE_FORMATS["Videos"] | FILE_FORMATS["Audio"] + if media_item.ext in FILE_FORMATS["Images"] and ignore_options["exclude_images"]: return False - if ( - media_item.ext in FILE_FORMATS["Videos"] - and self.manager.config_manager.settings_data["Ignore_Options"]["exclude_videos"] - ): + if media_item.ext in FILE_FORMATS["Videos"] and ignore_options["exclude_videos"]: return False - if ( - media_item.ext in FILE_FORMATS["Audio"] - and self.manager.config_manager.settings_data["Ignore_Options"]["exclude_audio"] - ): + if media_item.ext in FILE_FORMATS["Audio"] and ignore_options["exclude_audio"]: return False return not ( self.manager.config_manager.settings_data["Ignore_Options"]["exclude_other"] - and media_item.ext not in FILE_FORMATS["Images"] - and media_item.ext not in FILE_FORMATS["Videos"] - and media_item.ext not in FILE_FORMATS["Audio"] + and media_item.ext not in valid_extensions ) diff --git a/cyberdrop_dl/managers/live_manager.py b/cyberdrop_dl/managers/live_manager.py index 1dfb68402..1b6074a2b 100644 --- a/cyberdrop_dl/managers/live_manager.py +++ b/cyberdrop_dl/managers/live_manager.py @@ -1,7 +1,7 @@ from __future__ import annotations -from contextlib import asynccontextmanager -from typing import TYPE_CHECKING, AsyncGenerator +from contextlib import contextmanager +from typing import TYPE_CHECKING, Generator from rich.console import Console from rich.live import Live @@ -23,8 +23,8 @@ def __init__(self, manager: Manager) -> None: console=console, ) - @asynccontextmanager - async def get_live(self, layout: Layout, stop: bool = False) -> 
AsyncGenerator[Live]: + @contextmanager + def get_live(self, layout: Layout, stop: bool = False) -> Generator[Live]: try: if self.manager.args_manager.no_ui: yield @@ -39,27 +39,27 @@ async def get_live(self, layout: Layout, stop: bool = False) -> AsyncGenerator[L except Exception as e: log(f"Issue with rich live {e}", level=10, exc_info=True) - @asynccontextmanager - async def get_main_live(self, stop: bool = False) -> AsyncGenerator[Live]: + @contextmanager + def get_main_live(self, stop: bool = False) -> Generator[Live]: """Main UI startup and context manager.""" layout = self.manager.progress_manager.layout - async with self.get_live(layout, stop=stop) as live: + with self.get_live(layout, stop=stop) as live: yield live - @asynccontextmanager - async def get_remove_file_via_hash_live(self, stop: bool = False) -> AsyncGenerator[Live]: + @contextmanager + def get_remove_file_via_hash_live(self, stop: bool = False) -> Generator[Live]: layout = self.manager.progress_manager.hash_remove_layout - async with self.get_live(layout, stop=stop) as live: + with self.get_live(layout, stop=stop) as live: yield live - @asynccontextmanager - async def get_hash_live(self, stop: bool = False) -> AsyncGenerator[Live]: + @contextmanager + def get_hash_live(self, stop: bool = False) -> Generator[Live]: layout = self.manager.progress_manager.hash_layout - async with self.get_live(layout, stop=stop) as live: + with self.get_live(layout, stop=stop) as live: yield live - @asynccontextmanager - async def get_sort_live(self, stop: bool = False) -> AsyncGenerator[Live]: + @contextmanager + def get_sort_live(self, stop: bool = False) -> Generator[Live]: layout = self.manager.progress_manager.sort_layout - async with self.get_live(layout, stop=stop) as live: + with self.get_live(layout, stop=stop) as live: yield live diff --git a/cyberdrop_dl/managers/manager.py b/cyberdrop_dl/managers/manager.py index f9e7c441b..c98f101c9 100644 --- a/cyberdrop_dl/managers/manager.py +++ b/cyberdrop_dl/managers/manager.py @@ -107,32 +107,26 @@ def args_startup(self) -> None: async def async_startup(self) -> None: """Async startup process for the manager.""" - await self.args_consolidation() - await self.args_logging() + self.args_consolidation() + self.args_logging() - if not isinstance(self.db_manager, DBManager): - self.db_manager = DBManager(self, self.path_manager.history_db) - self.db_manager.ignore_history = self.config_manager.settings_data["Runtime_Options"]["ignore_history"] - await self.db_manager.startup() if not isinstance(self.client_manager, ClientManager): self.client_manager = ClientManager(self) if not isinstance(self.download_manager, DownloadManager): self.download_manager = DownloadManager(self) - if not isinstance(self.hash_manager, HashManager): - self.hash_manager = HashManager(self) - await self.hash_manager.startup() - if not isinstance(self.live_manager, LiveManager): - self.live_manager = LiveManager(self) if not isinstance(self.real_debrid_manager, RealDebridManager): self.real_debrid_manager = RealDebridManager(self) - self.progress_manager = ProgressManager(self) - await self.progress_manager.startup() + await self.async_db_hash_startup() # set files from args - from cyberdrop_dl.utils.constants import MAX_NAME_LENGTHS + import cyberdrop_dl.utils.constants as constants - MAX_NAME_LENGTHS["FILE"] = int(self.config_manager.global_settings_data["General"]["max_file_name_length"]) - MAX_NAME_LENGTHS["FOLDER"] = int(self.config_manager.global_settings_data["General"]["max_folder_name_length"]) + 
constants.MAX_NAME_LENGTHS["FILE"] = int( + self.config_manager.global_settings_data["General"]["max_file_name_length"] + ) + constants.MAX_NAME_LENGTHS["FOLDER"] = int( + self.config_manager.global_settings_data["General"]["max_folder_name_length"] + ) async def async_db_hash_startup(self) -> None: # start up the db manager and hash manager only for scanning @@ -145,9 +139,9 @@ async def async_db_hash_startup(self) -> None: if not isinstance(self.live_manager, LiveManager): self.live_manager = LiveManager(self) self.progress_manager = ProgressManager(self) - await self.progress_manager.startup() + self.progress_manager.startup() - async def args_consolidation(self) -> None: + def args_consolidation(self) -> None: """Consolidates runtime arguments with config values.""" cli_settings_groups = ["Download_Options", "File_Size_Limits", "Ignore_Options", "Runtime_Options"] parsed_args = self.args_manager.parsed_args @@ -161,7 +155,7 @@ async def args_consolidation(self) -> None: elif self.args_manager.parsed_args[arg] is not None: self.config_manager.settings_data[cli_settings_group][arg] = parsed_args[arg] - async def args_logging(self) -> None: + def args_logging(self) -> None: """Logs the runtime arguments.""" forum_xf_cookies_provided = {} forum_credentials_provided = {} @@ -190,9 +184,7 @@ async def args_logging(self) -> None: print_settings["Logs"]["log_folder"] = str(print_settings["Logs"]["log_folder"]) print_settings["Logs"]["webhook_url"] = bool(print_settings["Logs"]["webhook_url"]) print_settings["Sorting"]["sort_folder"] = str(print_settings["Sorting"]["sort_folder"]) - print_settings["Sorting"]["scan_folder"] = ( - str(print_settings["Sorting"]["scan_folder"]) if str(print_settings["Sorting"]["scan_folder"]) else "" - ) + print_settings["Sorting"]["scan_folder"] = str(print_settings["Sorting"]["scan_folder"]) or "" log(f"Starting Cyberdrop-DL Process - Config: {self.config_manager.loaded_config}", 10) log(f"Running version {__version__}", 10) diff --git a/cyberdrop_dl/managers/progress_manager.py b/cyberdrop_dl/managers/progress_manager.py index 3ca2e9c4d..433d7dd73 100644 --- a/cyberdrop_dl/managers/progress_manager.py +++ b/cyberdrop_dl/managers/progress_manager.py @@ -49,27 +49,27 @@ def __init__(self, manager: Manager) -> None: self.hash_layout: Layout = field(init=False) self.sort_layout: Layout = field(init=False) - async def startup(self) -> None: + def startup(self) -> None: """Startup process for the progress manager.""" progress_layout = Layout() progress_layout.split_column( Layout(name="upper", ratio=2, minimum_size=8), - Layout(renderable=await self.scraping_progress.get_progress(), name="Scraping", ratio=2), - Layout(renderable=await self.file_progress.get_progress(), name="Downloads", ratio=2), + Layout(renderable=self.scraping_progress.get_progress(), name="Scraping", ratio=2), + Layout(renderable=self.file_progress.get_progress(), name="Downloads", ratio=2), ) progress_layout["upper"].split_row( - Layout(renderable=await self.download_progress.get_progress(), name="Files", ratio=1), - Layout(renderable=await self.scrape_stats_progress.get_progress(), name="Scrape Failures", ratio=1), - Layout(renderable=await self.download_stats_progress.get_progress(), name="Download Failures", ratio=1), + Layout(renderable=self.download_progress.get_progress(), name="Files", ratio=1), + Layout(renderable=self.scrape_stats_progress.get_progress(), name="Scrape Failures", ratio=1), + Layout(renderable=self.download_stats_progress.get_progress(), name="Download Failures", ratio=1), 
) hash_remove_layout = Layout() - hash_remove_layout = await self.hash_progress.get_removed_progress() + hash_remove_layout = self.hash_progress.get_removed_progress() self.layout = progress_layout self.hash_remove_layout = hash_remove_layout - self.hash_layout = await self.hash_progress.get_hash_progress() - self.sort_layout = await self.sort_progress.get_progress() + self.hash_layout = self.hash_progress.get_hash_progress() + self.sort_layout = self.sort_progress.get_progress() def print_stats(self, start_time: timedelta | float) -> None: """Prints the stats of the program.""" diff --git a/cyberdrop_dl/managers/realdebrid_manager.py b/cyberdrop_dl/managers/realdebrid_manager.py index 8f567a88a..4e9098a7a 100644 --- a/cyberdrop_dl/managers/realdebrid_manager.py +++ b/cyberdrop_dl/managers/realdebrid_manager.py @@ -32,7 +32,7 @@ def __init__(self, manager: Manager) -> None: self.api: RealDebridApi = field(init=False) self._folder_guess_functions = [self._guess_folder_by_part, self._guess_folder_by_query] - async def startup(self) -> None: + def startup(self) -> None: """Startup process for Real Debrid manager.""" try: self.api = RealDebridApi(self.__api_token, True) @@ -48,11 +48,11 @@ async def startup(self) -> None: log(f"Failed RealDebrid setup: {e.error}", 40) self.enabled = False - async def is_supported_folder(self, url: URL) -> bool: + def is_supported_folder(self, url: URL) -> bool: match = self.folder_regex.search(str(url)) return bool(match) - async def is_supported_file(self, url: URL) -> bool: + def is_supported_file(self, url: URL) -> bool: match = self.file_regex.search(str(url)) return bool(match) @@ -60,14 +60,14 @@ def is_supported(self, url: URL) -> bool: match = self.supported_regex.search(str(url)) return bool(match) or "real-debrid" in url.host.lower() - async def unrestrict_link(self, url: URL, password: str | None = None) -> URL: + def unrestrict_link(self, url: URL, password: str | None = None) -> URL: return self.api.unrestrict.link(url, password).get("download") - async def unrestrict_folder(self, url: URL) -> list[URL]: + def unrestrict_folder(self, url: URL) -> list[URL]: return self.api.unrestrict.folder(url) @staticmethod - async def _guess_folder_by_part(url: URL): + def _guess_folder_by_part(url: URL): for word in FOLDER_AS_PART: if word in url.parts: index = url.parts.index(word) @@ -76,16 +76,16 @@ async def _guess_folder_by_part(url: URL): return None @staticmethod - async def _guess_folder_by_query(url: URL): + def _guess_folder_by_query(url: URL): for word in FOLDER_AS_QUERY: folder = url.query.get(word) if folder: return folder return None - async def guess_folder(self, url: URL) -> str: + def guess_folder(self, url: URL) -> str: for guess_function in self._folder_guess_functions: - folder = await guess_function(url) + folder = guess_function(url) if folder: return folder return url.path diff --git a/cyberdrop_dl/scraper/crawler.py b/cyberdrop_dl/scraper/crawler.py index 2c42096bc..653e5ca89 100644 --- a/cyberdrop_dl/scraper/crawler.py +++ b/cyberdrop_dl/scraper/crawler.py @@ -37,11 +37,11 @@ def __init__(self, manager: Manager, domain: str, folder_domain: str) -> None: self.scraped_items: list = [] self.waiting_items = 0 - async def startup(self) -> None: + def startup(self) -> None: """Starts the crawler.""" self.client = self.manager.client_manager.scraper_session self.downloader = Downloader(self.manager, self.domain) - await self.downloader.startup() + self.downloader.startup() async def run(self, item: ScrapeItem) -> None: """Runs the crawler 
loop.""" @@ -104,7 +104,7 @@ async def handle_file( if media_item.album_id: await self.manager.db_manager.history_table.set_album_id(self.domain, media_item) log(f"Skipping {url} as it has already been downloaded", 10) - await self.manager.progress_manager.download_progress.add_previously_completed() + self.manager.progress_manager.download_progress.add_previously_completed() return check_referer = False @@ -113,17 +113,17 @@ async def handle_file( if check_referer: log(f"Skipping {url} as referer has been seen before", 10) - await self.manager.progress_manager.download_progress.add_skipped() + self.manager.progress_manager.download_progress.add_skipped() return - if await self.manager.download_manager.get_download_limit(self.domain) == 1: + if self.manager.download_manager.get_download_limit(self.domain) == 1: await self.downloader.run(media_item) else: self.manager.task_group.create_task(self.downloader.run(media_item)) """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - async def check_post_number(self, post_number: int, current_post_number: int) -> tuple[bool, bool]: + def check_post_number(self, post_number: int, current_post_number: int) -> tuple[bool, bool]: """Checks if the program should scrape the current post.""" """Returns (scrape_post, continue_scraping)""" scrape_single_forum_post = self.manager.config_manager.settings_data["Download_Options"][ @@ -142,7 +142,7 @@ async def check_post_number(self, post_number: int, current_post_number: int) -> return True, True - async def handle_external_links(self, scrape_item: ScrapeItem) -> None: + def handle_external_links(self, scrape_item: ScrapeItem) -> None: """Maps external links to the scraper class.""" self.manager.task_group.create_task(self.manager.scrape_mapper.filter_and_send_to_crawler(scrape_item)) @@ -205,7 +205,7 @@ async def check_complete_from_referer(self, scrape_item: ScrapeItem | URL) -> bo check_complete = await self.manager.db_manager.history_table.check_complete_by_referer(self.domain, url) if check_complete: log(f"Skipping {url} as it has already been downloaded", 10) - await self.manager.progress_manager.download_progress.add_previously_completed() + self.manager.progress_manager.download_progress.add_previously_completed() return True return False @@ -213,17 +213,17 @@ async def get_album_results(self, album_id: str) -> bool | dict[Any, Any]: """Checks whether an album has completed given its domain and album id.""" return await self.manager.db_manager.history_table.check_album(self.domain, album_id) - async def check_album_results(self, url: URL, album_results: dict[Any, Any]) -> bool: + def check_album_results(self, url: URL, album_results: dict[Any, Any]) -> bool: """Checks whether an album has completed given its domain and album id.""" - url_path = await get_db_path(url.with_query(""), self.domain) + url_path = get_db_path(url.with_query(""), self.domain) if album_results and url_path in album_results and album_results[url_path] != 0: log(f"Skipping {url} as it has already been downloaded", 10) - await self.manager.progress_manager.download_progress.add_previously_completed() + self.manager.progress_manager.download_progress.add_previously_completed() return True return False @staticmethod - async def create_scrape_item( + def create_scrape_item( parent_scrape_item: ScrapeItem, url: URL, new_title_part: str, @@ -246,7 +246,7 @@ async def create_scrape_item( scrape_item.album_id = album_id return scrape_item - async def create_title(self, 
title: str, album_id: str | None, thread_id: str | None) -> str: + def create_title(self, title: str, album_id: str | None, thread_id: str | None) -> str: """Creates the title for the scrape item.""" if not title: title = "Untitled" diff --git a/cyberdrop_dl/scraper/crawlers/bunkrr_crawler.py b/cyberdrop_dl/scraper/crawlers/bunkrr_crawler.py index 41368dbd7..de234ddee 100644 --- a/cyberdrop_dl/scraper/crawlers/bunkrr_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/bunkrr_crawler.py @@ -35,21 +35,21 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) - scrape_item.url = await self.get_stream_link(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) + scrape_item.url = self.get_stream_link(scrape_item.url) if scrape_item.url.host.startswith("get"): scrape_item.url = await self.reinforced_link(scrape_item.url) if not scrape_item.url: return - scrape_item.url = await self.get_stream_link(scrape_item.url) + scrape_item.url = self.get_stream_link(scrape_item.url) if "a" in scrape_item.url.parts: await self.album(scrape_item) else: await self.file(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def album(self, scrape_item: ScrapeItem) -> None: @@ -70,7 +70,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: soup: BeautifulSoup = await self.client.get_soup(self.domain, scrape_item.url, origin=scrape_item) title = soup.select_one("title").text.rsplit(" | Bunkr")[0].strip() - title = await self.create_title(title, scrape_item.url.parts[2], None) + title = self.create_title(title, scrape_item.url.parts[2], None) scrape_item.add_to_parent_title(title) card_listings: list[Tag] = soup.select('div[class*="relative group/item theItem"]') @@ -79,16 +79,17 @@ async def album(self, scrape_item: ScrapeItem) -> None: file_ext = "." 
+ filename.split(".")[-1] thumbnail = card_listing.select_one("img").get("src") date_str = card_listing.select_one('span[class*="theDate"]').text.strip() - date = await self.parse_datetime(date_str) + date = self.parse_datetime(date_str) link = card_listing.find("a").get("href") if link.startswith("/"): link = URL("https://" + scrape_item.url.host + link) link = URL(link) - link = await self.get_stream_link(link) + link = self.get_stream_link(link) # Try to get final file URL + valid_extensions = FILE_FORMATS["Images"] | FILE_FORMATS["Videos"] try: - if file_ext.lower() not in FILE_FORMATS["Images"] and file_ext.lower() not in FILE_FORMATS["Videos"]: + if file_ext.lower() not in valid_extensions: raise FileNotFoundError src = thumbnail.replace("/thumbs/", "/") src = URL(src, encoded=True) @@ -101,7 +102,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: msg = "No image found, reverting to parent" raise FileNotFoundError(msg) - new_scrape_item = await self.create_scrape_item( + new_scrape_item = self.create_scrape_item( scrape_item, link, "", @@ -112,7 +113,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: ) filename, ext = get_filename_and_ext(src.name) - if not await self.check_album_results(src, results): + if not self.check_album_results(src, results): await self.handle_file(src, new_scrape_item, filename, ext) except FileNotFoundError: @@ -183,13 +184,13 @@ async def reinforced_link(self, url: URL) -> URL: """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" @staticmethod - async def is_cdn(url: URL) -> bool: + def is_cdn(url: URL) -> bool: """Checks if a given URL is from a CDN.""" return bool(re.match(CDN_POSSIBILITIES, url.host)) - async def get_stream_link(self, url: URL) -> URL: + def get_stream_link(self, url: URL) -> URL: """Gets the stream link for a given url.""" - if not await self.is_cdn(url): + if not self.is_cdn(url): return url ext = url.suffix.lower() @@ -206,7 +207,7 @@ async def get_stream_link(self, url: URL) -> URL: return url @staticmethod - async def parse_datetime(date: str) -> int: + def parse_datetime(date: str) -> int: """Parses a datetime string into a unix timestamp.""" date = datetime.datetime.strptime(date, "%H:%M:%S %d/%m/%Y") return calendar.timegm(date.timetuple()) diff --git a/cyberdrop_dl/scraper/crawlers/celebforum_crawler.py b/cyberdrop_dl/scraper/crawlers/celebforum_crawler.py index 8a9f69133..9b6807924 100644 --- a/cyberdrop_dl/scraper/crawlers/celebforum_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/celebforum_crawler.py @@ -54,7 +54,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if not self.logged_in: login_url = self.primary_base_domain / "login" @@ -70,7 +70,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: log("CelebForum login failed. 
Skipping.", 40) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def forum(self, scrape_item: ScrapeItem) -> None: @@ -103,7 +103,7 @@ async def forum(self, scrape_item: ScrapeItem) -> None: elem.decompose() thread_id = thread_url.parts[2].split(".")[-1] - title = await self.create_title(title_block.text.replace("\n", ""), None, thread_id) + title = self.create_title(title_block.text.replace("\n", ""), None, thread_id) posts = soup.select(self.posts_selector) for post in posts: diff --git a/cyberdrop_dl/scraper/crawlers/chevereto_crawler.py b/cyberdrop_dl/scraper/crawlers/chevereto_crawler.py index 4a858a82b..57cca7ee4 100644 --- a/cyberdrop_dl/scraper/crawlers/chevereto_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/chevereto_crawler.py @@ -66,7 +66,7 @@ def __init__(self, manager: Manager, domain: str) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if await self.check_direct_link(scrape_item.url): await self.handle_direct_link(scrape_item) @@ -81,7 +81,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.profile(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def profile(self, scrape_item: ScrapeItem) -> None: @@ -89,7 +89,7 @@ async def profile(self, scrape_item: ScrapeItem) -> None: async with self.request_limiter: soup: BeautifulSoup = await self.client.get_soup(self.domain, scrape_item.url, origin=scrape_item) - title = await self.create_title(soup.select_one(self.profile_title_selector).get("content"), None, None) + title = self.create_title(soup.select_one(self.profile_title_selector).get("content"), None, None) async for soup in self.web_pager(scrape_item.url): links = soup.select(self.profile_item_selector) @@ -142,7 +142,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: if "This content is password protected" in sub_albums_soup.text: raise PasswordProtectedError(message="Wrong password" if password else None, origin=scrape_item) - title = await self.create_title( + title = self.create_title( sub_albums_soup.select_one(self.album_title_selector).get_text(), album_id, None, diff --git a/cyberdrop_dl/scraper/crawlers/coomer_crawler.py b/cyberdrop_dl/scraper/crawlers/coomer_crawler.py index fbbc210d2..19b10028d 100644 --- a/cyberdrop_dl/scraper/crawlers/coomer_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/coomer_crawler.py @@ -31,7 +31,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "thumbnails" in scrape_item.url.parts: parts = [x for x in scrape_item.url.parts if x not in ("thumbnail", "/")] @@ -47,7 +47,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.handle_direct_link(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def favorites(self, scrape_item: ScrapeItem) -> None: @@ -157,12 +157,12 @@ async def handle_file(file_obj: dict): ) files = [] - if post.get('file'): - files.append(post['file']) + if 
post.get("file"): + files.append(post["file"]) + + if post.get("attachments"): + files.extend(post["attachments"]) - if post.get('attachments'): - files.extend(post['attachments']) - for file in files: await handle_file(file) scrape_item.children += 1 @@ -197,7 +197,7 @@ async def create_new_scrape_item( if self.manager.config_manager.settings_data["Download_Options"]["include_album_id_in_folder_name"]: post_title = post_id + " - " + post_title - new_title = await self.create_title(user, None, None) + new_title = self.create_title(user, None, None) new_scrape_item = await self.create_scrape_item( old_scrape_item, link, diff --git a/cyberdrop_dl/scraper/crawlers/cyberdrop_crawler.py b/cyberdrop_dl/scraper/crawlers/cyberdrop_crawler.py index 444bffd03..ce12d2860 100644 --- a/cyberdrop_dl/scraper/crawlers/cyberdrop_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/cyberdrop_crawler.py @@ -34,7 +34,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "a" in scrape_item.url.parts: scrape_item.url = scrape_item.url.with_query("nojs") @@ -42,7 +42,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.file(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def album(self, scrape_item: ScrapeItem) -> None: @@ -62,7 +62,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: date = title = None try: - title = await self.create_title(soup.select_one("h1[id=title]").text, scrape_item.album_id, None) + title = self.create_title(soup.select_one("h1[id=title]").text, scrape_item.album_id, None) except AttributeError: raise ScrapeError( 404, diff --git a/cyberdrop_dl/scraper/crawlers/cyberfile_crawler.py b/cyberdrop_dl/scraper/crawlers/cyberfile_crawler.py index ba49a14e5..ee6e99c47 100644 --- a/cyberdrop_dl/scraper/crawlers/cyberfile_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/cyberfile_crawler.py @@ -31,7 +31,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "folder" in scrape_item.url.parts: await self.folder(scrape_item) @@ -40,7 +40,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.file(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def folder(self, scrape_item: ScrapeItem) -> None: @@ -98,7 +98,7 @@ async def folder(self, scrape_item: ScrapeItem) -> None: ajax_soup = BeautifulSoup(ajax_dict["html"].replace("\\", ""), "html.parser") - title = await self.create_title(ajax_dict["page_title"], scrape_item.album_id, None) + title = self.create_title(ajax_dict["page_title"], scrape_item.album_id, None) num_pages = int( ajax_soup.select("a[onclick*=loadImages]")[-1].get("onclick").split(",")[2].split(")")[0].strip(), ) @@ -172,7 +172,7 @@ async def shared(self, scrape_item: ScrapeItem) -> None: async with self.request_limiter: ajax_dict = await self.client.post_data("cyberfile", self.api_files, data=data, origin=scrape_item) ajax_soup = 
BeautifulSoup(ajax_dict["html"].replace("\\", ""), "html.parser") - title = await self.create_title(ajax_dict["page_title"], scrape_item.url.parts[2], None) + title = self.create_title(ajax_dict["page_title"], scrape_item.url.parts[2], None) num_pages = int(ajax_soup.select_one("input[id=rspTotalPages]").get("value")) tile_listings = ajax_soup.select("div[class=fileListing] div[class*=fileItem]") diff --git a/cyberdrop_dl/scraper/crawlers/ehentai_crawler.py b/cyberdrop_dl/scraper/crawlers/ehentai_crawler.py index db0ac040f..0a1471af5 100644 --- a/cyberdrop_dl/scraper/crawlers/ehentai_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/ehentai_crawler.py @@ -30,7 +30,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "g" in scrape_item.url.parts: if not self.warnings_set: @@ -42,7 +42,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: log(f"Scrape Failed: Unknown URL Path for {scrape_item.url}", 40) await self.manager.progress_manager.scrape_stats_progress.add_failure("Unsupported Link") - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def album(self, scrape_item: ScrapeItem) -> None: @@ -50,7 +50,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: async with self.request_limiter: soup: BeautifulSoup = await self.client.get_soup(self.domain, scrape_item.url, origin=scrape_item) - title = await self.create_title(soup.select_one("h1[id=gn]").get_text(), None, None) + title = self.create_title(soup.select_one("h1[id=gn]").get_text(), None, None) date = await self.parse_datetime(soup.select_one("td[class=gdt2]").get_text()) scrape_item.type = FILE_HOST_ALBUM scrape_item.children = scrape_item.children_limit = 0 diff --git a/cyberdrop_dl/scraper/crawlers/erome_crawler.py b/cyberdrop_dl/scraper/crawlers/erome_crawler.py index f87ce0b45..9d258e32a 100644 --- a/cyberdrop_dl/scraper/crawlers/erome_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/erome_crawler.py @@ -26,14 +26,14 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "a" in scrape_item.url.parts: await self.album(scrape_item) else: await self.profile(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def profile(self, scrape_item: ScrapeItem) -> None: @@ -41,7 +41,7 @@ async def profile(self, scrape_item: ScrapeItem) -> None: async with self.request_limiter: soup: BeautifulSoup = await self.client.get_soup(self.domain, scrape_item.url, origin=scrape_item) - title = await self.create_title(scrape_item.url.name, None, None) + title = self.create_title(scrape_item.url.name, None, None) albums = soup.select("a[class=album-link]") scrape_item.type = FILE_HOST_PROFILE @@ -91,7 +91,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: title_portion = soup.select_one("title").text.rsplit(" - Porn")[0].strip() if not title_portion: title_portion = scrape_item.url.name - title = await self.create_title(title_portion, scrape_item.url.parts[2], None) + title = 
self.create_title(title_portion, scrape_item.url.parts[2], None) scrape_item.add_to_parent_title(title) images = soup.select('img[class="img-front lasyload"]') diff --git a/cyberdrop_dl/scraper/crawlers/f95zone_crawler.py b/cyberdrop_dl/scraper/crawlers/f95zone_crawler.py index 640aaa047..2e498a158 100644 --- a/cyberdrop_dl/scraper/crawlers/f95zone_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/f95zone_crawler.py @@ -54,7 +54,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if not self.logged_in: login_url = self.primary_base_domain / "login" @@ -70,7 +70,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: log("F95Zone login failed. Skipping.", 40) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def forum(self, scrape_item: ScrapeItem) -> None: @@ -103,7 +103,7 @@ async def forum(self, scrape_item: ScrapeItem) -> None: elem.decompose() thread_id = thread_url.parts[2].split(".")[-1] - title = await self.create_title(title_block.text.replace("\n", ""), None, thread_id) + title = self.create_title(title_block.text.replace("\n", ""), None, thread_id) posts = soup.select(self.posts_selector) for post in posts: diff --git a/cyberdrop_dl/scraper/crawlers/fapello_crawler.py b/cyberdrop_dl/scraper/crawlers/fapello_crawler.py index e2532df88..090c57cc0 100644 --- a/cyberdrop_dl/scraper/crawlers/fapello_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/fapello_crawler.py @@ -26,7 +26,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if not str(scrape_item.url).endswith("/"): scrape_item.url = URL(str(scrape_item.url) + "/") @@ -36,7 +36,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.profile(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def profile(self, scrape_item: ScrapeItem) -> None: @@ -57,7 +57,7 @@ async def profile(self, scrape_item: ScrapeItem) -> None: "maximum_number_of_children" ][scrape_item.type] - title = await self.create_title( + title = self.create_title( soup.select_one('h2[class="font-semibold lg:text-2xl text-lg mb-2 mt-4"]').get_text(), None, None, diff --git a/cyberdrop_dl/scraper/crawlers/gofile_crawler.py b/cyberdrop_dl/scraper/crawlers/gofile_crawler.py index 19420935b..5d25737eb 100644 --- a/cyberdrop_dl/scraper/crawlers/gofile_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/gofile_crawler.py @@ -42,14 +42,14 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) await self.get_token(self.api_address / "accounts", self.client) await self.get_website_token(self.js_address, self.client) await self.album(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) 
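A recurring change throughout this diff is converting helper methods that never await anything (progress counters, title builders, path helpers) from `async def` to plain `def`, and dropping the `await` at every call site, as in the `add_task`/`remove_task` calls just above. A minimal sketch of the pattern, using a hypothetical `ProgressTracker` stand-in rather than the project's real progress managers:

```python
import asyncio


class ProgressTracker:
    """Hypothetical stand-in for the progress managers touched in this diff."""

    def __init__(self) -> None:
        self.completed = 0

    # Before: declared `async def add_completed(self)` even though the body never
    # awaits, forcing every caller to await a coroutine doing purely synchronous work.
    def add_completed(self) -> None:  # After: a plain method.
        self.completed += 1


async def run() -> None:
    tracker = ProgressTracker()
    tracker.add_completed()  # call sites simply drop the `await`
    print(tracker.completed)  # 1


asyncio.run(run())
```

Calling a synchronous method from inside a coroutine needs no `await`, so the conversion removes pointless coroutine creation and scheduling without changing behaviour.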
@error_handling_wrapper async def album(self, scrape_item: ScrapeItem) -> None: @@ -101,7 +101,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: if JSON_Resp["canAccess"] is False: raise ScrapeError(403, "Album is private", origin=scrape_item) - title = await self.create_title(JSON_Resp["name"], content_id, None) + title = self.create_title(JSON_Resp["name"], content_id, None) # Do not reset nested folders if scrape_item.type != FILE_HOST_ALBUM: scrape_item.type = FILE_HOST_ALBUM diff --git a/cyberdrop_dl/scraper/crawlers/hotpic_crawler.py b/cyberdrop_dl/scraper/crawlers/hotpic_crawler.py index 7dbdf04dd..ef1bf02d9 100644 --- a/cyberdrop_dl/scraper/crawlers/hotpic_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/hotpic_crawler.py @@ -28,7 +28,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "album" in scrape_item.url.parts: await self.album(scrape_item) @@ -38,7 +38,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: log(f"Scrape Failed: Unknown URL Path for {scrape_item.url}", 40) await self.manager.progress_manager.scrape_stats_progress.add_failure("Unsupported Link") - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def album(self, scrape_item: ScrapeItem) -> None: @@ -47,7 +47,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: soup: BeautifulSoup = await self.client.get_soup(self.domain, scrape_item.url, origin=scrape_item) scrape_item.album_id = scrape_item.url.parts[2] - title = await self.create_title(soup.select_one("title").text.rsplit(" - ")[0], scrape_item.album_id, None) + title = self.create_title(soup.select_one("title").text.rsplit(" - ")[0], scrape_item.album_id, None) scrape_item.add_to_parent_title(title) scrape_item.part_of_album = True scrape_item.type = FILE_HOST_ALBUM diff --git a/cyberdrop_dl/scraper/crawlers/imageban_crawler.py b/cyberdrop_dl/scraper/crawlers/imageban_crawler.py index 290e97728..e663041ba 100644 --- a/cyberdrop_dl/scraper/crawlers/imageban_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/imageban_crawler.py @@ -28,7 +28,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "a" in scrape_item.url.parts: await self.album(scrape_item) @@ -39,7 +39,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.handle_direct(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def album(self, scrape_item: ScrapeItem) -> None: @@ -50,7 +50,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: scrape_item.album_id = scrape_item.url.parts[2] scrape_item.part_of_album = True - title = await self.create_title( + title = self.create_title( soup.select_one("title").get_text().replace("Просмотр альбома: ", ""), scrape_item.album_id, None, @@ -92,7 +92,7 @@ async def compilation(self, scrape_item: ScrapeItem) -> None: async with self.request_limiter: soup: BeautifulSoup = await self.client.get_soup(self.domain, scrape_item.url, origin=scrape_item) - title = 
await self.create_title(soup.select_one("blockquote").get_text(), scrape_item.url.parts[2], None) + title = self.create_title(soup.select_one("blockquote").get_text(), scrape_item.url.parts[2], None) scrape_item.add_to_parent_title(title) content_block = soup.select("div[class=container-fluid]")[-1] images = content_block.select("img") diff --git a/cyberdrop_dl/scraper/crawlers/imgbb_crawler.py b/cyberdrop_dl/scraper/crawlers/imgbb_crawler.py index 47a781495..2d8f5aec8 100644 --- a/cyberdrop_dl/scraper/crawlers/imgbb_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/imgbb_crawler.py @@ -30,7 +30,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if await self.check_direct_link(scrape_item.url): image_id = scrape_item.url.parts[1] @@ -42,7 +42,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.image(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def album(self, scrape_item: ScrapeItem) -> None: @@ -61,7 +61,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: "maximum_number_of_children" ][scrape_item.type] - title = await self.create_title( + title = self.create_title( soup.select_one("a[data-text=album-name]").get_text(), scrape_item.album_id, None, diff --git a/cyberdrop_dl/scraper/crawlers/imgbox_crawler.py b/cyberdrop_dl/scraper/crawlers/imgbox_crawler.py index 933c23d6e..33c8739a3 100644 --- a/cyberdrop_dl/scraper/crawlers/imgbox_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/imgbox_crawler.py @@ -29,7 +29,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "t" in scrape_item.url.host or "_" in scrape_item.url.name: scrape_item.url = self.primary_base_domain / scrape_item.url.name.split("_")[0] @@ -42,7 +42,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.image(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def album(self, scrape_item: ScrapeItem) -> None: @@ -64,7 +64,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: "maximum_number_of_children" ][scrape_item.type] - title = await self.create_title( + title = self.create_title( soup.select_one("div[id=gallery-view] h1").get_text().strip().rsplit(" - ", 1)[0], scrape_item.album_id, None, diff --git a/cyberdrop_dl/scraper/crawlers/imgur_crawler.py b/cyberdrop_dl/scraper/crawlers/imgur_crawler.py index 4e3c36859..7a65cd0cb 100644 --- a/cyberdrop_dl/scraper/crawlers/imgur_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/imgur_crawler.py @@ -29,7 +29,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "i.imgur.com" in scrape_item.url.host: await self.handle_direct(scrape_item) @@ -38,7 +38,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: 
else: await self.image(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def album(self, scrape_item: ScrapeItem) -> None: @@ -67,7 +67,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: origin=scrape_item, ) title_part = JSON_Obj["data"].get("title", album_id) - title = await self.create_title(title_part, scrape_item.url.parts[2], None) + title = self.create_title(title_part, scrape_item.url.parts[2], None) async with self.request_limiter: JSON_Obj = await self.client.get_json( diff --git a/cyberdrop_dl/scraper/crawlers/kemono_crawler.py b/cyberdrop_dl/scraper/crawlers/kemono_crawler.py index 25acc9ea0..912176a4a 100644 --- a/cyberdrop_dl/scraper/crawlers/kemono_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/kemono_crawler.py @@ -32,7 +32,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "thumbnails" in scrape_item.url.parts: parts = [x for x in scrape_item.url.parts if x not in ("thumbnail", "/")] @@ -48,7 +48,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.handle_direct_link(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def profile(self, scrape_item: ScrapeItem) -> None: @@ -150,12 +150,12 @@ async def handle_file(file_obj: dict): await self.create_new_scrape_item(link, scrape_item, user_str, post_title, post_id, date) files = [] - if post.get('file'): - files.append(post['file']) + if post.get("file"): + files.append(post["file"]) + + if post.get("attachments"): + files.extend(post["attachments"]) - if post.get('attachments'): - files.extend(post['attachments']) - for file in files: if scrape_item.children_limit: if scrape_item.children >= scrape_item.children_limit: @@ -181,7 +181,7 @@ async def get_content_links(self, scrape_item: ScrapeItem, post: dict, user: str if self.manager.config_manager.settings_data["Download_Options"]["include_album_id_in_folder_name"]: post_title = post_id + " - " + post_title - new_title = await self.create_title(user, None, None) + new_title = self.create_title(user, None, None) scrape_item = await self.create_scrape_item( scrape_item, scrape_item.url, @@ -252,7 +252,7 @@ async def create_new_scrape_item( if self.manager.config_manager.settings_data["Download_Options"]["include_album_id_in_folder_name"]: post_title = post_id + " - " + post_title - new_title = await self.create_title(user, None, None) + new_title = self.create_title(user, None, None) new_scrape_item = await self.create_scrape_item( old_scrape_item, link, diff --git a/cyberdrop_dl/scraper/crawlers/leakedmodels_crawler.py b/cyberdrop_dl/scraper/crawlers/leakedmodels_crawler.py index af357bae5..d009a0502 100644 --- a/cyberdrop_dl/scraper/crawlers/leakedmodels_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/leakedmodels_crawler.py @@ -54,7 +54,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "threads" in scrape_item.url.parts: if not self.logged_in: @@ -79,7 +79,7 @@ async 
def fetch(self, scrape_item: ScrapeItem) -> None: scrape_item.parents[0] if scrape_item.parents else None, ) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def forum(self, scrape_item: ScrapeItem) -> None: @@ -111,7 +111,7 @@ async def forum(self, scrape_item: ScrapeItem) -> None: elem.decompose() thread_id = thread_url.parts[2].split(".")[-1] - title = await self.create_title(title_block.text.replace("\n", ""), None, thread_id) + title = self.create_title(title_block.text.replace("\n", ""), None, thread_id) posts = soup.select(self.posts_selector) for post in posts: diff --git a/cyberdrop_dl/scraper/crawlers/mediafire_crawler.py b/cyberdrop_dl/scraper/crawlers/mediafire_crawler.py index a3d88c60c..8f2b17c29 100644 --- a/cyberdrop_dl/scraper/crawlers/mediafire_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/mediafire_crawler.py @@ -30,14 +30,14 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "folder" in scrape_item.url.parts: await self.folder(scrape_item) else: await self.file(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def folder(self, scrape_item: ScrapeItem) -> None: @@ -48,7 +48,7 @@ async def folder(self, scrape_item: ScrapeItem) -> None: except api.MediaFireApiError as e: raise ScrapeError(status=f"MF - {e.message}", origin=scrape_item) from None - title = await self.create_title(folder_details["folder_info"]["name"], folder_key, None) + title = self.create_title(folder_details["folder_info"]["name"], folder_key, None) scrape_item.type = FILE_HOST_ALBUM scrape_item.children = scrape_item.children_limit = 0 diff --git a/cyberdrop_dl/scraper/crawlers/nekohouse_crawler.py b/cyberdrop_dl/scraper/crawlers/nekohouse_crawler.py index c5ac6be95..707086c45 100644 --- a/cyberdrop_dl/scraper/crawlers/nekohouse_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/nekohouse_crawler.py @@ -41,7 +41,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "thumbnails" in scrape_item.url.parts: parts = [x for x in scrape_item.url.parts if x not in ("thumbnail", "/")] @@ -62,7 +62,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.handle_direct_link(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def profile(self, scrape_item: ScrapeItem) -> None: @@ -269,7 +269,7 @@ async def create_new_scrape_item( if self.manager.config_manager.settings_data["Download_Options"]["include_album_id_in_folder_name"]: post_title = post_id + " - " + post_title - new_title = await self.create_title(user, None, None) + new_title = self.create_title(user, None, None) new_scrape_item = await self.create_scrape_item( old_scrape_item, link, diff --git a/cyberdrop_dl/scraper/crawlers/nudostar_crawler.py b/cyberdrop_dl/scraper/crawlers/nudostar_crawler.py index 7b76fbc0c..cca047449 100644 --- a/cyberdrop_dl/scraper/crawlers/nudostar_crawler.py +++ 
b/cyberdrop_dl/scraper/crawlers/nudostar_crawler.py @@ -55,7 +55,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if not self.logged_in: login_url = self.primary_base_domain / "forum/login" @@ -71,7 +71,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: log("Nudostar login failed. Skipping.", 40) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def forum(self, scrape_item: ScrapeItem) -> None: @@ -103,7 +103,7 @@ async def forum(self, scrape_item: ScrapeItem) -> None: elem.decompose() thread_id = thread_url.parts[2].split(".")[-1] - title = await self.create_title(title_block.text.replace("\n", ""), None, thread_id) + title = self.create_title(title_block.text.replace("\n", ""), None, thread_id) posts = soup.select(self.posts_selector) for post in posts: diff --git a/cyberdrop_dl/scraper/crawlers/nudostartv_crawler.py b/cyberdrop_dl/scraper/crawlers/nudostartv_crawler.py index 61f60f788..bb3be0b0e 100644 --- a/cyberdrop_dl/scraper/crawlers/nudostartv_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/nudostartv_crawler.py @@ -24,12 +24,12 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) scrape_item.url = URL(str(scrape_item.url) + "/") await self.profile(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def profile(self, scrape_item: ScrapeItem) -> None: @@ -37,7 +37,7 @@ async def profile(self, scrape_item: ScrapeItem) -> None: async with self.request_limiter: soup: BeautifulSoup = await self.client.get_soup(self.domain, scrape_item.url, origin=scrape_item) - title = await self.create_title(soup.select_one("title").get_text().split("/")[0], None, None) + title = self.create_title(soup.select_one("title").get_text().split("/")[0], None, None) content = soup.select("div[id=list_videos_common_videos_list_items] div a") for page in content: link = URL(page.get("href")) diff --git a/cyberdrop_dl/scraper/crawlers/omegascans_crawler.py b/cyberdrop_dl/scraper/crawlers/omegascans_crawler.py index 61f0d6266..267d11c6d 100644 --- a/cyberdrop_dl/scraper/crawlers/omegascans_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/omegascans_crawler.py @@ -31,7 +31,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "chapter" in scrape_item.url.name: await self.chapter(scrape_item) @@ -40,7 +40,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.handle_direct_link(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def series(self, scrape_item: ScrapeItem) -> None: @@ -106,7 +106,7 @@ async def chapter(self, scrape_item: ScrapeItem) -> None: title_parts = 
soup.select_one("title").get_text().split(" - ") series_name = title_parts[0] chapter_title = title_parts[1] - series_title = await self.create_title(series_name, None, None) + series_title = self.create_title(series_name, None, None) scrape_item.add_to_parent_title(series_title) scrape_item.add_to_parent_title(chapter_title) diff --git a/cyberdrop_dl/scraper/crawlers/pimpandhost_crawler.py b/cyberdrop_dl/scraper/crawlers/pimpandhost_crawler.py index 35d257d67..6ec24c0b2 100644 --- a/cyberdrop_dl/scraper/crawlers/pimpandhost_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/pimpandhost_crawler.py @@ -28,14 +28,14 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "album" in scrape_item.url.parts: await self.album(scrape_item) else: await self.image(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def album(self, scrape_item: ScrapeItem) -> None: @@ -54,7 +54,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: "maximum_number_of_children" ][scrape_item.type] - title = await self.create_title( + title = self.create_title( soup.select_one("span[class=author-header__album-name]").get_text(), scrape_item.album_id, None, diff --git a/cyberdrop_dl/scraper/crawlers/pixeldrain_crawler.py b/cyberdrop_dl/scraper/crawlers/pixeldrain_crawler.py index 7a40c4497..241915983 100644 --- a/cyberdrop_dl/scraper/crawlers/pixeldrain_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/pixeldrain_crawler.py @@ -27,14 +27,14 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "l" in scrape_item.url.parts: await self.folder(scrape_item) else: await self.file(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def folder(self, scrape_item: ScrapeItem) -> None: @@ -58,7 +58,7 @@ async def folder(self, scrape_item: ScrapeItem) -> None: origin=scrape_item, ) - title = await self.create_title(JSON_Resp["title"], scrape_item.url.parts[2], None) + title = self.create_title(JSON_Resp["title"], scrape_item.url.parts[2], None) for file in JSON_Resp["files"]: link = await self.create_download_link(file["id"]) diff --git a/cyberdrop_dl/scraper/crawlers/postimg_crawler.py b/cyberdrop_dl/scraper/crawlers/postimg_crawler.py index 8f000fa55..c168d7d15 100644 --- a/cyberdrop_dl/scraper/crawlers/postimg_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/postimg_crawler.py @@ -28,7 +28,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "i.postimg.cc" in scrape_item.url.host: filename, ext = get_filename_and_ext(scrape_item.url.name) @@ -38,7 +38,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.image(scrape_item) - await self.scraping_progress.remove_task(task_id) + 
self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def album(self, scrape_item: ScrapeItem) -> None: @@ -58,7 +58,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: scrape_item.part_of_album = True scrape_item.album_id = scrape_item.url.parts[2] - title = await self.create_title(scrape_item.url.raw_name, scrape_item.album_id, None) + title = self.create_title(scrape_item.url.raw_name, scrape_item.album_id, None) for image in JSON_Resp["images"]: link = URL(image[4]) diff --git a/cyberdrop_dl/scraper/crawlers/realbooru_crawler.py b/cyberdrop_dl/scraper/crawlers/realbooru_crawler.py index 648d7d998..78768935d 100644 --- a/cyberdrop_dl/scraper/crawlers/realbooru_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/realbooru_crawler.py @@ -30,7 +30,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) await self.set_cookies() @@ -42,7 +42,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: log(f"Scrape Failed: Unknown URL Path for {scrape_item.url}", 40) await self.manager.progress_manager.scrape_stats_progress.add_failure("Unsupported Link") - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def tag(self, scrape_item: ScrapeItem) -> None: @@ -51,7 +51,7 @@ async def tag(self, scrape_item: ScrapeItem) -> None: soup: BeautifulSoup = await self.client.get_soup(self.domain, scrape_item.url, origin=scrape_item) title_portion = scrape_item.url.query["tags"].strip() - title = await self.create_title(title_portion, None, None) + title = self.create_title(title_portion, None, None) scrape_item.type = FILE_HOST_ALBUM scrape_item.children = scrape_item.children_limit = 0 diff --git a/cyberdrop_dl/scraper/crawlers/realdebrid_crawler.py b/cyberdrop_dl/scraper/crawlers/realdebrid_crawler.py index 8163fc2e2..fa55a83a7 100644 --- a/cyberdrop_dl/scraper/crawlers/realdebrid_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/realdebrid_crawler.py @@ -25,7 +25,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) scrape_item.url = await self.get_original_url(scrape_item) if await self.manager.real_debrid_manager.is_supported_folder(scrape_item.url): @@ -33,7 +33,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.file(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def folder(self, scrape_item: ScrapeItem) -> None: @@ -44,7 +44,7 @@ async def folder(self, scrape_item: ScrapeItem) -> None: scrape_item.album_id = folder_id scrape_item.part_of_album = True - title = await self.create_title(f"{folder_id} [{original_url.host.lower()}]", None, None) + title = self.create_title(f"{folder_id} [{original_url.host.lower()}]", None, None) scrape_item.add_to_parent_title(title) async with self.request_limiter: @@ -73,7 +73,7 @@ async def file(self, scrape_item: ScrapeItem) -> None: self_hosted = self.is_self_hosted(original_url) if not self_hosted: - title = await self.create_title(f"files 
[{original_url.host.lower()}]", None, None) + title = self.create_title(f"files [{original_url.host.lower()}]", None, None) scrape_item.part_of_album = True scrape_item.add_to_parent_title(title) async with self.request_limiter: diff --git a/cyberdrop_dl/scraper/crawlers/reddit_crawler.py b/cyberdrop_dl/scraper/crawlers/reddit_crawler.py index 920ee874d..f32f1eb40 100644 --- a/cyberdrop_dl/scraper/crawlers/reddit_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/reddit_crawler.py @@ -34,12 +34,12 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if not self.reddit_personal_use_script or not self.reddit_secret: log("Reddit API credentials not found. Skipping.", 30) await self.manager.progress_manager.scrape_stats_progress.add_failure("Failed Login") - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) return async with aiohttp.ClientSession() as reddit_session: @@ -61,13 +61,13 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: log(f"Scrape Failed: Unknown URL Path for {scrape_item.url}", 40) await self.manager.progress_manager.scrape_stats_progress.add_failure("Unknown") - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def user(self, scrape_item: ScrapeItem, reddit: asyncpraw.Reddit) -> None: """Scrapes user pages.""" username = scrape_item.url.name or scrape_item.url.parts[-2] - title = await self.create_title(username, None, None) + title = self.create_title(username, None, None) scrape_item.add_to_parent_title(title) scrape_item.part_of_album = True @@ -79,7 +79,7 @@ async def user(self, scrape_item: ScrapeItem, reddit: asyncpraw.Reddit) -> None: async def subreddit(self, scrape_item: ScrapeItem, reddit: asyncpraw.Reddit) -> None: """Scrapes subreddit pages.""" subreddit = scrape_item.url.name or scrape_item.url.parts[-2] - title = await self.create_title(subreddit, None, None) + title = self.create_title(subreddit, None, None) scrape_item.add_to_parent_title(title) scrape_item.part_of_album = True diff --git a/cyberdrop_dl/scraper/crawlers/redgifs_crawler.py b/cyberdrop_dl/scraper/crawlers/redgifs_crawler.py index ee4d71cd7..3b7f93ab9 100644 --- a/cyberdrop_dl/scraper/crawlers/redgifs_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/redgifs_crawler.py @@ -27,7 +27,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if not self.token: await self.manage_token(self.redgifs_api / "v2/auth/temporary") @@ -38,7 +38,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.post(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def user(self, scrape_item: ScrapeItem) -> None: @@ -68,7 +68,7 @@ async def user(self, scrape_item: ScrapeItem) -> None: for gif in gifs: links = gif["urls"] date = gif["createDate"] - title = await self.create_title(user_id, None, None) + title = self.create_title(user_id, None, None) try: link = URL(links["hd"]) @@ -103,7 +103,7 @@ async def 
post(self, scrape_item: ScrapeItem) -> None: ) title_part = JSON_Resp["gif"].get("title", "Loose Files") - title = await self.create_title(title_part, None, None) + title = self.create_title(title_part, None, None) links = JSON_Resp["gif"]["urls"] date = JSON_Resp["gif"]["createDate"] diff --git a/cyberdrop_dl/scraper/crawlers/rule34vault_crawler.py b/cyberdrop_dl/scraper/crawlers/rule34vault_crawler.py index caeb91b99..77c753e05 100644 --- a/cyberdrop_dl/scraper/crawlers/rule34vault_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/rule34vault_crawler.py @@ -29,7 +29,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "post" in scrape_item.url.parts: await self.file(scrape_item) @@ -38,7 +38,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.tag(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def tag(self, scrape_item: ScrapeItem) -> None: @@ -46,7 +46,7 @@ async def tag(self, scrape_item: ScrapeItem) -> None: async with self.request_limiter: soup: BeautifulSoup = await self.client.get_soup(self.domain, scrape_item.url, origin=scrape_item) - title = await self.create_title(scrape_item.url.parts[1], None, None) + title = self.create_title(scrape_item.url.parts[1], None, None) scrape_item.part_of_album = True scrape_item.type = FILE_HOST_ALBUM scrape_item.children = scrape_item.children_limit = 0 @@ -98,7 +98,7 @@ async def playlist(self, scrape_item: ScrapeItem) -> None: title_str = soup.select_one("div[class*=title]").text scrape_item.part_of_album = True scrape_item.album_id = scrape_item.url.parts[-1] - title = await self.create_title(title_str, scrape_item.album_id, None) + title = self.create_title(title_str, scrape_item.album_id, None) content_block = soup.select_one('div[class="box-grid ng-star-inserted"]') content = content_block.select('a[class="box ng-star-inserted"]') diff --git a/cyberdrop_dl/scraper/crawlers/rule34xxx_crawler.py b/cyberdrop_dl/scraper/crawlers/rule34xxx_crawler.py index fb61237bf..5db456140 100644 --- a/cyberdrop_dl/scraper/crawlers/rule34xxx_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/rule34xxx_crawler.py @@ -30,7 +30,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) await self.set_cookies() @@ -42,7 +42,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: log(f"Scrape Failed: Unknown URL Path for {scrape_item.url}", 40) await self.manager.progress_manager.scrape_stats_progress.add_failure("Unsupported Link") - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def tag(self, scrape_item: ScrapeItem) -> None: @@ -59,7 +59,7 @@ async def tag(self, scrape_item: ScrapeItem) -> None: ][scrape_item.type] title_portion = scrape_item.url.query["tags"].strip() - title = await self.create_title(title_portion, None, None) + title = self.create_title(title_portion, None, None) scrape_item.part_of_album = True content = soup.select("div[class=image-list] span a") diff --git 
a/cyberdrop_dl/scraper/crawlers/rule34xyz_crawler.py b/cyberdrop_dl/scraper/crawlers/rule34xyz_crawler.py index 2dbfaa1c1..2b1353689 100644 --- a/cyberdrop_dl/scraper/crawlers/rule34xyz_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/rule34xyz_crawler.py @@ -29,14 +29,14 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "post" in scrape_item.url.parts: await self.file(scrape_item) else: await self.tag(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def tag(self, scrape_item: ScrapeItem) -> None: @@ -52,7 +52,7 @@ async def tag(self, scrape_item: ScrapeItem) -> None: "maximum_number_of_children" ][scrape_item.type] - title = await self.create_title(scrape_item.url.parts[1], None, None) + title = self.create_title(scrape_item.url.parts[1], None, None) scrape_item.part_of_album = True content_block = soup.select_one('div[class="box-grid ng-star-inserted"]') diff --git a/cyberdrop_dl/scraper/crawlers/saint_crawler.py b/cyberdrop_dl/scraper/crawlers/saint_crawler.py index c15509b0b..9a1e4a094 100644 --- a/cyberdrop_dl/scraper/crawlers/saint_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/saint_crawler.py @@ -29,7 +29,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) scrape_item.url = self.primary_base_domain.with_path(scrape_item.url.path) if "a" in scrape_item.url.parts: @@ -37,7 +37,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.video(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def album(self, scrape_item: ScrapeItem) -> None: @@ -53,7 +53,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: title_portion = soup.select_one("title").text.rsplit(" - Saint Video Hosting")[0].strip() if not title_portion: title_portion = scrape_item.url.name - title = await self.create_title(title_portion, album_id, None) + title = self.create_title(title_portion, album_id, None) scrape_item.add_to_parent_title(title) videos = soup.select("a.btn-primary.action.download") diff --git a/cyberdrop_dl/scraper/crawlers/scrolller_crawler.py b/cyberdrop_dl/scraper/crawlers/scrolller_crawler.py index 054c65d6b..eb64e0d8d 100644 --- a/cyberdrop_dl/scraper/crawlers/scrolller_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/scrolller_crawler.py @@ -28,7 +28,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "r" in scrape_item.url.parts: await self.subreddit(scrape_item) @@ -36,13 +36,13 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: log(f"Scrape Failed: Unknown URL Path for {scrape_item.url}", 40) await self.manager.progress_manager.scrape_stats_progress.add_failure("Unsupported Link") - await self.scraping_progress.remove_task(task_id) + 
self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def subreddit(self, scrape_item: ScrapeItem) -> None: """Scrapes an album.""" subreddit = scrape_item.url.parts[-1] - title = await self.create_title(subreddit, None, None) + title = self.create_title(subreddit, None, None) scrape_item.add_to_parent_title(title) scrape_item.part_of_album = True scrape_item.type = FILE_HOST_ALBUM diff --git a/cyberdrop_dl/scraper/crawlers/simpcity_crawler.py b/cyberdrop_dl/scraper/crawlers/simpcity_crawler.py index ca7ad4cd4..5825e07c0 100644 --- a/cyberdrop_dl/scraper/crawlers/simpcity_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/simpcity_crawler.py @@ -56,7 +56,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if not self.logged_in and self.login_attempts == 0: login_url = self.primary_base_domain / "login" @@ -74,7 +74,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: await self.forum(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def forum(self, scrape_item: ScrapeItem) -> None: @@ -106,7 +106,7 @@ async def forum(self, scrape_item: ScrapeItem) -> None: elem.decompose() thread_id = thread_url.parts[2].split(".")[-1] - title = await self.create_title(title_block.text.replace("\n", ""), None, thread_id) + title = self.create_title(title_block.text.replace("\n", ""), None, thread_id) posts = soup.select(self.posts_selector) for post in posts: diff --git a/cyberdrop_dl/scraper/crawlers/socialmediagirls_crawler.py b/cyberdrop_dl/scraper/crawlers/socialmediagirls_crawler.py index b6557ddac..da6449fab 100644 --- a/cyberdrop_dl/scraper/crawlers/socialmediagirls_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/socialmediagirls_crawler.py @@ -55,7 +55,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if not self.logged_in: login_url = self.primary_base_domain / "login" @@ -73,7 +73,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: log("SocialMediaGirls login failed. 
Skipping.", 40) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def forum(self, scrape_item: ScrapeItem) -> None: @@ -105,7 +105,7 @@ async def forum(self, scrape_item: ScrapeItem) -> None: elem.decompose() thread_id = thread_url.parts[2].split(".")[-1] - title = await self.create_title(title_block.text.replace("\n", ""), None, thread_id) + title = self.create_title(title_block.text.replace("\n", ""), None, thread_id) posts = soup.select(self.posts_selector) for post in posts: diff --git a/cyberdrop_dl/scraper/crawlers/tokyomotion_crawler.py b/cyberdrop_dl/scraper/crawlers/tokyomotion_crawler.py index 4f4e717c3..2137377a4 100644 --- a/cyberdrop_dl/scraper/crawlers/tokyomotion_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/tokyomotion_crawler.py @@ -43,7 +43,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) new_query = MultiDict(scrape_item.url.query) new_query.pop("page", None) scrape_item.url = self.primary_base_domain.with_path(scrape_item.url.path).with_query(new_query) @@ -69,7 +69,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.search(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def video(self, scrape_item: ScrapeItem) -> None: @@ -110,7 +110,7 @@ async def video(self, scrape_item: ScrapeItem) -> None: async def albums(self, scrape_item: ScrapeItem) -> None: """Scrapes user albums.""" user = scrape_item.url.parts[2] - user_title = await self.create_title(f"{user} [user]", scrape_item.album_id, None) + user_title = self.create_title(f"{user} [user]", scrape_item.album_id, None) if user_title not in scrape_item.parent_title.split("/"): scrape_item.add_to_parent_title(user_title) @@ -134,7 +134,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: title = scrape_item.url.parts[-1] if "user" in scrape_item.url.parts: user = scrape_item.url.parts[2] - user_title = await self.create_title(f"{user} [user]", scrape_item.album_id, None) + user_title = self.create_title(f"{user} [user]", scrape_item.album_id, None) if user_title not in scrape_item.parent_title.split("/"): scrape_item.add_to_parent_title(user_title) @@ -143,7 +143,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: scrape_item.part_of_album = True if self.folder_domain not in scrape_item.parent_title: - title = await self.create_title(title, scrape_item.album_id, None) + title = self.create_title(title, scrape_item.album_id, None) if "favorite" in scrape_item.url.parts: scrape_item.add_to_parent_title("favorite") @@ -193,7 +193,7 @@ async def image(self, scrape_item: ScrapeItem) -> None: async def profile(self, scrape_item: ScrapeItem) -> None: """Scrapes an user profile.""" user = scrape_item.url.parts[2] - user_title = await self.create_title(f"{user} [user]", scrape_item.album_id, None) + user_title = self.create_title(f"{user} [user]", scrape_item.album_id, None) if user_title not in scrape_item.parent_title.split("/"): scrape_item.add_to_parent_title(user_title) @@ -212,7 +212,7 @@ async def search(self, scrape_item: ScrapeItem) -> None: return search_query = scrape_item.url.query.get("search_query") - search_title = await self.create_title(f"{search_query} 
[{search_type} search]", scrape_item.album_id, None) + search_title = self.create_title(f"{search_query} [{search_type} search]", scrape_item.album_id, None) if search_title not in scrape_item.parent_title.split("/"): scrape_item.add_to_parent_title(search_title) @@ -243,7 +243,7 @@ async def playlist(self, scrape_item: ScrapeItem) -> None: """Scrapes a video playlist.""" title = scrape_item.url.parts[-1] user = scrape_item.url.parts[2] - user_title = await self.create_title(f"{user} [user]", scrape_item.album_id, None) + user_title = self.create_title(f"{user} [user]", scrape_item.album_id, None) if user_title not in scrape_item.parent_title.split("/"): scrape_item.add_to_parent_title(user_title) @@ -251,7 +251,7 @@ async def playlist(self, scrape_item: ScrapeItem) -> None: scrape_item.add_to_parent_title("favorite") if self.folder_domain not in scrape_item.parent_title: - title = await self.create_title(title, scrape_item.album_id, None) + title = self.create_title(title, scrape_item.album_id, None) if title not in scrape_item.parent_title.split("/"): scrape_item.add_to_parent_title(title) diff --git a/cyberdrop_dl/scraper/crawlers/toonily_crawler.py b/cyberdrop_dl/scraper/crawlers/toonily_crawler.py index da93b28bf..2ed8df63a 100644 --- a/cyberdrop_dl/scraper/crawlers/toonily_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/toonily_crawler.py @@ -29,7 +29,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "chapter" in scrape_item.url.name: await self.chapter(scrape_item) @@ -38,7 +38,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.handle_direct_link(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def series(self, scrape_item: ScrapeItem) -> None: @@ -93,7 +93,7 @@ async def chapter(self, scrape_item: ScrapeItem) -> None: title_parts = soup.select_one("title").get_text().split(" - ") series_name = title_parts[0] chapter_title = title_parts[1] - series_title = await self.create_title(series_name, None, None) + series_title = self.create_title(series_name, None, None) scrape_item.add_to_parent_title(series_title) scrape_item.add_to_parent_title(chapter_title) diff --git a/cyberdrop_dl/scraper/crawlers/xbunker_crawler.py b/cyberdrop_dl/scraper/crawlers/xbunker_crawler.py index a2d28f128..b06eb5058 100644 --- a/cyberdrop_dl/scraper/crawlers/xbunker_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/xbunker_crawler.py @@ -59,7 +59,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if not self.logged_in: login_url = self.primary_base_domain / "login" @@ -75,7 +75,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: log("XBunker login failed. 
Skipping.", 40) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def forum(self, scrape_item: ScrapeItem) -> None: @@ -107,7 +107,7 @@ async def forum(self, scrape_item: ScrapeItem) -> None: elem.decompose() thread_id = thread_url.parts[2].split(".")[-1] - title = await self.create_title(title_block.text.replace("\n", ""), None, thread_id) + title = self.create_title(title_block.text.replace("\n", ""), None, thread_id) posts = soup.select(self.posts_selector) for post in posts: diff --git a/cyberdrop_dl/scraper/crawlers/xbunkr_crawler.py b/cyberdrop_dl/scraper/crawlers/xbunkr_crawler.py index a6ca5cd15..862ef903a 100644 --- a/cyberdrop_dl/scraper/crawlers/xbunkr_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/xbunkr_crawler.py @@ -27,7 +27,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) if "media" in cast(str, scrape_item.url.host): filename, ext = get_filename_and_ext(scrape_item.url.name) @@ -35,7 +35,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.album(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def album(self, scrape_item: ScrapeItem) -> None: @@ -54,7 +54,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: "maximum_number_of_children" ][scrape_item.type] - title = await self.create_title(soup.select_one("h1[id=title]").text, scrape_item.album_id, None) + title = self.create_title(soup.select_one("h1[id=title]").text, scrape_item.album_id, None) links = soup.select("a[class=image]") for link in links: diff --git a/cyberdrop_dl/scraper/crawlers/xxxbunker_crawler.py b/cyberdrop_dl/scraper/crawlers/xxxbunker_crawler.py index 3fb1ec661..97cd2fee5 100644 --- a/cyberdrop_dl/scraper/crawlers/xxxbunker_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/xxxbunker_crawler.py @@ -38,7 +38,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - task_id = await self.scraping_progress.add_task(scrape_item.url) + task_id = self.scraping_progress.add_task(scrape_item.url) # Old behavior, not worth it with such a bad rate_limit: modify URL to always start on page 1 """ @@ -53,7 +53,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: else: await self.video(scrape_item) - await self.scraping_progress.remove_task(task_id) + self.scraping_progress.remove_task(task_id) @error_handling_wrapper async def video(self, scrape_item: ScrapeItem) -> None: @@ -132,13 +132,13 @@ async def playlist(self, scrape_item: ScrapeItem) -> None: soup: BeautifulSoup = await self.client.get_soup(self.domain, scrape_item.url, origin=scrape_item) if "favoritevideos" in scrape_item.url.parts: - title = await self.create_title(f"user {scrape_item.url.parts[2]} [favorites]", None, None) + title = self.create_title(f"user {scrape_item.url.parts[2]} [favorites]", None, None) elif "search" in scrape_item.url.parts: - title = await self.create_title(f"{scrape_item.url.parts[2].replace('+', ' ')} [search]", None, None) + title = self.create_title(f"{scrape_item.url.parts[2].replace('+', ' ')} [search]", None, None) elif len(scrape_item.url.parts) >= 2: - 
title = await self.create_title(f"{scrape_item.url.parts[2]} [categorie]", None, None)
+            title = self.create_title(f"{scrape_item.url.parts[2]} [categorie]", None, None)
         # Not a valid URL
         else:
diff --git a/cyberdrop_dl/scraper/filters.py b/cyberdrop_dl/scraper/filters.py
index d9ecd3a62..19e650ef8 100644
--- a/cyberdrop_dl/scraper/filters.py
+++ b/cyberdrop_dl/scraper/filters.py
@@ -62,7 +62,7 @@ def has_valid_extension(url: URL) -> bool:
     try:
         _, ext = get_filename_and_ext(url.name)
-        valid_exts = FILE_FORMATS["Images"] + FILE_FORMATS["Videos"] + FILE_FORMATS["Audio"]
+        valid_exts = FILE_FORMATS["Images"] | FILE_FORMATS["Videos"] | FILE_FORMATS["Audio"]
         return ext in valid_exts
     except NoExtensionError:
         return False
diff --git a/cyberdrop_dl/scraper/jdownloader.py b/cyberdrop_dl/scraper/jdownloader.py
index 90374bdd6..e38ea05ef 100644
--- a/cyberdrop_dl/scraper/jdownloader.py
+++ b/cyberdrop_dl/scraper/jdownloader.py
@@ -1,6 +1,5 @@
 from __future__ import annotations
-import asyncio
 from dataclasses import field
 from functools import wraps
 from pathlib import Path
@@ -21,9 +20,9 @@ def error_wrapper(func: Callable) -> None:
     """Wrapper handles limits for scrape session."""
     @wraps(func)
-    async def wrapper(self: JDownloader, *args, **kwargs) -> None:
+    def wrapper(self: JDownloader, *args, **kwargs) -> None:
         try:
-            return await func(self, *args, **kwargs)
+            return func(self, *args, **kwargs)
         except JDownloaderError as e:
             msg = e.message
@@ -35,7 +34,6 @@ async def wrapper(self: JDownloader, *args, **kwargs) -> None:
             log(f"Failed JDownloader setup: {msg}", 40)
             self.enabled = False
-            await asyncio.sleep(20)
             return None
     return wrapper
@@ -59,7 +57,7 @@ def __init__(self, manager: Manager) -> None:
         self.jdownloader_agent = field(init=False)
     @error_wrapper
-    async def jdownloader_setup(self) -> None:
+    def jdownloader_setup(self) -> None:
         """Setup function for JDownloader."""
         if not all((self.jdownloader_username, self.jdownloader_password, self.jdownloader_device)):
             msg = "JDownloader credentials were not provided."
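The error_wrapper hunks above convert the decorator and jdownloader_setup() to synchronous code and drop the asyncio.sleep(20) back-off. Below is a rough, self-contained sketch of that decorator pattern under those assumptions; JDownloaderError, log(), and the credential attributes are simplified stand-ins, not the project's real definitions.

# Hedged, standalone sketch of the synchronous error_wrapper pattern shown above.
from functools import wraps
from typing import Callable


class JDownloaderError(Exception):
    def __init__(self, message: str) -> None:
        super().__init__(message)
        self.message = message


def log(message: str, level: int) -> None:
    print(f"[{level}] {message}")


def error_wrapper(func: Callable) -> Callable:
    """Catches setup failures, logs them, and disables the integration."""
    @wraps(func)
    def wrapper(self, *args, **kwargs):
        try:
            return func(self, *args, **kwargs)
        except JDownloaderError as e:
            log(f"Failed JDownloader setup: {e.message}", 40)
            self.enabled = False
            return None
    return wrapper


class JDownloader:
    def __init__(self, username: str = "", password: str = "", device: str = "") -> None:
        self.enabled = True
        self.credentials = (username, password, device)

    @error_wrapper
    def jdownloader_setup(self) -> None:
        if not all(self.credentials):
            raise JDownloaderError("JDownloader credentials were not provided.")


jd = JDownloader()
jd.jdownloader_setup()   # logs the failure and flips jd.enabled to False
assert jd.enabled is False

The design point is that a failed setup only disables the JDownloader integration instead of propagating, and once nothing in the wrapper awaits, there is no reason for it to run on the event loop.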
@@ -69,7 +67,7 @@ async def jdownloader_setup(self) -> None: jd.connect(self.jdownloader_username, self.jdownloader_password) self.jdownloader_agent = jd.get_device(self.jdownloader_device) - async def direct_unsupported_to_jdownloader( + def direct_unsupported_to_jdownloader( self, url: URL, title: str, diff --git a/cyberdrop_dl/scraper/scraper.py b/cyberdrop_dl/scraper/scraper.py index 997d378ed..5c5065382 100644 --- a/cyberdrop_dl/scraper/scraper.py +++ b/cyberdrop_dl/scraper/scraper.py @@ -20,7 +20,7 @@ remove_trailing_slash, ) from cyberdrop_dl.scraper.jdownloader import JDownloader -from cyberdrop_dl.utils.constants import BLOCKED_DOMAINS +from cyberdrop_dl.utils.constants import BLOCKED_DOMAINS, REGEX_LINKS from cyberdrop_dl.utils.dataclasses.url_objects import MediaItem, ScrapeItem from cyberdrop_dl.utils.logger import log from cyberdrop_dl.utils.utilities import get_download_path, get_filename_and_ext @@ -87,98 +87,98 @@ def __init__(self, manager: Manager) -> None: self.lock = asyncio.Lock() self.count = 0 - async def bunkr(self) -> None: + def bunkr(self) -> None: """Creates a Bunkr Crawler instance.""" from cyberdrop_dl.scraper.crawlers.bunkrr_crawler import BunkrrCrawler self.existing_crawlers["bunkrr"] = BunkrrCrawler(self.manager) self.existing_crawlers["bunkr"] = self.existing_crawlers["bunkrr"] - async def celebforum(self) -> None: + def celebforum(self) -> None: """Creates a CelebForum Crawler instance.""" from cyberdrop_dl.scraper.crawlers.celebforum_crawler import CelebForumCrawler self.existing_crawlers["celebforum"] = CelebForumCrawler(self.manager) - async def coomer(self) -> None: + def coomer(self) -> None: """Creates a Coomer Crawler instance.""" from cyberdrop_dl.scraper.crawlers.coomer_crawler import CoomerCrawler self.existing_crawlers["coomer"] = CoomerCrawler(self.manager) - async def cyberdrop(self) -> None: + def cyberdrop(self) -> None: """Creates a Cyberdrop Crawler instance.""" from cyberdrop_dl.scraper.crawlers.cyberdrop_crawler import CyberdropCrawler self.existing_crawlers["cyberdrop"] = CyberdropCrawler(self.manager) - async def cyberfile(self) -> None: + def cyberfile(self) -> None: """Creates a Cyberfile Crawler instance.""" from cyberdrop_dl.scraper.crawlers.cyberfile_crawler import CyberfileCrawler self.existing_crawlers["cyberfile"] = CyberfileCrawler(self.manager) - async def ehentai(self) -> None: + def ehentai(self) -> None: """Creates a EHentai Crawler instance.""" from cyberdrop_dl.scraper.crawlers.ehentai_crawler import EHentaiCrawler self.existing_crawlers["e-hentai"] = EHentaiCrawler(self.manager) - async def erome(self) -> None: + def erome(self) -> None: """Creates a Erome Crawler instance.""" from cyberdrop_dl.scraper.crawlers.erome_crawler import EromeCrawler self.existing_crawlers["erome"] = EromeCrawler(self.manager) - async def fapello(self) -> None: + def fapello(self) -> None: """Creates a Fappelo Crawler instance.""" from cyberdrop_dl.scraper.crawlers.fapello_crawler import FapelloCrawler self.existing_crawlers["fapello"] = FapelloCrawler(self.manager) - async def f95zone(self) -> None: + def f95zone(self) -> None: """Creates a F95Zone Crawler instance.""" from cyberdrop_dl.scraper.crawlers.f95zone_crawler import F95ZoneCrawler self.existing_crawlers["f95zone"] = F95ZoneCrawler(self.manager) - async def gofile(self) -> None: + def gofile(self) -> None: """Creates a GoFile Crawler instance.""" from cyberdrop_dl.scraper.crawlers.gofile_crawler import GoFileCrawler self.existing_crawlers["gofile"] = GoFileCrawler(self.manager) - 
async def hotpic(self) -> None: + def hotpic(self) -> None: """Creates a HotPic Crawler instance.""" from cyberdrop_dl.scraper.crawlers.hotpic_crawler import HotPicCrawler self.existing_crawlers["hotpic"] = HotPicCrawler(self.manager) - async def imageban(self) -> None: + def imageban(self) -> None: """Creates a ImageBan Crawler instance.""" from cyberdrop_dl.scraper.crawlers.imageban_crawler import ImageBanCrawler self.existing_crawlers["imageban"] = ImageBanCrawler(self.manager) - async def imgbb(self) -> None: + def imgbb(self) -> None: """Creates a ImgBB Crawler instance.""" from cyberdrop_dl.scraper.crawlers.imgbb_crawler import ImgBBCrawler self.existing_crawlers["ibb.co"] = ImgBBCrawler(self.manager) - async def imgbox(self) -> None: + def imgbox(self) -> None: """Creates a ImgBox Crawler instance.""" from cyberdrop_dl.scraper.crawlers.imgbox_crawler import ImgBoxCrawler self.existing_crawlers["imgbox"] = ImgBoxCrawler(self.manager) - async def imgur(self) -> None: + def imgur(self) -> None: """Creates a Imgur Crawler instance.""" from cyberdrop_dl.scraper.crawlers.imgur_crawler import ImgurCrawler self.existing_crawlers["imgur"] = ImgurCrawler(self.manager) - async def chevereto(self) -> None: + def chevereto(self) -> None: """Creates a Chevereto Crawler instance.""" from cyberdrop_dl.scraper.crawlers.chevereto_crawler import CheveretoCrawler @@ -189,158 +189,158 @@ async def chevereto(self) -> None: else: self.existing_crawlers[domain] = CheveretoCrawler(self.manager, domain) - async def kemono(self) -> None: + def kemono(self) -> None: """Creates a Kemono Crawler instance.""" from cyberdrop_dl.scraper.crawlers.kemono_crawler import KemonoCrawler self.existing_crawlers["kemono"] = KemonoCrawler(self.manager) - async def leakedmodels(self) -> None: + def leakedmodels(self) -> None: """Creates a LeakedModels Crawler instance.""" from cyberdrop_dl.scraper.crawlers.leakedmodels_crawler import LeakedModelsCrawler self.existing_crawlers["leakedmodels"] = LeakedModelsCrawler(self.manager) - async def mediafire(self) -> None: + def mediafire(self) -> None: """Creates a MediaFire Crawler instance.""" from cyberdrop_dl.scraper.crawlers.mediafire_crawler import MediaFireCrawler self.existing_crawlers["mediafire"] = MediaFireCrawler(self.manager) - async def nekohouse(self) -> None: + def nekohouse(self) -> None: """Creates a Nekohouse Crawler instance.""" from cyberdrop_dl.scraper.crawlers.nekohouse_crawler import NekohouseCrawler self.existing_crawlers["nekohouse"] = NekohouseCrawler(self.manager) - async def nudostar(self) -> None: + def nudostar(self) -> None: """Creates a NudoStar Crawler instance.""" from cyberdrop_dl.scraper.crawlers.nudostar_crawler import NudoStarCrawler self.existing_crawlers["nudostar.com"] = NudoStarCrawler(self.manager) - async def nudostartv(self) -> None: + def nudostartv(self) -> None: """Creates a NudoStarTV Crawler instance.""" from cyberdrop_dl.scraper.crawlers.nudostartv_crawler import NudoStarTVCrawler self.existing_crawlers["nudostar.tv"] = NudoStarTVCrawler(self.manager) - async def omegascans(self) -> None: + def omegascans(self) -> None: """Creates a OmegaScans Crawler instance.""" from cyberdrop_dl.scraper.crawlers.omegascans_crawler import OmegaScansCrawler self.existing_crawlers["omegascans"] = OmegaScansCrawler(self.manager) - async def pimpandhost(self) -> None: + def pimpandhost(self) -> None: """Creates a PimpAndHost Crawler instance.""" from cyberdrop_dl.scraper.crawlers.pimpandhost_crawler import PimpAndHostCrawler 
self.existing_crawlers["pimpandhost"] = PimpAndHostCrawler(self.manager) - async def pixeldrain(self) -> None: + def pixeldrain(self) -> None: """Creates a PixelDrain Crawler instance.""" from cyberdrop_dl.scraper.crawlers.pixeldrain_crawler import PixelDrainCrawler self.existing_crawlers["pixeldrain"] = PixelDrainCrawler(self.manager) - async def postimg(self) -> None: + def postimg(self) -> None: """Creates a PostImg Crawler instance.""" from cyberdrop_dl.scraper.crawlers.postimg_crawler import PostImgCrawler self.existing_crawlers["postimg"] = PostImgCrawler(self.manager) - async def realbooru(self) -> None: + def realbooru(self) -> None: """Creates a RealBooru Crawler instance.""" from cyberdrop_dl.scraper.crawlers.realbooru_crawler import RealBooruCrawler self.existing_crawlers["realbooru"] = RealBooruCrawler(self.manager) - async def realdebrid(self) -> None: + def realdebrid(self) -> None: """Creates a RealDebrid Crawler instance.""" from cyberdrop_dl.scraper.crawlers.realdebrid_crawler import RealDebridCrawler self.existing_crawlers["real-debrid"] = RealDebridCrawler(self.manager) - async def reddit(self) -> None: + def reddit(self) -> None: """Creates a Reddit Crawler instance.""" from cyberdrop_dl.scraper.crawlers.reddit_crawler import RedditCrawler self.existing_crawlers["reddit"] = RedditCrawler(self.manager) self.existing_crawlers["redd.it"] = self.existing_crawlers["reddit"] - async def redgifs(self) -> None: + def redgifs(self) -> None: """Creates a RedGifs Crawler instance.""" from cyberdrop_dl.scraper.crawlers.redgifs_crawler import RedGifsCrawler self.existing_crawlers["redgifs"] = RedGifsCrawler(self.manager) - async def rule34vault(self) -> None: + def rule34vault(self) -> None: """Creates a Rule34Vault Crawler instance.""" from cyberdrop_dl.scraper.crawlers.rule34vault_crawler import Rule34VaultCrawler self.existing_crawlers["rule34vault"] = Rule34VaultCrawler(self.manager) - async def rule34xxx(self) -> None: + def rule34xxx(self) -> None: """Creates a Rule34XXX Crawler instance.""" from cyberdrop_dl.scraper.crawlers.rule34xxx_crawler import Rule34XXXCrawler self.existing_crawlers["rule34.xxx"] = Rule34XXXCrawler(self.manager) - async def rule34xyz(self) -> None: + def rule34xyz(self) -> None: """Creates a Rule34XYZ Crawler instance.""" from cyberdrop_dl.scraper.crawlers.rule34xyz_crawler import Rule34XYZCrawler self.existing_crawlers["rule34.xyz"] = Rule34XYZCrawler(self.manager) - async def saint(self) -> None: + def saint(self) -> None: """Creates a Saint Crawler instance.""" from cyberdrop_dl.scraper.crawlers.saint_crawler import SaintCrawler self.existing_crawlers["saint"] = SaintCrawler(self.manager) - async def scrolller(self) -> None: + def scrolller(self) -> None: """Creates a Scrolller Crawler instance.""" from cyberdrop_dl.scraper.crawlers.scrolller_crawler import ScrolllerCrawler self.existing_crawlers["scrolller"] = ScrolllerCrawler(self.manager) - async def simpcity(self) -> None: + def simpcity(self) -> None: """Creates a SimpCity Crawler instance.""" from cyberdrop_dl.scraper.crawlers.simpcity_crawler import SimpCityCrawler self.existing_crawlers["simpcity"] = SimpCityCrawler(self.manager) - async def socialmediagirls(self) -> None: + def socialmediagirls(self) -> None: """Creates a SocialMediaGirls Crawler instance.""" from cyberdrop_dl.scraper.crawlers.socialmediagirls_crawler import SocialMediaGirlsCrawler self.existing_crawlers["socialmediagirls"] = SocialMediaGirlsCrawler(self.manager) - async def tokyomotion(self) -> None: + def tokyomotion(self) 
-> None: """Creates a Tokyomotion Crawler instance.""" from cyberdrop_dl.scraper.crawlers.tokyomotion_crawler import TokioMotionCrawler self.existing_crawlers["tokyomotion"] = TokioMotionCrawler(self.manager) - async def toonily(self) -> None: + def toonily(self) -> None: """Creates a Toonily Crawler instance.""" from cyberdrop_dl.scraper.crawlers.toonily_crawler import ToonilyCrawler self.existing_crawlers["toonily"] = ToonilyCrawler(self.manager) - async def xbunker(self) -> None: + def xbunker(self) -> None: """Creates a XBunker Crawler instance.""" from cyberdrop_dl.scraper.crawlers.xbunker_crawler import XBunkerCrawler self.existing_crawlers["xbunker"] = XBunkerCrawler(self.manager) - async def xbunkr(self) -> None: + def xbunkr(self) -> None: """Creates a XBunkr Crawler instance.""" from cyberdrop_dl.scraper.crawlers.xbunkr_crawler import XBunkrCrawler self.existing_crawlers["xbunkr"] = XBunkrCrawler(self.manager) - async def xxxbunker(self) -> None: + def xxxbunker(self) -> None: """Creates a XXXBunker Crawler instance.""" from cyberdrop_dl.scraper.crawlers.xxxbunker_crawler import XXXBunkerCrawler @@ -348,38 +348,38 @@ async def xxxbunker(self) -> None: """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - async def start_scrapers(self) -> None: + def start_scrapers(self) -> None: """Starts all scrapers.""" for domain in self.mapping: - await self.mapping[domain]() + self.mapping[domain]() for crawler in self.existing_crawlers.values(): if isinstance(crawler.client, Field): - await crawler.startup() + crawler.startup() - async def start_jdownloader(self) -> None: + def start_jdownloader(self) -> None: """Starts JDownloader.""" if self.jdownloader.enabled and isinstance(self.jdownloader.jdownloader_agent, Field): - await self.jdownloader.jdownloader_setup() + self.jdownloader.jdownloader_setup() - async def start_real_debrid(self) -> None: + def start_real_debrid(self) -> None: """Starts RealDebrid.""" if isinstance(self.manager.real_debrid_manager.api, Field): - await self.manager.real_debrid_manager.startup() + self.manager.real_debrid_manager.startup() if self.manager.real_debrid_manager.enabled: - await self.realdebrid() - await self.existing_crawlers["real-debrid"].startup() + self.realdebrid() + self.existing_crawlers["real-debrid"].startup() async def start(self) -> None: """Starts the orchestra.""" self.manager.scrape_mapper = self - await self.start_scrapers() - await self.start_jdownloader() - await self.start_real_debrid() + self.start_scrapers() + self.start_jdownloader() + self.start_real_debrid() - await self.no_crawler_downloader.startup() + self.no_crawler_downloader.startup() if self.manager.args_manager.retry_failed: await self.load_failed_links() @@ -393,7 +393,7 @@ async def start(self) -> None: """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" @staticmethod - async def regex_links(line: str) -> list: + def regex_links(line: str) -> list: """Regex grab the links from the URLs.txt file. This allows code blocks or full paragraphs to be copy and pasted into the URLs.txt. 
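# --- Illustrative aside (not part of the patch): a minimal, standalone sketch of what
# --- regex_links() does once the inline pattern is hoisted into the precompiled
# --- REGEX_LINKS constant added to cyberdrop_dl/utils/constants.py in this diff.
# --- The pattern and the ".md." cleanup are copied from the diff; the helper name and
# --- the sample input line are made up for demonstration only.
import re

REGEX_LINKS = re.compile(r"(?:http.*?)(?=($|\n|\r\n|\r|\s|\"|\[/URL]|']\[|]\[|\[/img]))")

def extract_links(line: str) -> list[str]:
    """Pull every http(s) URL out of a pasted line, mirroring regex_links()."""
    if line.strip().startswith("#"):  # commented-out lines yield nothing
        return []
    # re.finditer() accepts a precompiled pattern, so the hoist only saves recompilation.
    return [m.group().replace(".md.", ".") for m in re.finditer(REGEX_LINKS, line)]

print(extract_links('see [URL]https://example.com/img.md.png[/URL] and https://host.tld/file.zip\n'))
# -> ['https://example.com/img.png', 'https://host.tld/file.zip']
# --- End of aside; the diff resumes below.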
@@ -402,10 +402,7 @@ async def regex_links(line: str) -> list: if line.lstrip().rstrip().startswith("#"): return yarl_links - all_links = [ - x.group().replace(".md.", ".") - for x in re.finditer(r"(?:http.*?)(?=($|\n|\r\n|\r|\s|\"|\[/URL]|']\[|]\[|\[/img]))", line) - ] + all_links = [x.group().replace(".md.", ".") for x in re.finditer(REGEX_LINKS, line)] for link in all_links: encoded = "%" in link yarl_links.append(URL(link, encoded=encoded)) @@ -427,11 +424,11 @@ async def parse_input_file_groups(self) -> dict[str, URL]: links[thread_title] = [] if thread_title: - links[thread_title].extend(await self.regex_links(line)) + links[thread_title].extend(self.regex_links(line)) else: block_quote = not block_quote if line == "#\n" else block_quote if not block_quote: - links[""].extend(await self.regex_links(line)) + links[""].extend(self.regex_links(line)) return links async def load_links(self) -> None: @@ -573,7 +570,7 @@ async def send_to_crawler(self, scrape_item: ScrapeItem) -> None: try: download_folder = get_download_path(self.manager, scrape_item, "jdownloader") relative_download_dir = download_folder.relative_to(self.manager.path_manager.download_dir) - await self.jdownloader.direct_unsupported_to_jdownloader( + self.jdownloader.direct_unsupported_to_jdownloader( scrape_item.url, scrape_item.parent_title, relative_download_dir, @@ -585,12 +582,12 @@ async def send_to_crawler(self, scrape_item: ScrapeItem) -> None: scrape_item.url, next(scrape_item.parents, None), ) - await self.manager.progress_manager.scrape_stats_progress.add_unsupported(sent_to_jdownloader=success) + self.manager.progress_manager.scrape_stats_progress.add_unsupported(sent_to_jdownloader=success) return log(f"Unsupported URL: {scrape_item.url}", 30) await self.manager.log_manager.write_unsupported_urls_log(scrape_item.url, next(scrape_item.parents, None)) - await self.manager.progress_manager.scrape_stats_progress.add_unsupported() + self.manager.progress_manager.scrape_stats_progress.add_unsupported() def filter_items(self, scrape_item: ScrapeItem) -> bool: """Pre-filter scrape items base on URL.""" @@ -627,7 +624,7 @@ async def skip_no_crawler_by_config(self, scrape_item: ScrapeItem) -> bool: ) if check_complete: log(f"Skipping {scrape_item.url} as it has already been downloaded", 10) - await self.manager.progress_manager.download_progress.add_previously_completed() + self.manager.progress_manager.download_progress.add_previously_completed() return True posible_referer = scrape_item.parents[-1] if scrape_item.parents else scrape_item.url @@ -637,7 +634,7 @@ async def skip_no_crawler_by_config(self, scrape_item: ScrapeItem) -> bool: if check_referer: log(f"Skipping {scrape_item.url} as referer has been seen before", 10) - await self.manager.progress_manager.download_progress.add_skipped() + self.manager.progress_manager.download_progress.add_skipped() return True return False diff --git a/cyberdrop_dl/ui/progress/downloads_progress.py b/cyberdrop_dl/ui/progress/downloads_progress.py index 73aefbb9b..d221af685 100644 --- a/cyberdrop_dl/ui/progress/downloads_progress.py +++ b/cyberdrop_dl/ui/progress/downloads_progress.py @@ -41,11 +41,11 @@ def __init__(self, manager: Manager) -> None: subtitle=f"Total Files: [white]{self.total_files}", ) - async def get_progress(self) -> Panel: + def get_progress(self) -> Panel: """Returns the progress bar.""" return self.panel - async def update_total(self) -> None: + def update_total(self) -> None: """Updates the total number of files to be downloaded.""" self.total_files = 
self.total_files + 1 self.progress.update(self.completed_files_task_id, total=self.total_files) @@ -54,24 +54,24 @@ async def update_total(self) -> None: self.progress.update(self.failed_files_task_id, total=self.total_files) self.panel.subtitle = f"Total Files: [white]{self.total_files}" - async def add_completed(self) -> None: + def add_completed(self) -> None: """Adds a completed file to the progress bar.""" self.progress.advance(self.completed_files_task_id, 1) self.completed_files += 1 - async def add_previously_completed(self, increase_total: bool = True) -> None: + def add_previously_completed(self, increase_total: bool = True) -> None: """Adds a previously completed file to the progress bar.""" if increase_total: - await self.update_total() + self.update_total() self.previously_completed_files += 1 self.progress.advance(self.previously_completed_files_task_id, 1) - async def add_skipped(self) -> None: + def add_skipped(self) -> None: """Adds a skipped file to the progress bar.""" self.progress.advance(self.skipped_files_task_id, 1) self.skipped_files += 1 - async def add_failed(self) -> None: + def add_failed(self) -> None: """Adds a failed file to the progress bar.""" self.progress.advance(self.failed_files_task_id, 1) self.failed_files += 1 diff --git a/cyberdrop_dl/ui/progress/file_progress.py b/cyberdrop_dl/ui/progress/file_progress.py index 7178a08bf..76b1acfb7 100644 --- a/cyberdrop_dl/ui/progress/file_progress.py +++ b/cyberdrop_dl/ui/progress/file_progress.py @@ -19,7 +19,7 @@ from cyberdrop_dl.managers.manager import Manager -async def adjust_title(s: str, length: int = 40, placeholder: str = "...") -> str: +def adjust_title(s: str, length: int = 40, placeholder: str = "...") -> str: """Collapse and truncate or pad the given string to fit in the given length.""" return f"{s[:length - len(placeholder)]}{placeholder}" if len(s) >= length else s.ljust(length) @@ -67,11 +67,11 @@ def __init__(self, visible_tasks_limit: int, manager: Manager) -> None: self.tasks_visibility_limit = visible_tasks_limit self.downloaded_data = 0 - async def get_progress(self) -> Panel: + def get_progress(self) -> Panel: """Returns the progress bar.""" return Panel(self.progress_group, title="Downloads", border_style="green", padding=(1, 1)) - async def get_queue_length(self) -> int: + def get_queue_length(self) -> int: """Returns the number of tasks in the downloader queue.""" total = 0 @@ -81,7 +81,7 @@ async def get_queue_length(self) -> int: return total - async def redraw(self, passed: bool = False) -> None: + def redraw(self, passed: bool = False) -> None: """Redraws the progress bar.""" while len(self.visible_tasks) > self.tasks_visibility_limit: task_id = self.visible_tasks.pop(0) @@ -105,7 +105,7 @@ async def redraw(self, passed: bool = False) -> None: else: self.overflow.update(self.overflow_task_id, visible=False) - queue_length = await self.get_queue_length() + queue_length = self.get_queue_length() if queue_length > 0: self.queue.update( self.queue_task_id, @@ -116,13 +116,13 @@ async def redraw(self, passed: bool = False) -> None: self.queue.update(self.queue_task_id, visible=False) if not passed: - await self.manager.progress_manager.scraping_progress.redraw(True) + self.manager.progress_manager.scraping_progress.redraw(True) - async def add_task(self, file: str, expected_size: int | None) -> TaskID: + def add_task(self, file: str, expected_size: int | None) -> TaskID: """Adds a new task to the progress bar.""" description = file.split("/")[-1] description = 
description.encode("ascii", "ignore").decode().strip() - description = await adjust_title(description) + description = adjust_title(description) if len(self.visible_tasks) >= self.tasks_visibility_limit: task_id = self.progress.add_task( @@ -137,10 +137,10 @@ async def add_task(self, file: str, expected_size: int | None) -> TaskID: total=expected_size, ) self.visible_tasks.append(task_id) - await self.redraw() + self.redraw() return task_id - async def remove_file(self, task_id: TaskID) -> None: + def remove_file(self, task_id: TaskID) -> None: """Removes the given task from the progress bar.""" if task_id in self.visible_tasks: self.visible_tasks.remove(task_id) @@ -152,13 +152,13 @@ async def remove_file(self, task_id: TaskID) -> None: else: msg = "Task ID not found" raise ValueError(msg) - await self.redraw() + self.redraw() - async def advance_file(self, task_id: TaskID, amount: int) -> None: + def advance_file(self, task_id: TaskID, amount: int) -> None: """Advances the progress of the given task by the given amount.""" self.downloaded_data += amount if task_id in self.uninitiated_tasks: self.uninitiated_tasks.remove(task_id) self.invisible_tasks.append(task_id) - await self.redraw() + self.redraw() self.progress.advance(task_id, amount) diff --git a/cyberdrop_dl/ui/progress/hash_progress.py b/cyberdrop_dl/ui/progress/hash_progress.py index 7a6e80df0..cb6ee8dab 100644 --- a/cyberdrop_dl/ui/progress/hash_progress.py +++ b/cyberdrop_dl/ui/progress/hash_progress.py @@ -60,7 +60,7 @@ def __init__(self, manager: Manager) -> None: total=None, ) - async def get_hash_progress(self) -> Panel: + def get_hash_progress(self) -> Panel: """Returns the progress bar.""" return Panel( self.hash_progress_group, @@ -69,11 +69,11 @@ async def get_hash_progress(self) -> Panel: padding=(1, 1), ) - async def get_removed_progress(self) -> Panel: + def get_removed_progress(self) -> Panel: """Returns the progress bar.""" return Panel(self.removed_progress_group, border_style="green", padding=(1, 1)) - async def update_currently_hashing(self, file: Path) -> None: + def update_currently_hashing(self, file: Path) -> None: self.current_hashing_text.update(self.currently_hashing_task_id, description=f"[blue]{file}") self.current_hashing_text.update( @@ -81,22 +81,22 @@ async def update_currently_hashing(self, file: Path) -> None: description=f"[blue]{format_size(file.stat().st_size)}", ) - async def add_new_completed_hash(self) -> None: + def add_new_completed_hash(self) -> None: """Adds a completed file to the progress bar.""" self.hash_progress.advance(self.hashed_files_task_id, 1) self.hashed_files += 1 - async def add_prev_hash(self) -> None: + def add_prev_hash(self) -> None: """Adds a completed file to the progress bar.""" self.hash_progress.advance(self.prev_hashed_files_task_id, 1) self.prev_hashed_files += 1 - async def add_removed_file(self) -> None: + def add_removed_file(self) -> None: """Adds a completed file to the progress bar.""" self.remove_progress.advance(self.removed_files_task_id, 1) self.removed_files += 1 - async def add_removed_prev_file(self) -> None: + def add_removed_prev_file(self) -> None: """Adds a completed file to the progress bar.""" self.remove_progress.advance(self.removed_prev_files_task_id, 1) self.removed_prev_files += 1 diff --git a/cyberdrop_dl/ui/progress/scraping_progress.py b/cyberdrop_dl/ui/progress/scraping_progress.py index 09bfb6bca..84952395c 100644 --- a/cyberdrop_dl/ui/progress/scraping_progress.py +++ b/cyberdrop_dl/ui/progress/scraping_progress.py @@ -12,7 +12,7 
@@ from cyberdrop_dl.managers.manager import Manager -async def adjust_title(s: str, length: int = 40, placeholder: str = "...") -> str: +def adjust_title(s: str, length: int = 40, placeholder: str = "...") -> str: """Collapse and truncate or pad the given string to fit in the given length.""" return f"{s[:length - len(placeholder)]}{placeholder}" if len(s) >= length else s.ljust(length) @@ -46,11 +46,11 @@ def __init__(self, visible_tasks_limit: int, manager: Manager) -> None: self.invisible_tasks: list[TaskID] = [] self.tasks_visibility_limit = visible_tasks_limit - async def get_progress(self) -> Panel: + def get_progress(self) -> Panel: """Returns the progress bar.""" return Panel(self.progress_group, title="Scraping", border_style="green", padding=(1, 1)) - async def get_queue_length(self) -> int: + def get_queue_length(self) -> int: """Returns the number of tasks in the scraper queue.""" total = 0 @@ -59,7 +59,7 @@ async def get_queue_length(self) -> int: return total - async def redraw(self, passed: bool = False) -> None: + def redraw(self, passed: bool = False) -> None: """Redraws the progress bar.""" while len(self.visible_tasks) > self.tasks_visibility_limit: task_id = self.visible_tasks.pop(0) @@ -83,7 +83,7 @@ async def redraw(self, passed: bool = False) -> None: else: self.overflow.update(self.overflow_task_id, visible=False) - queue_length = await self.get_queue_length() + queue_length = self.get_queue_length() if queue_length > 0: self.queue.update( self.queue_task_id, @@ -94,9 +94,9 @@ async def redraw(self, passed: bool = False) -> None: self.queue.update(self.queue_task_id, visible=False) if not passed: - await self.manager.progress_manager.file_progress.redraw(True) + self.manager.progress_manager.file_progress.redraw(True) - async def add_task(self, url: URL) -> TaskID: + def add_task(self, url: URL) -> TaskID: """Adds a new task to the progress bar.""" if len(self.visible_tasks) >= self.tasks_visibility_limit: task_id = self.progress.add_task( @@ -107,10 +107,10 @@ async def add_task(self, url: URL) -> TaskID: else: task_id = self.progress.add_task(self.progress_str.format(color=self.color, description=str(url))) self.visible_tasks.append(task_id) - await self.redraw() + self.redraw() return task_id - async def remove_task(self, task_id: TaskID) -> None: + def remove_task(self, task_id: TaskID) -> None: """Removes a task from the progress bar.""" if task_id in self.visible_tasks: self.visible_tasks.remove(task_id) @@ -122,4 +122,4 @@ async def remove_task(self, task_id: TaskID) -> None: else: msg = "Task ID not found" raise ValueError(msg) - await self.redraw() + self.redraw() diff --git a/cyberdrop_dl/ui/progress/sort_progress.py b/cyberdrop_dl/ui/progress/sort_progress.py index 3d3355248..4a118b165 100644 --- a/cyberdrop_dl/ui/progress/sort_progress.py +++ b/cyberdrop_dl/ui/progress/sort_progress.py @@ -10,7 +10,7 @@ from cyberdrop_dl.managers.manager import Manager -async def adjust_title(s: str, length: int = 40, placeholder: str = "...") -> str: +def adjust_title(s: str, length: int = 40, placeholder: str = "...") -> str: """Collapse and truncate or pad the given string to fit in the given length.""" return f"{s[:length - len(placeholder)]}{placeholder}" if len(s) >= length else s.ljust(length) @@ -53,6 +53,12 @@ def __init__(self, visible_task_limit: int, manager: Manager) -> None: self.completed_tasks: list[TaskID] = [] self.uninitiated_tasks: list[TaskID] = [] self.tasks_visibility_limit = visible_task_limit + self.panel = Panel( + self.progress_group, + 
title=f"Sorting Downloads ━ Config: {self.manager.config_manager.loaded_config}", + border_style="green", + padding=(1, 1), + ) # counts self.audio_count = 0 @@ -60,19 +66,14 @@ def __init__(self, visible_task_limit: int, manager: Manager) -> None: self.image_count = 0 self.other_count = 0 - async def get_progress(self) -> Panel: + def get_progress(self) -> Panel: """Returns the progress bar.""" - return Panel( - self.progress_group, - title=f"Sorting Downloads ━ Config: {self.manager.config_manager.loaded_config}", - border_style="green", - padding=(1, 1), - ) + return self.panel - async def set_queue_length(self, length: int) -> None: + def set_queue_length(self, length: int) -> None: self.queue_length = length - async def redraw(self, passed: bool = False) -> None: + def redraw(self, passed: bool = False) -> None: """Redraws the progress bar.""" while len(self.visible_tasks) > self.tasks_visibility_limit: task_id = self.visible_tasks.pop(0) @@ -107,14 +108,14 @@ async def redraw(self, passed: bool = False) -> None: self.queue.update(self.queue_task_id, visible=False) if not passed: - await self.manager.progress_manager.scraping_progress.redraw(True) + self.manager.progress_manager.scraping_progress.redraw(True) - async def add_task(self, folder: str, expected_size: int | None) -> TaskID: + def add_task(self, folder: str, expected_size: int | None) -> TaskID: """Adds a new task to the progress bar.""" # description = f'Sorting {folder}' description = folder description = description.encode("ascii", "ignore").decode().strip() - description = await adjust_title(description) + description = adjust_title(description) if len(self.visible_tasks) >= self.tasks_visibility_limit: task_id = self.progress.add_task( @@ -129,10 +130,10 @@ async def add_task(self, folder: str, expected_size: int | None) -> TaskID: total=expected_size, ) self.visible_tasks.append(task_id) - await self.redraw() + self.redraw() return task_id - async def remove_folder(self, task_id: TaskID) -> None: + def remove_folder(self, task_id: TaskID) -> None: """Removes the given task from the progress bar.""" if task_id in self.visible_tasks: self.visible_tasks.remove(task_id) @@ -144,24 +145,24 @@ async def remove_folder(self, task_id: TaskID) -> None: else: msg = "Task ID not found" raise ValueError(msg) - await self.redraw() + self.redraw() - async def advance_folder(self, task_id: TaskID, amount: int) -> None: + def advance_folder(self, task_id: TaskID, amount: int) -> None: """Advances the progress of the given task by the given amount.""" if task_id in self.uninitiated_tasks: self.uninitiated_tasks.remove(task_id) self.invisible_tasks.append(task_id) - await self.redraw() + self.redraw() self.progress.advance(task_id, amount) - async def increment_audio(self) -> None: + def increment_audio(self) -> None: self.audio_count += 1 - async def increment_video(self) -> None: + def increment_video(self) -> None: self.video_count += 1 - async def increment_image(self) -> None: + def increment_image(self) -> None: self.image_count += 1 - async def increment_other(self) -> None: + def increment_other(self) -> None: self.other_count += 1 diff --git a/cyberdrop_dl/ui/progress/statistic_progress.py b/cyberdrop_dl/ui/progress/statistic_progress.py index 19c0c42c6..9f4f76265 100644 --- a/cyberdrop_dl/ui/progress/statistic_progress.py +++ b/cyberdrop_dl/ui/progress/statistic_progress.py @@ -14,7 +14,7 @@ class TaskInfo(NamedTuple): progress: float -async def get_tasks_info_sorted(progress: Progress) -> tuple: +def 
get_tasks_info_sorted(progress: Progress) -> tuple: tasks = [ TaskInfo( id=task.id, @@ -57,18 +57,18 @@ def __init__(self) -> None: subtitle=f"Total Download Failures: [white]{self.failed_files}", ) - async def get_progress(self) -> Panel: + def get_progress(self) -> Panel: """Returns the progress bar.""" return self.panel - async def update_total(self, total: int) -> None: + def update_total(self, total: int) -> None: """Updates the total number download failures.""" self.panel.subtitle = f"Total Download Failures: [white]{self.failed_files}" for key in self.failure_types: self.progress.update(self.failure_types[key], total=total) # Sort tasks on UI - tasks_sorted, were_sorted = await get_tasks_info_sorted(self.progress) + tasks_sorted, were_sorted = get_tasks_info_sorted(self.progress) if not were_sorted: for task_id in [task.id for task in tasks_sorted]: @@ -81,7 +81,7 @@ async def update_total(self, total: int) -> None: completed=task.completed, ) - async def add_failure(self, failure_type: Union[str, int]) -> None: + def add_failure(self, failure_type: Union[str, int]) -> None: """Adds a failed file to the progress bar.""" self.failed_files += 1 if isinstance(failure_type, int): @@ -98,7 +98,7 @@ async def add_failure(self, failure_type: Union[str, int]) -> None: total=self.failed_files, completed=1, ) - await self.update_total(self.failed_files) + self.update_total(self.failed_files) def return_totals(self) -> dict: """Returns the total number of failed files.""" @@ -135,18 +135,18 @@ def __init__(self) -> None: subtitle=f"Total Scrape Failures: [white]{self.failed_files}", ) - async def get_progress(self) -> Panel: + def get_progress(self) -> Panel: """Returns the progress bar.""" return self.panel - async def update_total(self, total: int) -> None: + def update_total(self, total: int) -> None: """Updates the total number of scrape failures.""" self.panel.subtitle = f"Total Scrape Failures: [white]{self.failed_files}" for key in self.failure_types: self.progress.update(self.failure_types[key], total=total) # Sort tasks on UI - tasks_sorted, were_sorted = await get_tasks_info_sorted(self.progress) + tasks_sorted, were_sorted = get_tasks_info_sorted(self.progress) if not were_sorted: for task_id in [task.id for task in tasks_sorted]: @@ -159,7 +159,7 @@ async def update_total(self, total: int) -> None: completed=task.completed, ) - async def add_failure(self, failure_type: Union[str, int]) -> None: + def add_failure(self, failure_type: Union[str, int]) -> None: """Adds a failed site to the progress bar.""" self.failed_files += 1 if isinstance(failure_type, int): @@ -173,9 +173,9 @@ async def add_failure(self, failure_type: Union[str, int]) -> None: total=self.failed_files, completed=1, ) - await self.update_total(self.failed_files) + self.update_total(self.failed_files) - async def add_unsupported(self, sent_to_jdownloader: bool = False) -> None: + def add_unsupported(self, sent_to_jdownloader: bool = False) -> None: """Adds an unsupported url to the progress bar.""" self.unsupported_urls += 1 if sent_to_jdownloader: diff --git a/cyberdrop_dl/ui/prompts/general_prompts.py b/cyberdrop_dl/ui/prompts/general_prompts.py index 64bd4be3c..85c110964 100644 --- a/cyberdrop_dl/ui/prompts/general_prompts.py +++ b/cyberdrop_dl/ui/prompts/general_prompts.py @@ -1,6 +1,5 @@ from __future__ import annotations -import asyncio import os import pathlib from typing import TYPE_CHECKING @@ -138,14 +137,13 @@ def import_cyberdrop_v4_items_prompt(manager: Manager) -> None: if 
import_download_history_path.is_file(): transfer_v4_db(import_download_history_path, manager.path_manager.history_db) else: - loop = asyncio.new_event_loop() for ele in pathlib.Path(import_download_history_path).glob("**/*.sqlite"): if str(ele) == str(manager.path_manager.history_db): continue try: transfer_v4_db(ele, manager.path_manager.history_db) except Exception as e: - loop.run_until_complete(log(f"Error importing {ele.name}: {e!s}", 20)) + log(f"Error importing {ele.name}: {e!s}", 20) # Done elif action == 3: diff --git a/cyberdrop_dl/utils/constants.py b/cyberdrop_dl/utils/constants.py index 160527f60..8dc223f17 100644 --- a/cyberdrop_dl/utils/constants.py +++ b/cyberdrop_dl/utils/constants.py @@ -26,6 +26,7 @@ # regex RAR_MULTIPART_PATTERN = re.compile(r"^part\d+") SANITIZE_FILENAME_PATTERN = re.compile(r'[<>:"/\\|?*\']') +REGEX_LINKS = re.compile(r"(?:http.*?)(?=($|\n|\r\n|\r|\s|\"|\[/URL]|']\[|]\[|\[/img]))") class CustomHTTPStatus(IntEnum): diff --git a/cyberdrop_dl/utils/database/table_definitions.py b/cyberdrop_dl/utils/database/table_definitions.py index 543efd7a9..2b65e2b12 100644 --- a/cyberdrop_dl/utils/database/table_definitions.py +++ b/cyberdrop_dl/utils/database/table_definitions.py @@ -1,34 +1,35 @@ -create_history = """CREATE TABLE IF NOT EXISTS media (domain TEXT, - url_path TEXT, - referer TEXT, - album_id TEXT, - download_path TEXT, - download_filename TEXT, - original_filename TEXT, - completed INTEGER NOT NULL, - created_at TIMESTAMP, - completed_at TIMESTAMP, - PRIMARY KEY (domain, url_path, original_filename) - );""" +create_history = """CREATE TABLE IF NOT EXISTS media ( + domain TEXT, + url_path TEXT, + referer TEXT, + album_id TEXT, + download_path TEXT, + download_filename TEXT, + original_filename TEXT, + completed INTEGER NOT NULL, + created_at TIMESTAMP, + completed_at TIMESTAMP, + PRIMARY KEY (domain, url_path, original_filename) +);""" -create_fixed_history = """CREATE TABLE IF NOT EXISTS media_copy (domain TEXT, - url_path TEXT, - referer TEXT, - album_id TEXT, - download_path TEXT, - download_filename TEXT, - original_filename TEXT, - file_size INT, - completed INTEGER NOT NULL, - PRIMARY KEY (domain, url_path, original_filename) - );""" +create_fixed_history = """CREATE TABLE IF NOT EXISTS media_copy ( + domain TEXT, + url_path TEXT, + referer TEXT, + album_id TEXT, + download_path TEXT, + download_filename TEXT, + original_filename TEXT, + file_size INT, + completed INTEGER NOT NULL, + PRIMARY KEY (domain, url_path, original_filename) +);""" create_temp = """CREATE TABLE IF NOT EXISTS temp (downloaded_filename TEXT);""" create_temp_referer = """CREATE TABLE IF NOT EXISTS temp_referer (referer TEXT);""" -create_hash = """ -CREATE TABLE IF NOT EXISTS hash ( +create_hash = """CREATE TABLE IF NOT EXISTS hash ( folder TEXT, download_filename TEXT, original_filename TEXT, diff --git a/cyberdrop_dl/utils/database/tables/history_table.py b/cyberdrop_dl/utils/database/tables/history_table.py index 1fc082ced..1520505c3 100644 --- a/cyberdrop_dl/utils/database/tables/history_table.py +++ b/cyberdrop_dl/utils/database/tables/history_table.py @@ -16,7 +16,7 @@ from cyberdrop_dl.utils.dataclasses.url_objects import MediaItem -async def get_db_path(url: URL, referer: str = "") -> str: +def get_db_path(url: URL, referer: str = "") -> str: """Gets the URL path to be put into the DB and checked from the DB.""" url_path = url.path @@ -29,7 +29,7 @@ async def get_db_path(url: URL, referer: str = "") -> str: return url_path -async def get_db_domain(domain: str) -> 
str: +def get_db_domain(domain: str) -> str: """Gets the domain to be put into the DB and checked from the DB.""" if domain in ( "img.kiwi", @@ -65,9 +65,9 @@ async def check_complete(self, domain: str, url: URL, referer: URL) -> bool: if self.ignore_history: return False - domain = await get_db_domain(domain) + domain = get_db_domain(domain) - url_path = await get_db_path(url, domain) + url_path = get_db_path(url, domain) cursor = await self.db_conn.cursor() result = await cursor.execute( """SELECT referer, completed FROM media WHERE domain = ? and url_path = ?""", @@ -90,7 +90,7 @@ async def check_album(self, domain: str, album_id: str) -> bool | dict[Any, Any] if self.ignore_history: return False - domain = await get_db_domain(domain) + domain = get_db_domain(domain) cursor = await self.db_conn.cursor() result = await cursor.execute( """SELECT url_path, completed FROM media WHERE domain = ? and album_id = ?""", @@ -101,8 +101,8 @@ async def check_album(self, domain: str, album_id: str) -> bool | dict[Any, Any] async def set_album_id(self, domain: str, media_item: MediaItem) -> None: """Sets an album_id in the database.""" - domain = await get_db_domain(domain) - url_path = await get_db_path(media_item.url, str(media_item.referer)) + domain = get_db_domain(domain) + url_path = get_db_path(media_item.url, str(media_item.referer)) await self.db_conn.execute( """UPDATE media SET album_id = ? WHERE domain = ? and url_path = ?""", (media_item.album_id, domain, url_path), @@ -114,7 +114,7 @@ async def check_complete_by_referer(self, domain: str, referer: URL) -> bool: if self.ignore_history: return False - domain = await get_db_domain(domain) + domain = get_db_domain(domain) cursor = await self.db_conn.cursor() result = await cursor.execute( """SELECT completed FROM media WHERE domain = ? and referer = ?""", @@ -125,8 +125,8 @@ async def check_complete_by_referer(self, domain: str, referer: URL) -> bool: async def insert_incompleted(self, domain: str, media_item: MediaItem) -> None: """Inserts an uncompleted file into the database.""" - domain = await get_db_domain(domain) - url_path = await get_db_path(media_item.url, str(media_item.referer)) + domain = get_db_domain(domain) + url_path = get_db_path(media_item.url, str(media_item.referer)) download_filename = media_item.download_filename if isinstance(media_item.download_filename, str) else "" try: await self.db_conn.execute( @@ -159,8 +159,8 @@ async def insert_incompleted(self, domain: str, media_item: MediaItem) -> None: async def mark_complete(self, domain: str, media_item: MediaItem) -> None: """Mark a download as completed in the database.""" - domain = await get_db_domain(domain) - url_path = await get_db_path(media_item.url, str(media_item.referer)) + domain = get_db_domain(domain) + url_path = get_db_path(media_item.url, str(media_item.referer)) await self.db_conn.execute( """UPDATE media SET completed = 1, completed_at = CURRENT_TIMESTAMP WHERE domain = ? and url_path = ?""", (domain, url_path), @@ -169,8 +169,8 @@ async def mark_complete(self, domain: str, media_item: MediaItem) -> None: async def add_filesize(self, domain: str, media_item: MediaItem) -> None: """Add the file size to the db.""" - domain = await get_db_domain(domain) - url_path = await get_db_path(media_item.url, str(media_item.referer)) + domain = get_db_domain(domain) + url_path = get_db_path(media_item.url, str(media_item.referer)) file_size = pathlib.Path(media_item.complete_file).stat().st_size await self.db_conn.execute( """UPDATE media SET file_size=? 
WHERE domain = ? and url_path = ?""", @@ -187,8 +187,8 @@ async def check_filename_exists(self, filename: str) -> bool: async def get_downloaded_filename(self, domain: str, media_item: MediaItem) -> str: """Returns the downloaded filename from the database.""" - domain = await get_db_domain(domain) - url_path = await get_db_path(media_item.url, str(media_item.referer)) + domain = get_db_domain(domain) + url_path = get_db_path(media_item.url, str(media_item.referer)) cursor = await self.db_conn.cursor() result = await cursor.execute( """SELECT download_filename FROM media WHERE domain = ? and url_path = ?""", diff --git a/cyberdrop_dl/utils/logger.py b/cyberdrop_dl/utils/logger.py index 836effed5..a25058ff0 100644 --- a/cyberdrop_dl/utils/logger.py +++ b/cyberdrop_dl/utils/logger.py @@ -9,8 +9,6 @@ logger = logging.getLogger("cyberdrop_dl") logger_debug = logging.getLogger("cyberdrop_dl_debug") - - console = Console() @@ -22,14 +20,14 @@ def log(message: Exception | str, level: int = 10, *, sleep: int | None = None, """Simple logging function.""" logger.log(level, message, **kwargs) log_debug(message, level, **kwargs) - log_debug_console(message, level, sleep=sleep, **kwargs) + log_debug_console(message, level, sleep=sleep) -def log_debug(message: Exception | str, level: int = 10, *kwargs) -> None: +def log_debug(message: Exception | str, level: int = 10, **kwargs) -> None: """Simple logging function.""" if constants.DEBUG_VAR: message = str(message) - logger_debug.log(level, message.encode("ascii", "ignore").decode("ascii"), *kwargs) + logger_debug.log(level, message.encode("ascii", "ignore").decode("ascii"), **kwargs) def log_debug_console(message: Exception | str, level: int, sleep: int | None = None) -> None: @@ -38,9 +36,9 @@ def log_debug_console(message: Exception | str, level: int, sleep: int | None = _log_to_console(level, message.encode("ascii", "ignore").decode("ascii"), sleep=sleep) -def log_with_color(message: str, style: str, level: int, show_in_stats: bool = True, *kwargs) -> None: +def log_with_color(message: str, style: str, level: int, show_in_stats: bool = True, **kwargs) -> None: """Simple logging function with color.""" - log(message, level, *kwargs) + log(message, level, **kwargs) text = Text(message, style=style) console.print(text) if show_in_stats: diff --git a/cyberdrop_dl/utils/sorting.py b/cyberdrop_dl/utils/sorting.py index d16d1adfc..c07520ab4 100644 --- a/cyberdrop_dl/utils/sorting.py +++ b/cyberdrop_dl/utils/sorting.py @@ -3,7 +3,6 @@ import asyncio import itertools import subprocess -from os import name as os_name from pathlib import Path from typing import TYPE_CHECKING @@ -13,7 +12,7 @@ from videoprops import get_audio_properties, get_video_properties from cyberdrop_dl.utils.constants import FILE_FORMATS -from cyberdrop_dl.utils.logger import log, log_with_color +from cyberdrop_dl.utils.logger import console, log, log_with_color from cyberdrop_dl.utils.utilities import purge_dir_tree if TYPE_CHECKING: @@ -48,17 +47,17 @@ def __init__(self, manager: Manager) -> None: self.video_count = 0 self.other_count = 0 - async def find_files_in_dir(self, directory: Path) -> list: + def find_files_in_dir(self, directory: Path) -> list[Path]: """Finds all files in a directory and returns them in a list.""" file_list = [] for x in directory.iterdir(): if x.is_file(): file_list.append(x) elif x.is_dir(): - file_list.extend(await self.find_files_in_dir(x)) + file_list.extend(self.find_files_in_dir(x)) return file_list - async def move_cd(self, file: Path, dest: Path) -> 
bool: + def move_cd(self, file: Path, dest: Path) -> bool: """Moves a file to a destination folder.""" try: dest.parent.mkdir(parents=True, exist_ok=True) @@ -77,7 +76,7 @@ async def move_cd(self, file: Path, dest: Path) -> bool: return True - async def check_dir_parents(self) -> bool: + def check_dir_parents(self) -> bool: """Checks if the sort dir is in the download dir.""" if self.download_dir in self.sorted_downloads.parents: log_with_color("Sort Directory cannot be in the Download Directory", "red", 40) @@ -93,7 +92,7 @@ async def sort(self) -> None: # make sort dir self.sorted_downloads.mkdir(parents=True, exist_ok=True) - if await self.check_dir_parents(): + if self.check_dir_parents(): return if not self.download_dir.is_dir(): @@ -101,44 +100,44 @@ async def sort(self) -> None: return download_folders = await self.get_download_folder() - async with self.manager.live_manager.get_sort_live(stop=True): + with self.manager.live_manager.get_sort_live(stop=True): all_scan_folders = list(filter(lambda x: x.is_dir(), self.download_dir.iterdir())) queue_length = len(all_scan_folders) - await self.manager.progress_manager.sort_progress.set_queue_length(queue_length) + self.manager.progress_manager.sort_progress.set_queue_length(queue_length) for folder in all_scan_folders: if self.sort_cdl_only and folder not in download_folders: continue - files = await self.find_files_in_dir(folder) + files = self.find_files_in_dir(folder) # add folder to progress and set number of files - task_id = await self.manager.progress_manager.sort_progress.add_task(folder.name, len(files)) + task_id = self.manager.progress_manager.sort_progress.add_task(folder.name, len(files)) for file in files: ext = file.suffix.lower() if ".part" in ext: continue if ext in FILE_FORMATS["Audio"]: - await self.sort_audio(file, folder.name) + self.sort_audio(file, folder.name) elif ext in FILE_FORMATS["Images"]: - await self.sort_image(file, folder.name) + self.sort_image(file, folder.name) elif ext in FILE_FORMATS["Videos"]: - await self.sort_video(file, folder.name) + self.sort_video(file, folder.name) else: - await self.sort_other(file, folder.name) - await self.manager.progress_manager.sort_progress.advance_folder( + self.sort_other(file, folder.name) + # advance folder progress by one file + self.manager.progress_manager.sort_progress.advance_folder( task_id, 1, - ) # advance folder progress by one file - await purge_dir_tree(folder) + ) + purge_dir_tree(folder) queue_length -= 1 - await self.manager.progress_manager.sort_progress.set_queue_length(queue_length) # update queue length - await self.manager.progress_manager.sort_progress.remove_folder(task_id) # remove folder from progress + self.manager.progress_manager.sort_progress.set_queue_length(queue_length) # update queue length + self.manager.progress_manager.sort_progress.remove_folder(task_id) # remove folder from progress await asyncio.sleep(1) - await purge_dir_tree(self.download_dir) + purge_dir_tree(self.download_dir) - clear_screen_proc = await asyncio.create_subprocess_shell("cls" if os_name == "nt" else "clear") - await clear_screen_proc.wait() + console.clear() async def get_download_folder(self) -> list[Path]: """Gets the download folder.""" @@ -165,7 +164,7 @@ async def get_download_folder(self) -> list[Path]: download_folders.extend(existing_folders) return list(set(download_folders)) - async def sort_audio(self, file: Path, base_name: str) -> None: + def sort_audio(self, file: Path, base_name: str) -> None: """Sorts an audio file into the sorted audio 
folder.""" self.audio_count += 1 @@ -198,10 +197,10 @@ async def sort_audio(self, file: Path, base_name: str) -> None: ), ) - if await self.move_cd(file, new_file): - await self.manager.progress_manager.sort_progress.increment_audio() + if self.move_cd(file, new_file): + self.manager.progress_manager.sort_progress.increment_audio() - async def sort_image(self, file: Path, base_name: str) -> None: + def sort_image(self, file: Path, base_name: str) -> None: """Sorts an image file into the sorted image folder.""" self.image_count += 1 @@ -230,10 +229,10 @@ async def sort_image(self, file: Path, base_name: str) -> None: ), ) - if await self.move_cd(file, new_file): - await self.manager.progress_manager.sort_progress.increment_image() + if self.move_cd(file, new_file): + self.manager.progress_manager.sort_progress.increment_image() - async def sort_video(self, file: Path, base_name: str) -> None: + def sort_video(self, file: Path, base_name: str) -> None: """Sorts a video file into the sorted video folder.""" self.video_count += 1 @@ -271,10 +270,10 @@ async def sort_video(self, file: Path, base_name: str) -> None: ), ) - if await self.move_cd(file, new_file): - await self.manager.progress_manager.sort_progress.increment_video() + if self.move_cd(file, new_file): + self.manager.progress_manager.sort_progress.increment_video() - async def sort_other(self, file: Path, base_name: str) -> None: + def sort_other(self, file: Path, base_name: str) -> None: """Sorts an other file into the sorted other folder.""" self.other_count += 1 @@ -294,5 +293,5 @@ async def sort_other(self, file: Path, base_name: str) -> None: ), ) - if await self.move_cd(file, new_file): - await self.manager.progress_manager.sort_progress.increment_other() + if self.move_cd(file, new_file): + self.manager.progress_manager.sort_progress.increment_other() diff --git a/cyberdrop_dl/utils/utilities.py b/cyberdrop_dl/utils/utilities.py index 7db7c33c6..5ddeef1b2 100644 --- a/cyberdrop_dl/utils/utilities.py +++ b/cyberdrop_dl/utils/utilities.py @@ -218,6 +218,7 @@ def check_latest_pypi(log_to_console: bool = True, call_from_ui: bool = False) - latest_version: str = data["info"]["version"] releases = data["releases"].keys() message = color = None + level = 30 is_prelease, message = check_prelease_version(current_version, releases) if current_version not in releases: @@ -230,11 +231,12 @@ def check_latest_pypi(log_to_console: bool = True, call_from_ui: bool = False) - message = Text.from_markup(message) else: message = Text("You are currently on the latest version of Cyberdrop-DL") + level = 10 if call_from_ui: rich.print(message) elif log_to_console: - log_with_color(message.plain, color, 30, show_in_stats=False) + log_with_color(message.plain, color, level, show_in_stats=False) return current_version, latest_version