From 1947b52d840db6599ac4f161c776018e0d341e10 Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Thu, 28 Nov 2024 07:52:01 -0500 Subject: [PATCH] refactor: use pydantic models for config validation (#316) * refactor: use pydantic for config validation * refactor: replace every config setting reference * refactor: use yaml manager for every read/write * refactor: add AppriseURL type * refactor: use proper types * refactor: add default config values * refactor: add aliases * refactor: add yaml custom representers * refactor: add pydantic ValidationError handle * fix: AppriseURL Custom Model * refactor: remove SecretStr from auth data They are annoying to work with and we never log them so it's fine to use regular str * refactor: add HttpAppriseURL validator * refactor: use StrEnum for ScrapeItemType * refactor: replace yaml_manager with a module * refactor: use dataclasses for url_objects * refactor: update configs if some fields were missing * refactor: use pydantic models to create CLI args dinamically * refactor: update parsed_args references delete args_manager and use an instance of ParsedArgs * refactor: add aliases for input_file and download_folder * refactor: do args consolidation before path_startup * fix: incorrect merge of cli and config settings * fix: MediaItem references * sync: rebase from master * refactor: update config to pydantic config if necessary * refactor: remove dedupe options from global settings * refactor: add footer to InvalidYamlError * fix: deprecated warnings * refactor: add "ALL" config * fix: circular import * fix: download_speed_limit * fix: validation error handle for apprise.txt * fix: optional deprecated arguments * refactor: remove humanfriendly dependency --- cyberdrop_dl/clients/download_client.py | 52 +-- cyberdrop_dl/clients/errors.py | 7 +- cyberdrop_dl/clients/hash_client.py | 19 +- cyberdrop_dl/config_definitions/__init__.py | 9 + .../authentication_settings.py | 74 ++++ .../config_definitions/config_settings.py | 119 ++++++ .../config_definitions/custom_types.py | 110 ++++++ .../config_definitions/global_settings.py | 58 +++ cyberdrop_dl/downloader/downloader.py | 22 +- cyberdrop_dl/main.py | 51 +-- cyberdrop_dl/managers/args_manager.py | 164 --------- cyberdrop_dl/managers/cache_manager.py | 23 +- cyberdrop_dl/managers/client_manager.py | 42 +-- cyberdrop_dl/managers/config_manager.py | 293 ++++----------- cyberdrop_dl/managers/db_manager.py | 2 +- cyberdrop_dl/managers/download_manager.py | 29 +- cyberdrop_dl/managers/leaky.py | 9 +- cyberdrop_dl/managers/live_manager.py | 7 +- cyberdrop_dl/managers/log_manager.py | 2 +- cyberdrop_dl/managers/manager.py | 119 +++--- cyberdrop_dl/managers/path_manager.py | 105 +++--- cyberdrop_dl/managers/progress_manager.py | 6 +- cyberdrop_dl/managers/realdebrid_manager.py | 2 +- cyberdrop_dl/scraper/crawler.py | 37 +- .../scraper/crawlers/bunkrr_crawler.py | 29 +- .../scraper/crawlers/celebforum_crawler.py | 20 +- .../scraper/crawlers/coomer_crawler.py | 25 +- .../scraper/crawlers/cyberdrop_crawler.py | 6 +- .../scraper/crawlers/cyberfile_crawler.py | 16 +- .../scraper/crawlers/ehentai_crawler.py | 6 +- .../scraper/crawlers/erome_crawler.py | 12 +- .../scraper/crawlers/f95zone_crawler.py | 20 +- .../scraper/crawlers/fapello_crawler.py | 12 +- .../scraper/crawlers/gofile_crawler.py | 10 +- .../scraper/crawlers/hotpic_crawler.py | 6 +- .../scraper/crawlers/imageban_crawler.py | 12 +- .../scraper/crawlers/imgbb_crawler.py | 6 +- 
.../scraper/crawlers/imgbox_crawler.py | 6 +- .../scraper/crawlers/imgur_crawler.py | 8 +- .../scraper/crawlers/kemono_crawler.py | 26 +- .../scraper/crawlers/leakedmodels_crawler.py | 23 +- .../scraper/crawlers/mediafire_crawler.py | 6 +- .../scraper/crawlers/nekohouse_crawler.py | 4 +- .../scraper/crawlers/nudostar_crawler.py | 20 +- .../scraper/crawlers/omegascans_crawler.py | 6 +- .../scraper/crawlers/pimpandhost_crawler.py | 6 +- .../scraper/crawlers/pixeldrain_crawler.py | 6 +- .../scraper/crawlers/postimg_crawler.py | 6 +- .../scraper/crawlers/realbooru_crawler.py | 6 +- .../scraper/crawlers/reddit_crawler.py | 20 +- .../scraper/crawlers/redgifs_crawler.py | 6 +- .../scraper/crawlers/rule34vault_crawler.py | 12 +- .../scraper/crawlers/rule34xxx_crawler.py | 6 +- .../scraper/crawlers/rule34xyz_crawler.py | 6 +- .../scraper/crawlers/scrolller_crawler.py | 6 +- .../scraper/crawlers/simpcity_crawler.py | 24 +- .../crawlers/socialmediagirls_crawler.py | 22 +- .../scraper/crawlers/toonily_crawler.py | 12 +- .../scraper/crawlers/xbunker_crawler.py | 20 +- .../scraper/crawlers/xbunkr_crawler.py | 6 +- .../scraper/crawlers/xxxbunker_crawler.py | 2 +- cyberdrop_dl/scraper/jdownloader.py | 16 +- cyberdrop_dl/scraper/scraper.py | 50 ++- cyberdrop_dl/ui/program_ui.py | 20 +- cyberdrop_dl/ui/progress/hash_progress.py | 6 +- cyberdrop_dl/ui/prompts/user_prompts.py | 8 +- cyberdrop_dl/utils/args.py | 200 ++++++++++ cyberdrop_dl/utils/args/__init__.py | 0 cyberdrop_dl/utils/args/args.py | 347 ------------------ cyberdrop_dl/utils/args/config_definitions.py | 168 --------- cyberdrop_dl/utils/constants.py | 16 +- cyberdrop_dl/utils/cookie_extraction.py | 12 +- .../utils/data_enums_classes/url_objects.py | 105 +++--- .../utils/database/tables/history_table.py | 2 +- cyberdrop_dl/utils/logger.py | 7 +- cyberdrop_dl/utils/sorting.py | 18 +- .../utils/transfer/first_time_setup.py | 60 ++- .../utils/transfer/transfer_v4_config.py | 40 +- cyberdrop_dl/utils/utilities.py | 62 ++-- cyberdrop_dl/utils/yaml.py | 71 ++++ poetry.lock | 30 +- pyproject.toml | 1 - 82 files changed, 1363 insertions(+), 1682 deletions(-) create mode 100644 cyberdrop_dl/config_definitions/__init__.py create mode 100644 cyberdrop_dl/config_definitions/authentication_settings.py create mode 100644 cyberdrop_dl/config_definitions/config_settings.py create mode 100644 cyberdrop_dl/config_definitions/custom_types.py create mode 100644 cyberdrop_dl/config_definitions/global_settings.py delete mode 100644 cyberdrop_dl/managers/args_manager.py create mode 100644 cyberdrop_dl/utils/args.py delete mode 100644 cyberdrop_dl/utils/args/__init__.py delete mode 100644 cyberdrop_dl/utils/args/args.py delete mode 100644 cyberdrop_dl/utils/args/config_definitions.py create mode 100644 cyberdrop_dl/utils/yaml.py diff --git a/cyberdrop_dl/clients/download_client.py b/cyberdrop_dl/clients/download_client.py index 49f7b23fc..400adef3d 100644 --- a/cyberdrop_dl/clients/download_client.py +++ b/cyberdrop_dl/clients/download_client.py @@ -20,6 +20,8 @@ from collections.abc import Callable, Coroutine from typing import Any + from yarl import URL + from cyberdrop_dl.managers.client_manager import ClientManager from cyberdrop_dl.managers.manager import Manager from cyberdrop_dl.utils.data_enums_classes.url_objects import MediaItem @@ -90,6 +92,15 @@ async def on_request_end(*args): """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" + def add_api_key_headers(self, domain: str, referer: URL): + 
download_headers = copy.deepcopy(self._headers) + download_headers["Referer"] = str(referer) + auth_data = self.manager.config_manager.authentication_data + if domain == "pixeldrain" and auth_data.pixeldrain.api_key: + download_headers["Authorization"] = self.manager.download_manager.basic_auth( + "Cyberdrop-DL", auth_data.pixeldrain.api_key + ) + @limiter async def _download( self, @@ -100,23 +111,14 @@ async def _download( client_session: ClientSession, ) -> bool: """Downloads a file.""" - download_headers = copy.deepcopy(self._headers) - download_headers["Referer"] = str(media_item.referer) - if ( - domain == "pixeldrain" - and self.manager.config_manager.authentication_data["PixelDrain"]["pixeldrain_api_key"] - ): - download_headers["Authorization"] = self.manager.download_manager.basic_auth( - "Cyberdrop-DL", - self.manager.config_manager.authentication_data["PixelDrain"]["pixeldrain_api_key"], - ) + download_headers = self.add_api_key_headers(domain, media_item.referer) downloaded_filename = await self.manager.db_manager.history_table.get_downloaded_filename(domain, media_item) download_dir = self.get_download_dir(media_item) media_item.partial_file = download_dir / f"{downloaded_filename}.part" resume_point = 0 - if isinstance(media_item.partial_file, Path) and media_item.partial_file.exists(): + if media_item.partial_file and media_item.partial_file.exists(): resume_point = media_item.partial_file.stat().st_size if media_item.partial_file.exists() else 0 download_headers["Range"] = f"bytes={resume_point}-" @@ -136,7 +138,7 @@ async def _download( content_type = resp.headers.get("Content-Type") media_item.filesize = int(resp.headers.get("Content-Length", "0")) - if not isinstance(media_item.complete_file, Path): + if not media_item.complete_file: proceed, skip = await self.get_final_file_info(media_item, domain) await self.mark_incomplete(media_item, domain) self.client_manager.check_bunkr_maint(resp.headers) @@ -198,7 +200,7 @@ async def _append_content( async def download_file(self, manager: Manager, domain: str, media_item: MediaItem) -> bool: """Starts a file.""" - if self.manager.config_manager.settings_data["Download_Options"]["skip_download_mark_completed"]: + if self.manager.config_manager.settings_data.download_options.skip_download_mark_completed: log(f"Download Skip {media_item.url} due to mark completed option", 10) self.manager.progress_manager.download_progress.add_skipped() # set completed path @@ -235,7 +237,7 @@ async def mark_completed(self, domain: str, media_item: MediaItem) -> None: await self.manager.db_manager.history_table.mark_complete(domain, media_item) async def add_file_size(self, domain: str, media_item: MediaItem) -> None: - if not isinstance(media_item.complete_file, Path): + if not media_item.complete_file: media_item.complete_file = self.get_file_location(media_item) if media_item.complete_file.exists(): await self.manager.db_manager.history_table.add_filesize(domain, media_item) @@ -253,11 +255,11 @@ async def handle_media_item_completion(self, media_item: MediaItem, downloaded: def get_download_dir(self, media_item: MediaItem) -> Path: """Returns the download directory for the media item.""" download_folder = media_item.download_folder - if self.manager.args_manager.retry_any: + if self.manager.parsed_args.cli_only_args.retry_any: return download_folder - if self.manager.config_manager.settings_data["Download_Options"]["block_download_sub_folders"]: - while download_folder.parent != self.manager.path_manager.download_dir: + if 
self.manager.config_manager.settings_data.download_options.block_download_sub_folders: + while download_folder.parent != self.manager.path_manager.download_folder: download_folder = download_folder.parent media_item.download_folder = download_folder return download_folder @@ -271,7 +273,7 @@ async def get_final_file_info(self, media_item: MediaItem, domain: str) -> tuple media_item.complete_file = self.get_file_location(media_item) media_item.partial_file = media_item.complete_file.with_suffix(media_item.complete_file.suffix + ".part") - expected_size = media_item.filesize if isinstance(media_item.filesize, int) else None + expected_size = media_item.filesize proceed = True skip = False @@ -352,13 +354,13 @@ async def iterate_filename(self, complete_file: Path, media_item: MediaItem) -> def check_filesize_limits(self, media: MediaItem) -> bool: """Checks if the file size is within the limits.""" - file_size_limits = self.manager.config_manager.settings_data["File_Size_Limits"] - max_video_filesize = file_size_limits["maximum_video_size"] or float("inf") - min_video_filesize = file_size_limits["minimum_video_size"] - max_image_filesize = file_size_limits["maximum_image_size"] or float("inf") - min_image_filesize = file_size_limits["minimum_image_size"] - max_other_filesize = file_size_limits["maximum_other_size"] or float("inf") - min_other_filesize = file_size_limits["minimum_other_size"] + file_size_limits = self.manager.config_manager.settings_data.file_size_limits + max_video_filesize = file_size_limits.maximum_video_size or float("inf") + min_video_filesize = file_size_limits.minimum_video_size + max_image_filesize = file_size_limits.maximum_image_size or float("inf") + min_image_filesize = file_size_limits.minimum_image_size + max_other_filesize = file_size_limits.maximum_other_size or float("inf") + min_other_filesize = file_size_limits.minimum_other_size if media.ext in FILE_FORMATS["Images"]: proceed = min_image_filesize < media.filesize < max_image_filesize diff --git a/cyberdrop_dl/clients/errors.py b/cyberdrop_dl/clients/errors.py index 5bfe0b047..253d196eb 100644 --- a/cyberdrop_dl/clients/errors.py +++ b/cyberdrop_dl/clients/errors.py @@ -12,6 +12,10 @@ from cyberdrop_dl.scraper.crawler import ScrapeItem from cyberdrop_dl.utils.data_enums_classes.url_objects import MediaItem +VALIDATION_ERROR_FOOTER = """ +Read the documentation for guidance on how to resolve this error: https://script-ware.gitbook.io/cyberdrop-dl/reference/configuration-options +Please note, this is not a bug. Do not open issues related to this""" + class CDLBaseError(Exception): """Base exception for cyberdrop-dl errors.""" @@ -130,6 +134,5 @@ class InvalidYamlError(CDLBaseError): def __init__(self, file: Path, e: ConstructorError) -> None: """This error will be thrown when a yaml config file has invalid values.""" mark = e.problem_mark if hasattr(e, "problem_mark") else e - message = f"ERROR: File '{file}' has an invalid config. Please verify and edit it manually\n {mark}" - self.message_rich = message.replace("ERROR:", "[bold red]ERROR:[/bold red]") + message = f"File '{file.resolve()}' has an invalid config. 
Please verify and edit it manually\n {mark}\n\n{VALIDATION_ERROR_FOOTER}" super().__init__("Invalid YAML", message=message, origin=file) diff --git a/cyberdrop_dl/clients/hash_client.py b/cyberdrop_dl/clients/hash_client.py index 520470e9e..92bec6a0f 100644 --- a/cyberdrop_dl/clients/hash_client.py +++ b/cyberdrop_dl/clients/hash_client.py @@ -10,6 +10,7 @@ from send2trash import send2trash from cyberdrop_dl.ui.prompts.basic_prompts import enter_to_continue +from cyberdrop_dl.utils.data_enums_classes.hash import Hashing from cyberdrop_dl.utils.logger import log if TYPE_CHECKING: @@ -67,9 +68,9 @@ def _get_key_from_file(file: Path | str): async def hash_item_helper(self, file: Path | str, original_filename: str, referer: URL): hash = await self.hash_item(file, original_filename, referer, hash_type=self.xxhash) - if self.manager.config_manager.settings_data["Dupe_Cleanup_Options"]["add_md5_hash"]: + if self.manager.config_manager.settings_data.dupe_cleanup_options.add_md5_hash: await self.hash_item(file, original_filename, referer, hash_type=self.md5) - if self.manager.config_manager.settings_data["Dupe_Cleanup_Options"]["add_sha256_hash"]: + if self.manager.config_manager.settings_data.dupe_cleanup_options.add_sha256_hash: await self.hash_item(file, original_filename, referer, hash_type=self.sha256) return hash @@ -110,10 +111,7 @@ async def hash_item(self, file: Path | str, original_filename: str, referer: URL async def hash_item_during_download(self, media_item: MediaItem) -> None: try: - if ( - self.manager.config_manager.settings_data["Dupe_Cleanup_Options"]["hashing"] - != self.manager.config_manager.settings_data["Dupe_Cleanup_Options"]["hashing"].IN_PLACE - ): + if self.manager.config_manager.settings_data.dupe_cleanup_options.hashing != Hashing.IN_PLACE: return await self.hash_item_helper(media_item.complete_file, media_item.original_filename, media_item.referer) except Exception as e: @@ -121,12 +119,9 @@ async def hash_item_during_download(self, media_item: MediaItem) -> None: async def cleanup_dupes_after_download(self) -> None: with self.manager.live_manager.get_hash_live(stop=True): - if ( - self.manager.config_manager.settings_data["Dupe_Cleanup_Options"]["hashing"] - == self.manager.config_manager.settings_data["Dupe_Cleanup_Options"]["hashing"].OFF - ): + if self.manager.config_manager.settings_data.dupe_cleanup_options.hashing == Hashing.OFF: return - if not self.manager.config_manager.settings_data["Dupe_Cleanup_Options"]["auto_dedupe"]: + if not self.manager.config_manager.settings_data.dupe_cleanup_options.auto_dedupe: return file_hashes_dict = await self.get_file_hashes_dict() with self.manager.live_manager.get_remove_file_via_hash_live(stop=True): @@ -172,7 +167,7 @@ async def get_file_hashes_dict(self) -> dict: return hashes_dict def send2trash(self, path: Path) -> None: - if not self.manager.config_manager.settings_data["Dupe_Cleanup_Options"]["send_deleted_to_trash"]: + if not self.manager.config_manager.settings_data.dupe_cleanup_options.send_deleted_to_trash: Path(path).unlink(missing_ok=True) log(f"permanently deleted file at {path}", 10) return True diff --git a/cyberdrop_dl/config_definitions/__init__.py b/cyberdrop_dl/config_definitions/__init__.py new file mode 100644 index 000000000..25ab81551 --- /dev/null +++ b/cyberdrop_dl/config_definitions/__init__.py @@ -0,0 +1,9 @@ +from .authentication_settings import AuthSettings +from .config_settings import ConfigSettings +from .global_settings import GlobalSettings + +__all__ = { + AuthSettings, + ConfigSettings, 
+ GlobalSettings, +} diff --git a/cyberdrop_dl/config_definitions/authentication_settings.py b/cyberdrop_dl/config_definitions/authentication_settings.py new file mode 100644 index 000000000..160729d49 --- /dev/null +++ b/cyberdrop_dl/config_definitions/authentication_settings.py @@ -0,0 +1,74 @@ +from pydantic import BaseModel, Field + +from .custom_types import AliasModel + + +class ForumAuth(BaseModel): + celebforum_xf_user_cookie: str = "" + celebforum_username: str = "" + celebforum_password: str = "" + f95zone_xf_user_cookie: str = "" + f95zone_username: str = "" + f95zone_password: str = "" + leakedmodels_xf_user_cookie: str = "" + leakedmodels_username: str = "" + leakedmodels_password: str = "" + nudostar_xf_user_cookie: str = "" + nudostar_username: str = "" + nudostar_password: str = "" + simpcity_xf_user_cookie: str = "" + simpcity_username: str = "" + simpcity_password: str = "" + socialmediagirls_xf_user_cookie: str = "" + socialmediagirls_username: str = "" + socialmediagirls_password: str = "" + xbunker_xf_user_cookie: str = "" + xbunker_username: str = "" + xbunker_password: str = "" + + +class CoomerAuth(BaseModel): + session: str = "" + + +class XXXBunkerAuth(BaseModel): + PHPSESSID: str = "" + + +class ImgurAuth(BaseModel): + client_id: str = "" + + +class JDownloaderAuth(AliasModel): + username: str = Field("", validation_alias="jdownloader_username") + password: str = Field("", validation_alias="jdownloader_password") + device: str = Field("", validation_alias="jdownloader_device") + + +class RedditAuth(BaseModel): + personal_use_script: str = "" + secret: str = "" + + +class GoFileAuth(AliasModel): + api_key: str = Field("", validation_alias="gofile_api_key") + + +class PixeldrainAuth(AliasModel): + api_key: str = Field("", validation_alias="pixeldrain_api_key") + + +class RealDebridAuth(AliasModel): + api_key: str = Field("", validation_alias="realdebrid_api_key") + + +class AuthSettings(AliasModel): + coomer: CoomerAuth = Field(validation_alias="Coomer", default=CoomerAuth()) + forums: ForumAuth = Field(validation_alias="Forums", default=ForumAuth()) + gofile: GoFileAuth = Field(validation_alias="GoFile", default=GoFileAuth()) + imgur: ImgurAuth = Field(validation_alias="Imgur", default=ImgurAuth()) + jdownloader: JDownloaderAuth = Field(validation_alias="JDownloader", default=JDownloaderAuth()) + pixeldrain: PixeldrainAuth = Field(validation_alias="PixelDrain", default=PixeldrainAuth()) + realdebrid: RealDebridAuth = Field(validation_alias="RealDebrid", default=RealDebridAuth()) + reddit: RedditAuth = Field(validation_alias="Reddit", default=RedditAuth()) + xxxbunker: XXXBunkerAuth = Field(validation_alias="XXXBunker", default=XXXBunkerAuth()) diff --git a/cyberdrop_dl/config_definitions/config_settings.py b/cyberdrop_dl/config_definitions/config_settings.py new file mode 100644 index 000000000..7b01d0504 --- /dev/null +++ b/cyberdrop_dl/config_definitions/config_settings.py @@ -0,0 +1,119 @@ +from logging import INFO +from pathlib import Path + +from pydantic import BaseModel, ByteSize, Field, NonNegativeInt, field_serializer + +from cyberdrop_dl.utils.constants import APP_STORAGE, BROWSERS, DOWNLOAD_STORAGE +from cyberdrop_dl.utils.data_enums_classes.hash import Hashing + +from .custom_types import AliasModel, HttpAppriseURLModel, NonEmptyStr + + +class DownloadOptions(BaseModel): + block_download_sub_folders: bool = False + disable_download_attempt_limit: bool = False + disable_file_timestamps: bool = False + include_album_id_in_folder_name: bool = False + 
include_thread_id_in_folder_name: bool = False + remove_domains_from_folder_names: bool = False + remove_generated_id_from_filenames: bool = False + scrape_single_forum_post: bool = False + separate_posts: bool = False + skip_download_mark_completed: bool = False + skip_referer_seen_before: bool = False + maximum_number_of_children: list[NonNegativeInt] = [] + + +class Files(AliasModel): + input_file: Path = Field(validation_alias="i", default=APP_STORAGE / "Configs" / "{config}" / "URLs.txt") + download_folder: Path = Field(validation_alias="d", default=DOWNLOAD_STORAGE) + + +class Logs(AliasModel): + log_folder: Path = APP_STORAGE / "Configs" / "{config}" / "Logs" + webhook: HttpAppriseURLModel | None = Field(validation_alias="webhook_url", default=None) + main_log_filename: NonEmptyStr = "downloader.log" + last_forum_post_filename: NonEmptyStr = "Last_Scraped_Forum_Posts.csv" + unsupported_urls_filename: NonEmptyStr = "Unsupported_URLs.csv" + download_error_urls_filename: NonEmptyStr = "Download_Error_URLs.csv" + scrape_error_urls_filename: NonEmptyStr = "Scrape_Error_URLs.csv" + rotate_logs: bool = False + + +class FileSizeLimits(BaseModel): + maximum_image_size: ByteSize = ByteSize(0) + maximum_other_size: ByteSize = ByteSize(0) + maximum_video_size: ByteSize = ByteSize(0) + minimum_image_size: ByteSize = ByteSize(0) + minimum_other_size: ByteSize = ByteSize(0) + minimum_video_size: ByteSize = ByteSize(0) + + @field_serializer("*") + def human_readable(self, value: ByteSize | int) -> str: + if not isinstance(value, ByteSize): + value = ByteSize(value) + return value.human_readable(decimal=True) + + +class IgnoreOptions(BaseModel): + exclude_videos: bool = False + exclude_images: bool = False + exclude_audio: bool = False + exclude_other: bool = False + ignore_coomer_ads: bool = False + skip_hosts: list[NonEmptyStr] = [] + only_hosts: list[NonEmptyStr] = [] + + +class RuntimeOptions(BaseModel): + ignore_history: bool = False + log_level: int = INFO + console_log_level: int = 100 + skip_check_for_partial_files: bool = False + skip_check_for_empty_folders: bool = False + delete_partial_files: bool = False + update_last_forum_post: bool = True + send_unsupported_to_jdownloader: bool = False + jdownloader_download_dir: Path | None = None + jdownloader_autostart: bool = False + jdownloader_whitelist: list[NonEmptyStr] = [] + + +class Sorting(BaseModel): + sort_downloads: bool = False + sort_folder: Path = DOWNLOAD_STORAGE / "Cyberdrop-DL Sorted Downloads" + scan_folder: Path | None = None + sort_cdl_only: bool = True + sort_incremementer_format: NonEmptyStr = " ({i})" + sorted_audio: NonEmptyStr = "{sort_dir}/{base_dir}/Audio/{filename}{ext}" + sorted_image: NonEmptyStr = "{sort_dir}/{base_dir}/Images/{filename}{ext}" + sorted_other: NonEmptyStr = "{sort_dir}/{base_dir}/Other/{filename}{ext}" + sorted_video: NonEmptyStr = "{sort_dir}/{base_dir}/Videos/{filename}{ext}" + + +class BrowserCookies(BaseModel): + browsers: list[BROWSERS] = [BROWSERS.chrome] + auto_import: bool = False + sites: list[NonEmptyStr] = [] + + +class DupeCleanupOptions(BaseModel): + hashing: Hashing = Hashing.IN_PLACE + auto_dedupe: bool = True + add_md5_hash: bool = False + add_sha256_hash: bool = False + send_deleted_to_trash: bool = True + + +class ConfigSettings(AliasModel): + browser_cookies: BrowserCookies = Field(validation_alias="Browser_Cookies", default=BrowserCookies()) + download_options: DownloadOptions = Field(validation_alias="Download_Options", default=DownloadOptions()) + dupe_cleanup_options: 
DupeCleanupOptions = Field( + validation_alias="Dupe_Cleanup_Options", default=DupeCleanupOptions() + ) + file_size_limits: FileSizeLimits = Field(validation_alias="File_Size_Limits", default=FileSizeLimits()) + files: Files = Field(validation_alias="Files", default=Files()) + ignore_options: IgnoreOptions = Field(validation_alias="Ignore_Options", default=IgnoreOptions()) + logs: Logs = Field(validation_alias="Logs", default=Logs()) + runtime_options: RuntimeOptions = Field(validation_alias="Runtime_Options", default=RuntimeOptions()) + sorting: Sorting = Field(validation_alias="Sorting", default=Sorting()) diff --git a/cyberdrop_dl/config_definitions/custom_types.py b/cyberdrop_dl/config_definitions/custom_types.py new file mode 100644 index 000000000..b0ede57a7 --- /dev/null +++ b/cyberdrop_dl/config_definitions/custom_types.py @@ -0,0 +1,110 @@ +from __future__ import annotations + +from typing import Annotated + +from pydantic import ( + AfterValidator, + AnyUrl, + BaseModel, + ConfigDict, + HttpUrl, + Secret, + SerializationInfo, + StringConstraints, + model_serializer, + model_validator, +) +from yarl import URL + + +def convert_to_yarl(value: AnyUrl) -> URL: + return URL(str(value)) + + +HttpURL = Annotated[HttpUrl, AfterValidator(convert_to_yarl)] +NonEmptyStr = Annotated[str, StringConstraints(min_length=1, strip_whitespace=True)] +AnyURL = Annotated[AnyUrl, AfterValidator(convert_to_yarl)] +SecretAnyURL = Secret[AnyURL] +SecretHttpURL = Secret[HttpURL] + + +class AliasModel(BaseModel): + model_config = ConfigDict(populate_by_name=True) + + +class FrozenModel(BaseModel): + model_config = ConfigDict(frozen=True) + + +class AppriseURLModel(FrozenModel): + url: SecretAnyURL + tags: set[NonEmptyStr] + + @model_serializer() + def serialize(self, info: SerializationInfo): + dump_secret = info.mode != "json" + url = self.url.get_secret_value() if dump_secret else self.url + tags = self.tags - set("no_logs") + return f"{','.join(tags)}{'=' if self.tags else ''}{url}" + + @model_validator(mode="before") + @staticmethod + def parse_input(value: dict | URL | str): + url_obj = value + tags = None + if isinstance(url_obj, dict): + tags = url_obj.get("tags") + url_obj = url_obj.get("url") + if isinstance(value, URL): + url_obj = str(value) + url = AppriseURL(url_obj, validate=False) + return {"url": url._url, "tags": tags or url.tags} + + +class AppriseURL: + _validator = AppriseURLModel + + def __init__(self, url: URL | str, tags: set | None = None, *, validate: bool = True): + self._actual_url = None + self._url = str(url) + if validate: + self._validate() + else: + self.parse_str(url, tags) + + @property + def tags(self) -> set[str]: + return self._tags + + @property + def url(self) -> URL: + self._validate() + return self._actual_url + + def parse_str(self, url: URL | str, tags: set | None = None): + self._tags = tags or set("no_logs") + self._url = str(url) + self._actual_url = url if isinstance(url, URL) else None + parts = self._url.split("://", 1)[0].split("=", 1) + if len(parts) == 2 and not self._actual_url: + self._tags = set(parts[0].split(",")) + self._url: str = url.split("=", 1)[-1] + + def _validate(self): + if not self._actual_url: + apprise_model = self._validator(url=self._url) + self._actual_url = apprise_model.url + + def __repr__(self): + return f"AppriseURL({self._url}, tags={self.tags})" + + def __str__(self): + return f"{','.join(self.tags)}{'=' if self.tags else ''}{self.url}" + + +class HttpAppriseURLModel(AppriseURLModel): + url: SecretHttpURL + + +class 
HttpAppriseURL(AppriseURL): + _validator = HttpAppriseURLModel diff --git a/cyberdrop_dl/config_definitions/global_settings.py b/cyberdrop_dl/config_definitions/global_settings.py new file mode 100644 index 000000000..4081e89dd --- /dev/null +++ b/cyberdrop_dl/config_definitions/global_settings.py @@ -0,0 +1,58 @@ +from pydantic import BaseModel, ByteSize, Field, NonNegativeFloat, PositiveInt, field_serializer +from yarl import URL + +from .custom_types import AliasModel, HttpURL, NonEmptyStr + + +class General(BaseModel): + allow_insecure_connections: bool = False + user_agent: NonEmptyStr = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0" + proxy: HttpURL | None = None + flaresolverr: HttpURL | None = None + max_file_name_length: PositiveInt = 95 + max_folder_name_length: PositiveInt = 60 + required_free_space: ByteSize = ByteSize._validate("5GB", "") + + @field_serializer("required_free_space") + def human_readable(self, value: ByteSize | int) -> str: + if not isinstance(value, ByteSize): + value = ByteSize(value) + return value.human_readable(decimal=True) + + @field_serializer("flaresolverr", "proxy") + def convert_to_str(self, value: URL) -> str: + if isinstance(value, URL): + return str(value) + return value + + +class RateLimitingOptions(BaseModel): + connection_timeout: PositiveInt = 15 + download_attempts: PositiveInt = 5 + read_timeout: PositiveInt = 300 + rate_limit: PositiveInt = 50 + download_delay: NonNegativeFloat = 0.5 + max_simultaneous_downloads: PositiveInt = 15 + max_simultaneous_downloads_per_domain: PositiveInt = 3 + download_speed_limit: ByteSize = ByteSize(0) + + @field_serializer("download_speed_limit") + def human_readable(self, value: ByteSize | int) -> str: + if not isinstance(value, ByteSize): + value = ByteSize(value) + return value.human_readable(decimal=True) + + +class UIOptions(BaseModel): + vi_mode: bool = False + refresh_rate: PositiveInt = 10 + scraping_item_limit: PositiveInt = 5 + downloading_item_limit: PositiveInt = 5 + + +class GlobalSettings(AliasModel): + general: General = Field(validation_alias="General", default=General()) + rate_limiting_options: RateLimitingOptions = Field( + validation_alias="Rate_Limiting_Options", default=RateLimitingOptions() + ) + ui_options: UIOptions = Field(validation_alias="UI_Options", default=UIOptions()) diff --git a/cyberdrop_dl/downloader/downloader.py b/cyberdrop_dl/downloader/downloader.py index 3ed10e496..291b76b65 100644 --- a/cyberdrop_dl/downloader/downloader.py +++ b/cyberdrop_dl/downloader/downloader.py @@ -38,10 +38,8 @@ async def wrapper(self: Downloader, *args, **kwargs) -> None: except DownloadError as e: self.attempt_task_removal(media_item) - max_attempts = self.manager.config_manager.global_settings_data["Rate_Limiting_Options"][ - "download_attempts" - ] - if self.manager.config_manager.settings_data["Download_Options"]["disable_download_attempt_limit"]: + max_attempts = self.manager.config_manager.global_settings_data.rate_limiting_options.download_attempts + if self.manager.config_manager.settings_data.download_options.disable_download_attempt_limit: max_attempts = 1 if e.status != 999: @@ -118,9 +116,9 @@ def startup(self) -> None: self.client = self.manager.client_manager.downloader_session self._semaphore = asyncio.Semaphore(self.manager.download_manager.get_download_limit(self.domain)) - self.manager.path_manager.download_dir.mkdir(parents=True, exist_ok=True) - if self.manager.config_manager.settings_data["Sorting"]["sort_downloads"]: - 
self.manager.path_manager.sorted_dir.mkdir(parents=True, exist_ok=True) + self.manager.path_manager.download_folder.mkdir(parents=True, exist_ok=True) + if self.manager.config_manager.settings_data.sorting.sort_downloads: + self.manager.path_manager.sorted_folder.mkdir(parents=True, exist_ok=True) async def run(self, media_item: MediaItem) -> None: """Runs the download loop.""" @@ -159,9 +157,9 @@ def check_file_can_download(self, media_item: MediaItem) -> None: def set_file_datetime(self, media_item: MediaItem, complete_file: Path) -> None: """Sets the file's datetime.""" - if self.manager.config_manager.settings_data["Download_Options"]["disable_file_timestamps"]: + if self.manager.config_manager.settings_data.download_options.disable_file_timestamps: return - if not isinstance(media_item.datetime, Field): + if not media_item.datetime: file = File(str(complete_file)) file.set( created=media_item.datetime, @@ -171,10 +169,10 @@ def set_file_datetime(self, media_item: MediaItem, complete_file: Path) -> None: def attempt_task_removal(self, media_item: MediaItem) -> None: """Attempts to remove the task from the progress bar.""" - if not isinstance(media_item.task_id, Field): + if media_item.task_id is not None: with contextlib.suppress(ValueError): self.manager.progress_manager.file_progress.remove_file(media_item.task_id) - media_item.task_id = field(init=False) + media_item.task_id = None """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" @@ -219,7 +217,7 @@ async def download(self, media_item: MediaItem) -> None: aiohttp.ServerTimeoutError, ) as e: ui_message = getattr(e, "status", type(e).__name__) - if isinstance(media_item.partial_file, Path) and media_item.partial_file.is_file(): + if media_item.partial_file and media_item.partial_file.is_file(): size = media_item.partial_file.stat().st_size if ( media_item.filename in self._current_attempt_filesize diff --git a/cyberdrop_dl/main.py b/cyberdrop_dl/main.py index 17d9b0c4b..1dc56134e 100644 --- a/cyberdrop_dl/main.py +++ b/cyberdrop_dl/main.py @@ -11,6 +11,7 @@ from time import perf_counter from typing import TYPE_CHECKING +from pydantic import ValidationError from rich.console import Console from rich.logging import RichHandler @@ -32,6 +33,7 @@ send_webhook_message, sent_apprise_notifications, ) +from cyberdrop_dl.utils.yaml import handle_validation_error if TYPE_CHECKING: from collections.abc import Callable @@ -47,11 +49,20 @@ def startup() -> Manager: manager = Manager() manager.startup() - if not manager.args_manager.immediate_download: + if not manager.parsed_args.cli_only_args.download: ProgramUI(manager) except InvalidYamlError as e: - print_to_console(e.message_rich) + print_to_console(e.message, error=True) + sys.exit(1) + + except ValidationError as e: + sources = { + "GlobalSettings": manager.config_manager.global_settings, + "ConfigSettings": manager.config_manager.settings, + "AuthSettings": manager.config_manager.authentication_settings, + } + handle_validation_error(e, sources=sources) sys.exit(1) except KeyboardInterrupt: @@ -64,7 +75,7 @@ def startup() -> Manager: async def runtime(manager: Manager) -> None: """Main runtime loop for the program, this will run until all scraping and downloading is complete.""" - if manager.args_manager.sort_all_configs: + if manager.parsed_args.deprecated_args.sort_all_configs: return with manager.live_manager.get_main_live(stop=True): @@ -76,7 +87,7 @@ async def runtime(manager: Manager) -> None: def pre_runtime(manager: 
Manager) -> None: """Actions to complete before main runtime.""" - if manager.config_manager.settings_data["Browser_Cookies"]["auto_import"]: + if manager.config_manager.settings_data.browser_cookies.auto_import: get_cookies_from_browsers(manager) @@ -89,17 +100,15 @@ async def post_runtime(manager: Manager) -> None: 20, ) # checking and removing dupes - if not manager.args_manager.sort_all_configs: + if not manager.parsed_args.deprecated_args.sort_all_configs: await manager.hash_manager.hash_client.cleanup_dupes_after_download() - if (isinstance(manager.args_manager.sort_downloads, bool) and manager.args_manager.sort_downloads) or ( - manager.config_manager.settings_data["Sorting"]["sort_downloads"] and not manager.args_manager.retry_any - ): + if manager.config_manager.settings_data.sorting.sort_downloads and not manager.parsed_args.cli_only_args.retry_any: sorter = Sorter(manager) await sorter.sort() await check_partials_and_empty_folders(manager) - if manager.config_manager.settings_data["Runtime_Options"]["update_last_forum_post"]: + if manager.config_manager.settings_data.runtime_options.update_last_forum_post: await manager.log_manager.update_last_forum_post() @@ -109,15 +118,15 @@ def setup_debug_logger(manager: Manager) -> Path | None: running_in_IDE = os.getenv("PYCHARM_HOSTED") or os.getenv("TERM_PROGRAM") == "vscode" from cyberdrop_dl.utils import constants - if running_in_IDE or manager.config_manager.settings_data["Runtime_Options"]["log_level"] == -1: - manager.config_manager.settings_data["Runtime_Options"]["log_level"] = 10 + if running_in_IDE or manager.config_manager.settings_data.runtime_options.log_level == -1: + manager.config_manager.settings_data.runtime_options.log_level = 10 constants.DEBUG_VAR = True - if running_in_IDE or manager.config_manager.settings_data["Runtime_Options"]["console_log_level"] == -1: + if running_in_IDE or manager.config_manager.settings_data.runtime_options.console_log_level == -1: constants.CONSOLE_DEBUG_VAR = True if constants.DEBUG_VAR: - logger_debug.setLevel(manager.config_manager.settings_data["Runtime_Options"]["log_level"]) + logger_debug.setLevel(manager.config_manager.settings_data.runtime_options.log_level) debug_log_file_path = Path(__file__).parent / "cyberdrop_dl_debug.log" if running_in_IDE: debug_log_file_path = Path(__file__).parents[1] / "cyberdrop_dl_debug.log" @@ -125,12 +134,12 @@ def setup_debug_logger(manager: Manager) -> Path | None: rich_file_handler_debug = RichHandler( **constants.RICH_HANDLER_DEBUG_CONFIG, console=Console(file=debug_log_file_path.open("w", encoding="utf8"), width=constants.DEFAULT_CONSOLE_WIDTH), - level=manager.config_manager.settings_data["Runtime_Options"]["log_level"], + level=manager.config_manager.settings_data.runtime_options.log_level, ) logger_debug.addHandler(rich_file_handler_debug) # aiosqlite_log = logging.getLogger("aiosqlite") - # aiosqlite_log.setLevel(manager.config_manager.settings_data['Runtime_Options']['log_level']) + # aiosqlite_log.setLevel(manager.config_manager.settings_data.runtime_options.log_level) # aiosqlite_log.addHandler(file_handler_debug) return debug_log_file_path.resolve() if debug_log_file_path else None @@ -140,7 +149,7 @@ def setup_logger(manager: Manager, config_name: str) -> None: from cyberdrop_dl.utils import constants logger = logging.getLogger("cyberdrop_dl") - if manager.args_manager.all_configs: + if manager.multiconfig: if len(logger.handlers) > 0: log("Picking new config...", 20) manager.config_manager.change_config(config_name) @@ -150,10 +159,10 
@@ def setup_logger(manager: Manager, config_name: str) -> None: logger.removeHandler(logger.handlers[0]) old_file_handler.close() - logger.setLevel(manager.config_manager.settings_data["Runtime_Options"]["log_level"]) + logger.setLevel(manager.config_manager.settings_data.runtime_options.log_level) if constants.DEBUG_VAR: - manager.config_manager.settings_data["Runtime_Options"]["log_level"] = 10 + manager.config_manager.settings_data.runtime_options.log_level = 10 rich_file_handler = RichHandler( **constants.RICH_HANDLER_CONFIG, @@ -161,11 +170,11 @@ def setup_logger(manager: Manager, config_name: str) -> None: file=manager.path_manager.main_log.open("w", encoding="utf8"), width=constants.DEFAULT_CONSOLE_WIDTH, ), - level=manager.config_manager.settings_data["Runtime_Options"]["log_level"], + level=manager.config_manager.settings_data.runtime_options.log_level, ) logger.addHandler(rich_file_handler) - constants.CONSOLE_LEVEL = manager.config_manager.settings_data["Runtime_Options"]["console_log_level"] + constants.CONSOLE_LEVEL = manager.config_manager.settings_data.runtime_options.console_log_level def ui_error_handling_wrapper(func: Callable) -> None: @@ -194,7 +203,7 @@ async def director(manager: Manager) -> None: debug_log_file_path = setup_debug_logger(manager) configs_to_run = [manager.config_manager.loaded_config] - if manager.args_manager.all_configs: + if manager.multiconfig: configs_to_run = manager.config_manager.get_configs() configs_to_run.sort() diff --git a/cyberdrop_dl/managers/args_manager.py b/cyberdrop_dl/managers/args_manager.py deleted file mode 100644 index 61138c6af..000000000 --- a/cyberdrop_dl/managers/args_manager.py +++ /dev/null @@ -1,164 +0,0 @@ -from __future__ import annotations - -from dataclasses import field -from pathlib import Path - -import arrow - -from cyberdrop_dl.utils.args.args import parse_args - - -class ArgsManager: - def __init__(self) -> None: - self.parsed_args = {} - - self.proxy = "" - self.flaresolverr = "" - - self.all_configs = False - self.sort_all_configs = False - self.retry_failed = False - self.retry_all = False - self.retry_any = False - self.retry_maintenance = False - self.webhook_url = "" - self.max_items = None - - self.immediate_download = False - self.no_ui = False - self.load_config_from_args = False - self.load_config_name = "" - self.other_links: list = [] - self.additive_args = ["skip_hosts", "only_hosts"] - - # Files - self.input_file = None - self.download_dir = None - self.config_file = None - self.appdata_dir = None - self.log_dir = None - - # Sorting - self.sort_downloads = field(init=False) - self.sort_cdl_only = field(init=True) - self.sort_folder = None - self.scan_folder = None - - # Logs - self.main_log_filename = None - self.last_forum_post_filename = None - self.unsupported_urls_filename = None - self.download_error_urls_filename = None - self.scrape_error_urls_filename = None - - # UI - self.vi_mode = None - self.after = None - self.before = None - - self._convert_to_paths = [ - "input_file", - "download_dir", - "config_file", - "appdata_dir", - "log_dir", - "sort_folder", - "scan_folder", - ] - - def startup(self) -> None: - """Parses arguments and sets variables accordingly.""" - if self.parsed_args: - return - - self.parsed_args = parse_args().__dict__ - - self.immediate_download = self.parsed_args["download"] - self.load_config_name = self.parsed_args["config"] - self.vi_mode = self.parsed_args["vi_mode"] - - for arg in self._convert_to_paths: - value = self.parsed_args.get(arg) - if value: - 
setattr(self, arg, Path(value)) - - if self.parsed_args["no_ui"]: - self.immediate_download = True - self.no_ui = True - - if self.load_config_name: - self.load_config_from_args = True - - if self.parsed_args["download_all_configs"]: - self.all_configs = True - self.immediate_download = True - - if self.parsed_args["sort_all_configs"]: - self.sort_all_configs = True - self.all_configs = True - self.immediate_download = True - if self.parsed_args["retry_failed"]: - self.retry_failed = True - self.retry_any = True - self.immediate_download = True - if self.parsed_args["retry_all"]: - self.retry_all = True - self.retry_any = True - self.immediate_download = True - if self.parsed_args["retry_maintenance"]: - self.retry_maintenance = True - self.immediate_download = True - - if self.parsed_args["config_file"]: - self.immediate_download = True - if self.parsed_args["sort_downloads"]: - self.sort_downloads = True - if not self.parsed_args["sort_all_downloads"]: - self.sort_cdl_only = True - if self.parsed_args["main_log_filename"]: - self.main_log_filename = self.parsed_args["main_log_filename"] - if self.parsed_args["last_forum_post_filename"]: - self.last_forum_post_filename = self.parsed_args["last_forum_post_filename"] - if self.parsed_args["unsupported_urls_filename"]: - self.unsupported_urls_filename = self.parsed_args["unsupported_urls_filename"] - if self.parsed_args["download_error_urls_filename"]: - self.download_error_urls_filename = self.parsed_args["download_error_urls_filename"] - if self.parsed_args["scrape_error_urls_filename"]: - self.scrape_error_urls_filename = self.parsed_args["scrape_error_urls_filename"] - - if self.parsed_args["proxy"]: - self.proxy = self.parsed_args["proxy"] - if self.parsed_args["flaresolverr"]: - self.flaresolverr = self.parsed_args["flaresolverr"] - - self.other_links = self.parsed_args["links"] - - self.after = self.parsed_args["completed_after"] or arrow.get(0) - self.before = self.parsed_args["completed_before"] or arrow.get("3000") - self.max_items = self.parsed_args["max_items_retry"] - self.webhook_url = self.parsed_args["webhook_url"] - - self.after = self.parsed_args["completed_after"] or arrow.get(0) - self.before = self.parsed_args["completed_before"] or arrow.get("3000") - self.max_items = self.parsed_args["max_items_retry"] - - del self.parsed_args["download"] - del self.parsed_args["download_all_configs"] - del self.parsed_args["config"] - del self.parsed_args["no_ui"] - del self.parsed_args["retry_failed"] - del self.parsed_args["retry_all"] - del self.parsed_args["retry_maintenance"] - del self.parsed_args["input_file"] - del self.parsed_args["download_dir"] - del self.parsed_args["appdata_dir"] - del self.parsed_args["config_file"] - del self.parsed_args["log_dir"] - del self.parsed_args["proxy"] - del self.parsed_args["links"] - del self.parsed_args["sort_downloads"] - del self.parsed_args["sort_all_downloads"] - del self.parsed_args["sort_folder"] - del self.parsed_args["scan_folder"] - del self.parsed_args["completed_after"] - del self.parsed_args["completed_before"] diff --git a/cyberdrop_dl/managers/cache_manager.py b/cyberdrop_dl/managers/cache_manager.py index eca60bdf8..0069969ce 100644 --- a/cyberdrop_dl/managers/cache_manager.py +++ b/cyberdrop_dl/managers/cache_manager.py @@ -3,7 +3,7 @@ from dataclasses import field from typing import TYPE_CHECKING, Any -import yaml +from cyberdrop_dl.utils import yaml if TYPE_CHECKING: from pathlib import Path @@ -11,19 +11,6 @@ from cyberdrop_dl.managers.manager import Manager -def 
_save_yaml(file: Path, data: dict) -> None: - """Saves a dict to a yaml file.""" - file.parent.mkdir(parents=True, exist_ok=True) - with file.open("w") as yaml_file: - yaml.dump(data, yaml_file) - - -def _load_yaml(file: Path) -> dict: - """Loads a yaml file and returns it as a dict.""" - with file.open() as yaml_file: - return yaml.load(yaml_file.read(), Loader=yaml.FullLoader) - - class CacheManager: def __init__(self, manager: Manager) -> None: self.manager = manager @@ -38,12 +25,12 @@ def startup(self, cache_file: Path) -> None: self.save("default_config", "Default") self.load() - if self.manager.args_manager.appdata_dir: + if self.manager.parsed_args.cli_only_args.appdata_folder: self.save("first_startup_completed", True) def load(self) -> None: """Loads the cache file into memory.""" - self._cache = _load_yaml(self.cache_file) + self._cache = yaml.load(self.cache_file) def get(self, key: str) -> Any: """Returns the value of a key in the cache.""" @@ -52,10 +39,10 @@ def get(self, key: str) -> Any: def save(self, key: str, value: Any) -> None: """Saves a key and value to the cache.""" self._cache[key] = value - _save_yaml(self.cache_file, self._cache) + yaml.save(self.cache_file, self._cache) def remove(self, key: str) -> None: """Removes a key from the cache.""" if key in self._cache: del self._cache[key] - _save_yaml(self.cache_file, self._cache) + yaml.save(self.cache_file, self._cache) diff --git a/cyberdrop_dl/managers/client_manager.py b/cyberdrop_dl/managers/client_manager.py index 88b3c5e6c..989393d56 100644 --- a/cyberdrop_dl/managers/client_manager.py +++ b/cyberdrop_dl/managers/client_manager.py @@ -50,26 +50,19 @@ class ClientManager: def __init__(self, manager: Manager) -> None: self.manager = manager - self.connection_timeout = manager.config_manager.global_settings_data["Rate_Limiting_Options"][ - "connection_timeout" - ] - self.read_timeout = manager.config_manager.global_settings_data["Rate_Limiting_Options"]["read_timeout"] - self.rate_limit = manager.config_manager.global_settings_data["Rate_Limiting_Options"]["rate_limit"] - - self.download_delay = manager.config_manager.global_settings_data["Rate_Limiting_Options"]["download_delay"] - self.user_agent = manager.config_manager.global_settings_data["General"]["user_agent"] - self.verify_ssl = not manager.config_manager.global_settings_data["General"]["allow_insecure_connections"] - self.simultaneous_per_domain = manager.config_manager.global_settings_data["Rate_Limiting_Options"][ - "max_simultaneous_downloads_per_domain" - ] + global_settings_data = manager.config_manager.global_settings_data + self.connection_timeout = global_settings_data.rate_limiting_options.connection_timeout + self.read_timeout = global_settings_data.rate_limiting_options.read_timeout + self.rate_limit = global_settings_data.rate_limiting_options.rate_limit + + self.download_delay = global_settings_data.rate_limiting_options.download_delay + self.user_agent = global_settings_data.general.user_agent + self.verify_ssl = not global_settings_data.general.allow_insecure_connections + self.simultaneous_per_domain = global_settings_data.rate_limiting_options.max_simultaneous_downloads_per_domain self.ssl_context = ssl.create_default_context(cafile=certifi.where()) if self.verify_ssl else False self.cookies = aiohttp.CookieJar(quote_cookie=False) - self.proxy = ( - manager.config_manager.global_settings_data["General"]["proxy"] - if not manager.args_manager.proxy - else manager.args_manager.proxy - ) + self.proxy = global_settings_data.general.proxy 
self.domain_rate_limits = { "bunkrr": AsyncLimiter(5, 1), @@ -93,7 +86,7 @@ def __init__(self, manager: Manager) -> None: self.global_rate_limiter = AsyncLimiter(self.rate_limit, 1) self.session_limit = asyncio.Semaphore(50) self.download_session_limit = asyncio.Semaphore( - self.manager.config_manager.global_settings_data["Rate_Limiting_Options"]["max_simultaneous_downloads"], + self.manager.config_manager.global_settings_data.rate_limiting_options.max_simultaneous_downloads, ) self.scraper_session = ScraperClient(self) @@ -131,7 +124,7 @@ async def check_http_status( if download and headers.get("ETag") in DOWNLOAD_ERROR_ETAGS: message = DOWNLOAD_ERROR_ETAGS.get(headers.get("ETag")) raise DownloadError(HTTPStatus.NOT_FOUND, message=message, origin=origin) - + response_text = None with contextlib.suppress(UnicodeDecodeError): response_text = await response.text() @@ -152,14 +145,11 @@ async def check_http_status( if "data" in JSON_Resp and "error" in JSON_Resp["data"]: raise ScrapeError(JSON_Resp["status"], JSON_Resp["data"]["error"], origin=origin) - - status = status if headers.get("Content-Type") else CustomHTTPStatus.IM_A_TEAPOT message = "No content-type in response header" if headers.get("Content-Type") else None raise DownloadError(status=status, message=message, origin=origin) - @staticmethod def check_bunkr_maint(headers: dict): if headers.get("Content-Length") == "322509" and headers.get("Content-Type") == "video/mp4": @@ -207,15 +197,9 @@ class Flaresolverr: def __init__(self, client_manager: ClientManager) -> None: self.client_manager = client_manager - self.flaresolverr_host = ( - client_manager.manager.args_manager.flaresolverr - or client_manager.manager.config_manager.global_settings_data["General"]["flaresolverr"] - ) + self.flaresolverr_host = client_manager.manager.config_manager.global_settings_data.general.flaresolverr self.enabled = bool(self.flaresolverr_host) - if "http" not in self.flaresolverr_host: - self.flaresolverr_host = f"http://{self.flaresolverr_host}" self.session_id = None - self.flaresolverr_host = URL(self.flaresolverr_host) async def _request( self, diff --git a/cyberdrop_dl/managers/config_manager.py b/cyberdrop_dl/managers/config_manager.py index f6f69008e..245e92491 100644 --- a/cyberdrop_dl/managers/config_manager.py +++ b/cyberdrop_dl/managers/config_manager.py @@ -1,66 +1,18 @@ from __future__ import annotations -import copy import shutil from dataclasses import field -from pathlib import Path from time import sleep from typing import TYPE_CHECKING -import yaml - -from cyberdrop_dl.clients.errors import InvalidYamlError +from cyberdrop_dl.config_definitions import AuthSettings, ConfigSettings, GlobalSettings from cyberdrop_dl.managers.log_manager import LogManager -from cyberdrop_dl.utils.args.config_definitions import authentication_settings, global_settings, settings +from cyberdrop_dl.utils import yaml if TYPE_CHECKING: - from cyberdrop_dl.managers.manager import Manager -from cyberdrop_dl.utils.data_enums_classes.hash import Hashing - - -def _match_config_dicts(default: dict, existing: dict) -> dict: - """Matches the keys of two dicts and returns the new dict with the values of the existing dict.""" - for group in default: - for key in default[group]: - if group in existing and key in existing[group]: - default[group][key] = existing[group][key] - return copy.deepcopy(default) - - -# Custom representer function for YAML -def _enum_representer(dumper, data): - return dumper.represent_int(data.value) - + from pathlib import Path -def 
_save_yaml(file: Path, data: dict) -> None: - """Saves a dict to a yaml file.""" - file.parent.mkdir(parents=True, exist_ok=True) - # Register the custom representer - yaml.add_representer(Hashing, _enum_representer) - # dump - with file.open("w") as yaml_file: - yaml.dump(data, yaml_file) - pass - - -def _load_yaml(file: Path) -> dict: - """Loads a yaml file and returns it as a dict.""" - try: - with file.open() as yaml_file: - yaml_values = yaml.load(yaml_file.read(), Loader=yaml.FullLoader) - return yaml_values if yaml_values else {} - except yaml.constructor.ConstructorError as e: - raise InvalidYamlError(file, e) from None - - -def get_keys(dl: dict | list, keys: list | None = None) -> set: - keys = keys or [] - if isinstance(dl, dict): - keys += dl.keys() - _ = [get_keys(x, keys) for x in dl.values()] - elif isinstance(dl, list): - _ = [get_keys(x, keys) for x in dl] - return set(keys) + from cyberdrop_dl.managers.manager import Manager class ConfigManager: @@ -72,9 +24,9 @@ def __init__(self, manager: Manager) -> None: self.settings: Path = field(init=False) self.global_settings: Path = field(init=False) - self.authentication_data: dict = field(init=False) - self.settings_data: dict = field(init=False) - self.global_settings_data: dict = field(init=False) + self.authentication_data: AuthSettings = field(init=False) + self.settings_data: ConfigSettings = field(init=False) + self.global_settings_data: GlobalSettings = field(init=False) def startup(self) -> None: """Startup process for the config manager.""" @@ -82,219 +34,100 @@ def startup(self) -> None: self.loaded_config = self.manager.cache_manager.get("default_config") if not self.loaded_config: self.loaded_config = "Default" - if self.manager.args_manager.load_config_from_args: - self.loaded_config = self.manager.args_manager.load_config_name + if self.manager.parsed_args.cli_only_args.config: + self.loaded_config = self.manager.parsed_args.cli_only_args.config + + self.settings = self.manager.path_manager.config_folder / self.loaded_config / "settings.yaml" + self.global_settings = self.manager.path_manager.config_folder / "global_settings.yaml" + self.authentication_settings = self.manager.path_manager.config_folder / "authentication.yaml" + auth_override = self.manager.path_manager.config_folder / self.loaded_config / "authentication.yaml" - self.authentication_settings = self.manager.path_manager.config_dir / "authentication.yaml" - self.global_settings = self.manager.path_manager.config_dir / "global_settings.yaml" - self.settings = self.manager.path_manager.config_dir / self.loaded_config / "settings.yaml" - if (self.manager.path_manager.config_dir / self.loaded_config / "authentication.yaml").is_file(): - self.authentication_settings = ( - self.manager.path_manager.config_dir / self.loaded_config / "authentication.yaml" - ) + if auth_override.is_file(): + self.authentication_settings = auth_override self.settings.parent.mkdir(parents=True, exist_ok=True) + self.pydantic_config = self.manager.cache_manager.get("pydantic_config") self.load_configs() + if not self.pydantic_config: + self.pydantic_config = True + self.manager.cache_manager.save("pydantic_config", True) def load_configs(self) -> None: """Loads all the configs.""" + self._load_authentication_config() + self._load_global_settings_config() + self._load_settings_config() + + def _load_authentication_config(self) -> None: + """Verifies the authentication config file and creates it if it doesn't exist.""" + posible_fields = AuthSettings.model_fields.keys() if 
self.authentication_settings.is_file(): - self._verify_authentication_config() - else: - self.authentication_data = copy.deepcopy(authentication_settings) - _save_yaml(self.authentication_settings, self.authentication_data) + self.authentication_data = AuthSettings.model_validate(yaml.load(self.authentication_settings)) + if posible_fields == self.authentication_data.model_fields_set and self.pydantic_config: + return - if self.global_settings.is_file(): - self._verify_global_settings_config() else: - self.global_settings_data = copy.deepcopy(global_settings) - _save_yaml(self.global_settings, self.global_settings_data) + self.authentication_data = AuthSettings() - if self.manager.args_manager.config_file: - self.settings = Path(self.manager.args_manager.config_file) + yaml.save(self.authentication_settings, self.authentication_data) + + def _load_settings_config(self) -> None: + """Verifies the settings config file and creates it if it doesn't exist.""" + posible_fields = ConfigSettings.model_fields.keys() + if self.manager.parsed_args.cli_only_args.config_file: + self.settings = self.manager.parsed_args.cli_only_args.config_file self.loaded_config = "CLI-Arg Specified" if self.settings.is_file(): - self._verify_settings_config() + self.settings_data = ConfigSettings.model_validate(yaml.load(self.settings)) + if posible_fields == self.settings_data.model_fields_set and self.pydantic_config: + return else: from cyberdrop_dl.utils import constants - self.settings_data = copy.deepcopy(settings) - self.settings_data["Files"]["input_file"] = ( - constants.APP_STORAGE / "Configs" / self.loaded_config / "URLs.txt" - ) - self.settings_data["Files"]["download_folder"] = constants.DOWNLOAD_STORAGE / "Cyberdrop-DL Downloads" - self.settings_data["Logs"]["log_folder"] = constants.APP_STORAGE / "Configs" / self.loaded_config / "Logs" - self.settings_data["Logs"]["webhook_url"] = "" - self.settings_data["Sorting"]["sort_folder"] = constants.DOWNLOAD_STORAGE / "Cyberdrop-DL Sorted Downloads" - self.settings_data["Sorting"]["scan_folder"] = None - self.write_updated_settings_config() - - def return_verified(self, value) -> any: - if isinstance(value, bool): - return bool(value) - if isinstance(value, int): - return int(value) - if isinstance(value, str): - return str(value) - if isinstance(value, list): - return list(value) - if isinstance(value, dict): - return dict(value) - return value - - def _verify_authentication_config(self) -> None: - """Verifies the authentication config file and creates it if it doesn't exist.""" - default_auth_data = copy.deepcopy(authentication_settings) - existing_auth_data = _load_yaml(self.authentication_settings) + self.settings_data = ConfigSettings() + self.settings_data.files.input_file = constants.APP_STORAGE / "Configs" / self.loaded_config / "URLs.txt" + self.settings_data.files.download_folder = constants.DOWNLOAD_STORAGE / "Cyberdrop-DL Downloads" + self.settings_data.logs.log_folder = constants.APP_STORAGE / "Configs" / self.loaded_config / "Logs" + self.settings_data.sorting.sort_folder = constants.DOWNLOAD_STORAGE / "Cyberdrop-DL Sorted Downloads" - if get_keys(default_auth_data) == get_keys(existing_auth_data): - self.authentication_data = existing_auth_data - return + yaml.save(self.settings, self.settings_data) - self.authentication_data = _match_config_dicts(default_auth_data, existing_auth_data) - _save_yaml(self.authentication_settings, self.authentication_data) - - def _verify_settings_config(self) -> None: - """Verifies the settings config file and 
creates it if it doesn't exist.""" - default_settings_data = copy.deepcopy(settings) - existing_settings_data = _load_yaml(self.settings) - self.settings_data = _match_config_dicts(default_settings_data, existing_settings_data) - paths = set( - [ - ("Files", "input_file"), - ("Files", "download_folder"), - ("Logs", "log_folder"), - ("Sorting", "sort_folder"), - ("Sorting", "scan_folder"), - ] - ) - enums = {("Dupe_Cleanup_Options", "hashing"): Hashing} - for key, value in default_settings_data.items(): - for subkey, subvalue in value.items(): - self.settings_data[key][subkey] = self.return_verified(subvalue) - if (key, subkey) in paths: - path = self.settings_data[key][subkey] - if (path == "None" or path is None) and subkey == "scan_folder": - self.settings_data[key][subkey] = None - else: - self.settings_data[key][subkey] = Path(path) - - if (key, subkey) in enums: - enum_value = self.settings_data[key][subkey] - enum_class = enums[(key, subkey)] - if isinstance(enum_value, str): - self.settings_data[key][subkey] = enum_class[enum_value] - else: - self.settings_data[key][subkey] = enum_class(enum_value) - - if get_keys(default_settings_data) == get_keys(existing_settings_data): - return - - save_data = copy.deepcopy(self.settings_data) - save_data["Files"]["input_file"] = str(save_data["Files"]["input_file"]) - save_data["Files"]["download_folder"] = str(save_data["Files"]["download_folder"]) - save_data["Logs"]["log_folder"] = str(save_data["Logs"]["log_folder"]) - save_data["Logs"]["webhook_url"] = str(save_data["Logs"]["webhook_url"]) - save_data["Sorting"]["sort_folder"] = str(save_data["Sorting"]["sort_folder"]) - save_data["Sorting"]["scan_folder"] = ( - str(save_data["Sorting"]["scan_folder"]) - if save_data["Sorting"]["scan_folder"] not in ["None", None] - else None - ) - _save_yaml(self.settings, save_data) - - def _verify_global_settings_config(self) -> None: + def _load_global_settings_config(self) -> None: """Verifies the global settings config file and creates it if it doesn't exist.""" - default_global_settings_data = copy.deepcopy(global_settings) - existing_global_settings_data = _load_yaml(self.global_settings) - self.global_settings_data = _match_config_dicts(default_global_settings_data, existing_global_settings_data) - - if get_keys(default_global_settings_data) == get_keys(existing_global_settings_data): - self.global_settings_data = existing_global_settings_data - return - - for key, value in default_global_settings_data.items(): - for subkey, subvalue in value.items(): - self.global_settings_data[key][subkey] = self.return_verified(subvalue) + posible_fields = ConfigSettings.model_fields.keys() + if self.global_settings.is_file(): + self.global_settings_data = GlobalSettings.model_validate(yaml.load(self.global_settings)) + if posible_fields == self.global_settings_data.model_fields_set and self.pydantic_config: + return + else: + self.global_settings_data = GlobalSettings() - _save_yaml(self.global_settings, self.global_settings_data) + yaml.save(self.global_settings, self.global_settings_data) """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" - @staticmethod - def create_new_config(new_settings: Path, settings_data: dict) -> None: + def save_as_new_config(self, new_settings: Path, settings_data: ConfigSettings) -> None: """Creates a new settings config file.""" - settings_data["Files"]["input_file"] = ( - str(settings_data["Files"]["input_file"]) if settings_data["Files"]["input_file"] is not None 
else None - ) - settings_data["Files"]["download_folder"] = ( - str(settings_data["Files"]["download_folder"]) - if settings_data["Files"]["download_folder"] is not None - else None - ) - settings_data["Logs"]["log_folder"] = ( - str(settings_data["Logs"]["log_folder"]) if settings_data["Logs"]["log_folder"] is not None else None - ) - settings_data["Logs"]["webhook_url"] = ( - str(settings_data["Logs"]["webhook_url"]) if settings_data["Logs"]["webhook_url"] is not None else None - ) - settings_data["Sorting"]["sort_folder"] = ( - str(settings_data["Sorting"]["sort_folder"]) - if settings_data["Sorting"]["sort_folder"] is not None - else None - ) - settings_data["Sorting"]["scan_folder"] = ( - str(settings_data["Sorting"]["scan_folder"]) - if settings_data["Sorting"]["scan_folder"] not in ["None", None] - else None - ) - - _save_yaml(new_settings, settings_data) + yaml.save(new_settings, settings_data) def write_updated_authentication_config(self) -> None: """Write updated authentication data.""" - _save_yaml(self.authentication_settings, self.authentication_data) + yaml.save(self.authentication_settings, self.authentication_data) def write_updated_settings_config(self) -> None: """Write updated settings data.""" - settings_data = copy.deepcopy(self.settings_data) - settings_data["Files"]["input_file"] = ( - str(settings_data["Files"]["input_file"]) if settings_data["Files"]["input_file"] is not None else None - ) - settings_data["Files"]["download_folder"] = ( - str(settings_data["Files"]["download_folder"]) - if settings_data["Files"]["download_folder"] is not None - else None - ) - settings_data["Logs"]["log_folder"] = ( - str(settings_data["Logs"]["log_folder"]) if settings_data["Logs"]["log_folder"] is not None else None - ) - settings_data["Logs"]["webhook_url"] = ( - str(settings_data["Logs"]["webhook_url"]) if settings_data["Logs"]["webhook_url"] is not None else None - ) - settings_data["Sorting"]["sort_folder"] = ( - str(settings_data["Sorting"]["sort_folder"]) - if settings_data["Sorting"]["sort_folder"] is not None - else None - ) - settings_data["Sorting"]["scan_folder"] = ( - str(settings_data["Sorting"]["scan_folder"]) - if settings_data["Sorting"]["scan_folder"] not in ["None", None] - else None - ) - - _save_yaml(self.settings, settings_data) + yaml.save(self.settings, self.settings_data) def write_updated_global_settings_config(self) -> None: """Write updated global settings data.""" - _save_yaml(self.global_settings, self.global_settings_data) + yaml.save(self.global_settings, self.global_settings_data) """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" def get_configs(self) -> list: """Returns a list of all the configs.""" - return [config.name for config in self.manager.path_manager.config_dir.iterdir() if config.is_dir()] + return [config.name for config in self.manager.path_manager.config_folder.iterdir() if config.is_dir()] def change_default_config(self, config_name: str) -> None: """Changes the default config.""" @@ -308,7 +141,7 @@ def delete_config(self, config_name: str) -> None: if self.manager.cache_manager.get("default_config") == config_name: self.manager.cache_manager.save("default_config", configs[0]) - config = self.manager.path_manager.config_dir / config_name + config = self.manager.path_manager.config_folder / config_name shutil.rmtree(config) def change_config(self, config_name: str) -> None: diff --git a/cyberdrop_dl/managers/db_manager.py b/cyberdrop_dl/managers/db_manager.py index 
0be36a028..3ae961162 100644 --- a/cyberdrop_dl/managers/db_manager.py +++ b/cyberdrop_dl/managers/db_manager.py @@ -33,7 +33,7 @@ async def startup(self) -> None: """Startup process for the DBManager.""" self._db_conn = await aiosqlite.connect(self._db_path) - self.ignore_history = self.manager.config_manager.settings_data["Runtime_Options"]["ignore_history"] + self.ignore_history = self.manager.config_manager.settings_data.runtime_options.ignore_history self.history_table = HistoryTable(self._db_conn) self.hash_table = HashTable(self._db_conn) diff --git a/cyberdrop_dl/managers/download_manager.py b/cyberdrop_dl/managers/download_manager.py index f1b31b028..74a7603f2 100644 --- a/cyberdrop_dl/managers/download_manager.py +++ b/cyberdrop_dl/managers/download_manager.py @@ -60,18 +60,12 @@ def __init__(self, manager: Manager) -> None: def get_download_limit(self, key: str) -> int: """Returns the download limit for a domain.""" - if key in self.download_limits: - instances = self.download_limits[key] - else: - instances = self.manager.config_manager.global_settings_data["Rate_Limiting_Options"][ - "max_simultaneous_downloads_per_domain" - ] + rate_limiting_options = self.manager.config_manager.global_settings_data.rate_limiting_options + instances = self.download_limits.get(key, rate_limiting_options.max_simultaneous_downloads_per_domain) return min( instances, - self.manager.config_manager.global_settings_data["Rate_Limiting_Options"][ - "max_simultaneous_downloads_per_domain" - ], + rate_limiting_options.max_simultaneous_downloads_per_domain, ) @staticmethod @@ -83,7 +77,7 @@ def basic_auth(username: str, password: str) -> str: def check_free_space(self, folder: Path | None = None) -> bool: """Checks if there is enough free space on the drive to continue operating.""" if not folder: - folder = self.manager.path_manager.download_dir + folder = self.manager.path_manager.download_folder folder = folder.resolve() while not folder.is_dir() and folder.parents: @@ -94,19 +88,16 @@ def check_free_space(self, folder: Path | None = None) -> bool: return False free_space = disk_usage(folder).free free_space_gb = free_space / 1024**3 - return free_space_gb >= self.manager.config_manager.global_settings_data["General"]["required_free_space"] + return free_space_gb >= self.manager.config_manager.global_settings_data.general.required_free_space def check_allowed_filetype(self, media_item: MediaItem) -> bool: """Checks if the file type is allowed to download.""" - ignore_options = self.manager.config_manager.settings_data["Ignore_Options"] + ignore_options = self.manager.config_manager.settings_data.ignore_options valid_extensions = FILE_FORMATS["Images"] | FILE_FORMATS["Videos"] | FILE_FORMATS["Audio"] - if media_item.ext in FILE_FORMATS["Images"] and ignore_options["exclude_images"]: + if media_item.ext in FILE_FORMATS["Images"] and ignore_options.exclude_images: return False - if media_item.ext in FILE_FORMATS["Videos"] and ignore_options["exclude_videos"]: + if media_item.ext in FILE_FORMATS["Videos"] and ignore_options.exclude_videos: return False - if media_item.ext in FILE_FORMATS["Audio"] and ignore_options["exclude_audio"]: + if media_item.ext in FILE_FORMATS["Audio"] and ignore_options.exclude_audio: return False - return not ( - self.manager.config_manager.settings_data["Ignore_Options"]["exclude_other"] - and media_item.ext not in valid_extensions - ) + return not (ignore_options.exclude_other and media_item.ext not in valid_extensions) diff --git a/cyberdrop_dl/managers/leaky.py 
b/cyberdrop_dl/managers/leaky.py index 2e3f1fafb..daf85119c 100644 --- a/cyberdrop_dl/managers/leaky.py +++ b/cyberdrop_dl/managers/leaky.py @@ -13,11 +13,12 @@ class LeakyBucket(AsyncLimiter): def __init__(self, manager: Manager) -> None: - self.download_speed_limit = manager.config_manager.global_settings_data["Rate_Limiting_Options"][ - "download_speed_limit" - ] + self.download_speed_limit = ( + manager.config_manager.global_settings_data.rate_limiting_options.download_speed_limit + ) + self.max_amount = 1024 * 1024 * 10 - super().__init__(self.download_speed_limit * 1024, 1) + super().__init__(self.download_speed_limit, 1) async def acquire(self, amount: float = 1) -> None: if self.download_speed_limit <= 0: diff --git a/cyberdrop_dl/managers/live_manager.py b/cyberdrop_dl/managers/live_manager.py index 485f42835..292ff69b9 100644 --- a/cyberdrop_dl/managers/live_manager.py +++ b/cyberdrop_dl/managers/live_manager.py @@ -19,12 +19,13 @@ class LiveManager: def __init__(self, manager: Manager) -> None: self.manager = manager + self.no_ui = self.manager.parsed_args.cli_only_args.no_ui self.live = Live( auto_refresh=True, - refresh_per_second=self.manager.config_manager.global_settings_data["UI_Options"]["refresh_rate"], + refresh_per_second=self.manager.config_manager.global_settings_data.ui_options.refresh_rate, console=console, transient=True, - screen=not self.manager.args_manager.no_ui, + screen=not self.no_ui, ) self.placeholder = Progress( @@ -35,7 +36,7 @@ def __init__(self, manager: Manager) -> None: @contextmanager def get_live(self, layout: Layout, stop: bool = False) -> Generator[Live]: - show = self.placeholder if self.manager.args_manager.no_ui else layout + show = self.placeholder if self.no_ui else layout try: self.live.start() self.live.update(show, refresh=True) diff --git a/cyberdrop_dl/managers/log_manager.py b/cyberdrop_dl/managers/log_manager.py index 027b976ab..b640824d2 100644 --- a/cyberdrop_dl/managers/log_manager.py +++ b/cyberdrop_dl/managers/log_manager.py @@ -20,7 +20,7 @@ class LogManager: def __init__(self, manager: Manager) -> None: self.manager = manager self.main_log: Path = manager.path_manager.main_log - self.last_post_log: Path = manager.path_manager.last_post_log + self.last_post_log: Path = manager.path_manager.last_forum_post_log self.unsupported_urls_log: Path = manager.path_manager.unsupported_urls_log self.download_error_log: Path = manager.path_manager.download_error_log self.scrape_error_log: Path = manager.path_manager.scrape_error_log diff --git a/cyberdrop_dl/managers/manager.py b/cyberdrop_dl/managers/manager.py index 31ea525d7..43447432c 100644 --- a/cyberdrop_dl/managers/manager.py +++ b/cyberdrop_dl/managers/manager.py @@ -1,13 +1,12 @@ from __future__ import annotations -import copy import json from dataclasses import Field, field from time import perf_counter from typing import TYPE_CHECKING from cyberdrop_dl import __version__ -from cyberdrop_dl.managers.args_manager import ArgsManager +from cyberdrop_dl.config_definitions import ConfigSettings, GlobalSettings from cyberdrop_dl.managers.cache_manager import CacheManager from cyberdrop_dl.managers.client_manager import ClientManager from cyberdrop_dl.managers.config_manager import ConfigManager @@ -19,7 +18,7 @@ from cyberdrop_dl.managers.path_manager import PathManager from cyberdrop_dl.managers.progress_manager import ProgressManager from cyberdrop_dl.managers.realdebrid_manager import RealDebridManager -from cyberdrop_dl.utils.args import config_definitions +from 
cyberdrop_dl.utils.args import ParsedArgs from cyberdrop_dl.utils.data_enums_classes.supported_domains import SupportedDomains from cyberdrop_dl.utils.logger import log from cyberdrop_dl.utils.transfer.first_time_setup import TransitionManager @@ -32,7 +31,7 @@ class Manager: def __init__(self) -> None: - self.args_manager: ArgsManager = ArgsManager() + self.parsed_args: ParsedArgs = field(init=False) self.cache_manager: CacheManager = CacheManager(self) self.path_manager: PathManager = field(init=False) self.config_manager: ConfigManager = field(init=False) @@ -59,55 +58,53 @@ def __init__(self) -> None: self.vi_mode: bool = False self.start_time: float = perf_counter() self.downloaded_data: int = 0 + self.multiconfig: bool = False def startup(self) -> None: """Startup process for the manager.""" - self.args_startup() + if isinstance(self.parsed_args, Field): + self.parsed_args = ParsedArgs.parse_args() - if not self.args_manager.appdata_dir: + if not self.parsed_args.cli_only_args.appdata_folder: self.first_time_setup.startup() self.path_manager = PathManager(self) self.path_manager.pre_startup() - self.cache_manager.startup(self.path_manager.cache_dir / "cache.yaml") + self.cache_manager.startup(self.path_manager.cache_folder / "cache.yaml") self.config_manager = ConfigManager(self) self.config_manager.startup() - self.vi_mode = ( - self.config_manager.global_settings_data["UI_Options"]["vi_mode"] - if self.args_manager.vi_mode is None - else self.args_manager.vi_mode - ) + self.args_consolidation() + self.vi_mode = self.config_manager.global_settings_data.ui_options.vi_mode self.path_manager.startup() self.log_manager = LogManager(self) + self.adjust_for_simpcity() + if self.config_manager.loaded_config.casefold() == "all" or self.parsed_args.cli_only_args.multiconfig: + self.multiconfig = True + def adjust_for_simpcity(self) -> None: # Adjust settings for SimpCity update simp_settings_adjusted = self.cache_manager.get("simp_settings_adjusted") if not simp_settings_adjusted: for config in self.config_manager.get_configs(): if config != self.config_manager.loaded_config: self.config_manager.change_config(config) - self.config_manager.settings_data["Runtime_Options"]["update_last_forum_post"] = True + self.config_manager.settings_data.runtime_options.update_last_forum_post = True self.config_manager.write_updated_settings_config() - global_settings = self.config_manager.global_settings_data - if global_settings["Rate_Limiting_Options"]["download_attempts"] >= 10: - global_settings["Rate_Limiting_Options"]["download_attempts"] = 5 - if global_settings["Rate_Limiting_Options"]["max_simultaneous_downloads_per_domain"] > 15: - global_settings["Rate_Limiting_Options"]["max_simultaneous_downloads_per_domain"] = 5 + + rate_limit_options = self.config_manager.global_settings_data.rate_limiting_options + if rate_limit_options.download_attempts >= 10: + rate_limit_options.download_attempts = 5 + if rate_limit_options.max_simultaneous_downloads_per_domain > 15: + rate_limit_options.max_simultaneous_downloads_per_domain = 5 self.config_manager.write_updated_global_settings_config() self.cache_manager.save("simp_settings_adjusted", True) - def args_startup(self) -> None: - """Start the args manager.""" - if not self.args_manager.parsed_args: - self.args_manager.startup() - """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" async def async_startup(self) -> None: """Async startup process for the manager.""" - self.args_consolidation() 
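As context for the ParsedArgs.parse_args() call above: this PR builds the CLI options dynamically from the pydantic models themselves. The snippet below is only a rough sketch of that idea under assumed names (DownloadOptions, add_model_args, and the flags are illustrative stand-ins, not the project's real definitions):

# Illustrative sketch only: derive argparse options from a pydantic model's fields.
# The model and field names here are hypothetical, not cyberdrop-dl's actual models.
import argparse
from pydantic import BaseModel


class DownloadOptions(BaseModel):
    block_download_sub_folders: bool = False
    disable_download_attempt_limit: bool = False
    maximum_thread_depth: int = 0


def add_model_args(parser: argparse.ArgumentParser, model: type[BaseModel]) -> None:
    for name, field in model.model_fields.items():
        flag = f"--{name.replace('_', '-')}"
        if field.annotation is bool:
            # SUPPRESS keeps the attribute out of the namespace unless the flag is
            # passed, so a later model_dump(exclude_unset=True) only sees real overrides.
            parser.add_argument(flag, action="store_true", default=argparse.SUPPRESS)
        else:
            parser.add_argument(flag, type=field.annotation, default=argparse.SUPPRESS)


parser = argparse.ArgumentParser()
add_model_args(parser, DownloadOptions)
cli_overrides = vars(parser.parse_args(["--maximum-thread-depth", "2"]))
options = DownloadOptions.model_validate(cli_overrides)  # only the passed flag is "set"
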
self.args_logging() if not isinstance(self.client_manager, ClientManager): @@ -118,18 +115,12 @@ async def async_startup(self) -> None: self.real_debrid_manager = RealDebridManager(self) await self.async_db_hash_startup() - # set files from args from cyberdrop_dl.utils import constants - constants.MAX_NAME_LENGTHS["FILE"] = int( - self.config_manager.global_settings_data["General"]["max_file_name_length"], - ) - constants.MAX_NAME_LENGTHS["FOLDER"] = int( - self.config_manager.global_settings_data["General"]["max_folder_name_length"], - ) + constants.MAX_NAME_LENGTHS["FILE"] = self.config_manager.global_settings_data.general.max_file_name_length + constants.MAX_NAME_LENGTHS["FOLDER"] = self.config_manager.global_settings_data.general.max_folder_name_length async def async_db_hash_startup(self) -> None: - # start up the db manager and hash manager only for scanning if not isinstance(self.db_manager, DBManager): self.db_manager = DBManager(self, self.path_manager.history_db) await self.db_manager.startup() @@ -143,26 +134,43 @@ async def async_db_hash_startup(self) -> None: def args_consolidation(self) -> None: """Consolidates runtime arguments with config values.""" - cli_settings_groups = ["Download_Options", "File_Size_Limits", "Ignore_Options", "Runtime_Options"] - parsed_args = self.args_manager.parsed_args - for arg in parsed_args: - for cli_settings_group in cli_settings_groups: - if arg in config_definitions.settings[cli_settings_group]: - if parsed_args[arg] == config_definitions.settings[cli_settings_group][arg]: - continue - if arg in self.args_manager.additive_args: - self.config_manager.settings_data[cli_settings_group][arg] += parsed_args[arg] - elif self.args_manager.parsed_args[arg] is not None: - self.config_manager.settings_data[cli_settings_group][arg] = parsed_args[arg] + cli_config_settings = self.parsed_args.config_settings.model_dump(exclude_unset=True) + cli_global_settings = self.parsed_args.global_settings.model_dump(exclude_unset=True) + + current_config_settings = self.config_manager.settings_data.model_dump() + current_global_settings = self.config_manager.global_settings_data.model_dump() + + merged_config_settings = self.merge_dicts(current_config_settings, cli_config_settings) + merged_global_settings = self.merge_dicts(current_global_settings, cli_global_settings) + + updated_config_settings = ConfigSettings.model_validate(merged_config_settings) + updated_global_settings = GlobalSettings.model_validate(merged_global_settings) + + self.config_manager.settings_data = updated_config_settings + self.config_manager.global_settings_data = updated_global_settings + + def merge_dicts(self, dict1: dict, dict2: dict): + for key, val in dict1.items(): + if isinstance(val, dict): + if key in dict2 and isinstance(dict2[key], dict): + self.merge_dicts(dict1[key], dict2[key]) + else: + if key in dict2: + dict1[key] = dict2[key] + + for key, val in dict2.items(): + if key not in dict1: + dict1[key] = val + + return dict1 def args_logging(self) -> None: """Logs the runtime arguments.""" forum_xf_cookies_provided = {} forum_credentials_provided = {} - auth_data_forums = self.config_manager.authentication_data["Forums"] - auth_data_others: dict[str, dict] = self.config_manager.authentication_data.copy() - auth_data_others.pop("Forums", None) + auth_data_forums = self.config_manager.authentication_data.forums.model_dump() + auth_data_others = self.config_manager.authentication_data.model_dump(exclude="forums") for forum in SupportedDomains.supported_forums_map.values(): 
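The consolidation step shown above works because model_dump(exclude_unset=True) emits only the values the user actually passed on the CLI, so a recursive merge leaves every untouched config value alone. A self-contained sketch of that pattern, using throwaway models and a compact equivalent of Manager.merge_dicts rather than the real ConfigSettings:

# Toy models standing in for the real ConfigSettings; only the merge pattern matters here.
from pydantic import BaseModel, Field


class RuntimeOptions(BaseModel):
    ignore_history: bool = False
    log_level: int = 10


class Settings(BaseModel):
    runtime_options: RuntimeOptions = Field(default_factory=RuntimeOptions)


def merge_dicts(base: dict, override: dict) -> dict:
    """Recursively merge `override` into `base` (compact equivalent of Manager.merge_dicts)."""
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            merge_dicts(base[key], value)
        else:
            base[key] = value
    return base


config = Settings(runtime_options=RuntimeOptions(ignore_history=True, log_level=20))
cli = Settings.model_validate({"runtime_options": {"log_level": 50}})

merged = merge_dicts(config.model_dump(), cli.model_dump(exclude_unset=True))
final = Settings.model_validate(merged)
assert final.runtime_options.ignore_history is True   # kept from the loaded config
assert final.runtime_options.log_level == 50           # overridden by the CLI value
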
forum_xf_cookies_provided[forum] = bool(auth_data_forums[f"{forum}_xf_user_cookie"]) @@ -178,29 +186,20 @@ def args_logging(self) -> None: for site, auth_entries in auth_data_others.items(): auth_provided[site] = all(auth_entries.values()) - print_settings = copy.deepcopy(self.config_manager.settings_data) - print_settings["Files"]["input_file"] = str(print_settings["Files"]["input_file"]) - print_settings["Files"]["download_folder"] = str(print_settings["Files"]["download_folder"]) - print_settings["Logs"]["log_folder"] = str(print_settings["Logs"]["log_folder"]) - print_settings["Logs"]["webhook_url"] = bool(print_settings["Logs"]["webhook_url"]) - print_settings["Sorting"]["sort_folder"] = str(print_settings["Sorting"]["sort_folder"]) - print_settings["Sorting"]["scan_folder"] = str(print_settings["Sorting"]["scan_folder"]) or "" - print_settings["Dupe_Cleanup_Options"]["hashing"] = print_settings["Dupe_Cleanup_Options"]["hashing"].name + config_settings = self.config_manager.settings_data.model_dump_json(indent=4) + global_settings = self.config_manager.global_settings_data.model_dump_json(indent=4) log(f"Starting Cyberdrop-DL Process - Config: {self.config_manager.loaded_config}", 10) log(f"Running version {__version__}", 10) log(f"Using Config: {self.config_manager.loaded_config}", 10) log(f"Using Config File: {self.config_manager.settings.resolve()}", 10) log(f"Using Input File: {self.path_manager.input_file.resolve()}", 10) - log(f"Using Download Folder: {self.path_manager.download_dir.resolve()}", 10) + log(f"Using Download Folder: {self.path_manager.download_folder.resolve()}", 10) log(f"Using History File: {self.path_manager.history_db.resolve()}", 10) log(f"Using Authentication: \n{json.dumps(auth_provided, indent=4, sort_keys=True)}", 10) - log(f"Using Settings: \n{json.dumps(print_settings, indent=4, sort_keys=True)}", 10) - log( - f"Using Global Settings: \n{json.dumps(self.config_manager.global_settings_data, indent=4, sort_keys=True)}", - 10, - ) + log(f"Using Settings: \n{config_settings}", 10) + log(f"Using Global Settings: \n{global_settings}", 10) async def close(self) -> None: """Closes the manager.""" diff --git a/cyberdrop_dl/managers/path_manager.py b/cyberdrop_dl/managers/path_manager.py index 2a6d382d7..b988bbfaf 100644 --- a/cyberdrop_dl/managers/path_manager.py +++ b/cyberdrop_dl/managers/path_manager.py @@ -22,87 +22,78 @@ class PathManager: def __init__(self, manager: Manager) -> None: self.manager = manager - self.download_dir: Path = field(init=False) - self.sorted_dir: Path = field(init=False) - self.scan_dir: Path = field(init=False) + self.download_folder: Path = field(init=False) + self.sorted_folder: Path = field(init=False) + self.scan_folder: Path = field(init=False) - self.log_dir: Path = field(init=False) + self.log_folder: Path = field(init=False) - self.cache_dir: Path = field(init=False) - self.config_dir: Path = field(init=False) + self.cache_folder: Path = field(init=False) + self.config_folder: Path = field(init=False) self.input_file: Path = field(init=False) self.history_db: Path = field(init=False) - self.main_log: Path = field(init=False) - self.last_post_log: Path = field(init=False) - self.unsupported_urls_log: Path = field(init=False) - self.download_error_log: Path = field(init=False) - self.scrape_error_log: Path = field(init=False) self._completed_downloads: set[MediaItem] = set() self._completed_downloads_set = set() self._prev_downloads = set() self._prev_downloads_set = set() def pre_startup(self) -> None: - if 
self.manager.args_manager.appdata_dir: - constants.APP_STORAGE = Path(self.manager.args_manager.appdata_dir) / "AppData" + if self.manager.parsed_args.cli_only_args.appdata_folder: + constants.APP_STORAGE = self.manager.parsed_args.cli_only_args.appdata_folder / "AppData" - self.cache_dir = constants.APP_STORAGE / "Cache" - self.config_dir = constants.APP_STORAGE / "Configs" + self.cache_folder = constants.APP_STORAGE / "Cache" + self.config_folder = constants.APP_STORAGE / "Configs" self.cookies_dir = constants.APP_STORAGE / "Cookies" - self.cache_dir.mkdir(parents=True, exist_ok=True) - self.config_dir.mkdir(parents=True, exist_ok=True) + self.cache_folder.mkdir(parents=True, exist_ok=True) + self.config_folder.mkdir(parents=True, exist_ok=True) + self.cookies_dir.mkdir(parents=True, exist_ok=True) + + def replace_config_in_path(self, path: Path) -> Path: + current_config = self.manager.config_manager.loaded_config + return Path(str(path).replace("{config}", current_config)) def startup(self) -> None: """Startup process for the Directory Manager.""" - self.download_dir = ( - self.manager.args_manager.download_dir - or self.manager.config_manager.settings_data["Files"]["download_folder"] - ) - - self.sorted_dir = ( - self.manager.args_manager.sort_folder or self.manager.config_manager.settings_data["Sorting"]["sort_folder"] - ) - - self.scan_dir = ( - self.manager.args_manager.scan_folder or self.manager.config_manager.settings_data["Sorting"]["scan_folder"] - ) - - self.log_dir = ( - self.manager.args_manager.log_dir or self.manager.config_manager.settings_data["Logs"]["log_folder"] - ) - self.input_file = ( - self.manager.args_manager.input_file or self.manager.config_manager.settings_data["Files"]["input_file"] - ) + settings_data = self.manager.config_manager.settings_data + self.download_folder = self.replace_config_in_path(settings_data.files.download_folder) + self.sorted_folder = self.replace_config_in_path(settings_data.sorting.sort_folder) + self.scan_folder = self.replace_config_in_path(settings_data.sorting.scan_folder) + self.log_folder = self.replace_config_in_path(settings_data.logs.log_folder) + self.input_file = self.replace_config_in_path(settings_data.files.input_file) + self.history_db = self.cache_folder / "cyberdrop.db" - self.history_db = self.cache_dir / "cyberdrop.db" + self._set_output_filenames() - current_time_iso = datetime.now().strftime("%Y%m%d_%H%M%S") - log_settings_config = self.manager.config_manager.settings_data["Logs"] - log_args_config = self.manager.args_manager - log_options_map = { - "main_log_filename": "main_log", - "last_forum_post_filename": "last_post_log", - "unsupported_urls_filename": "unsupported_urls_log", - "download_error_urls_filename": "download_error_log", - "scrape_error_urls_filename": "scrape_error_log", - } - - for log_config_name, log_internal_name in log_options_map.items(): - file_name = Path(getattr(log_args_config, log_config_name, None) or log_settings_config[log_config_name]) - file_ext = ".log" if log_internal_name == "main_log" else ".csv" - if log_settings_config["rotate_logs"]: - file_name = f"{file_name.stem}__{current_time_iso}{file_name.suffix}" - log_path = self.log_dir.joinpath(file_name).with_suffix(file_ext) - setattr(self, log_internal_name, log_path) - - self.log_dir.mkdir(parents=True, exist_ok=True) + self.log_folder.mkdir(parents=True, exist_ok=True) if not self.input_file.is_file(): self.input_file.touch(exist_ok=True) self.history_db.touch(exist_ok=True) + def _set_output_filenames(self) -> None: + 
current_time_iso = datetime.now().strftime("%Y%m%d_%H%M%S") + log_settings_config = self.manager.config_manager.settings_data.logs + log_files = log_settings_config.model_dump() + + for name, log_file in log_files.items(): + if "filename" not in name: + continue + is_main_log = log_file == log_settings_config.main_log_filename + file_ext = ".log" if is_main_log else ".csv" + file_name = log_file + path = Path(log_file) + if log_settings_config.rotate_logs: + file_name = f"{path.stem}__{current_time_iso}{path.suffix}" + log_files[name] = Path(file_name).with_suffix(file_ext).name + log_settings_config = log_settings_config.model_copy(update=log_files) + self.main_log = self.log_folder / log_settings_config.main_log_filename + self.last_forum_post_log = self.log_folder / log_settings_config.last_forum_post_filename + self.unsupported_urls_log = self.log_folder / log_settings_config.unsupported_urls_filename + self.download_error_log = self.log_folder / log_settings_config.download_error_urls_filename + self.scrape_error_log = self.log_folder / log_settings_config.scrape_error_urls_filename + def add_completed(self, media_item: MediaItem) -> None: self._completed_downloads.add(media_item) self._completed_downloads_set.add(media_item.complete_file.absolute()) diff --git a/cyberdrop_dl/managers/progress_manager.py b/cyberdrop_dl/managers/progress_manager.py index ed43d5866..b6b399214 100644 --- a/cyberdrop_dl/managers/progress_manager.py +++ b/cyberdrop_dl/managers/progress_manager.py @@ -25,13 +25,13 @@ def __init__(self, manager: Manager) -> None: # File Download Bars self.manager = manager self.file_progress: FileProgress = FileProgress( - manager.config_manager.global_settings_data["UI_Options"]["downloading_item_limit"], + manager.config_manager.global_settings_data.ui_options.downloading_item_limit, manager, ) # Scraping Printout self.scraping_progress: ScrapingProgress = ScrapingProgress( - manager.config_manager.global_settings_data["UI_Options"]["scraping_item_limit"], + manager.config_manager.global_settings_data.ui_options.scraping_item_limit, manager, ) @@ -42,7 +42,7 @@ def __init__(self, manager: Manager) -> None: self.hash_progress: HashProgress = HashProgress(manager) self.sort_progress: SortProgress = SortProgress(1, manager) - self.ui_refresh_rate = manager.config_manager.global_settings_data["UI_Options"]["refresh_rate"] + self.ui_refresh_rate = manager.config_manager.global_settings_data.ui_options.refresh_rate self.layout: Layout = field(init=False) self.hash_remove_layout: Layout = field(init=False) diff --git a/cyberdrop_dl/managers/realdebrid_manager.py b/cyberdrop_dl/managers/realdebrid_manager.py index 71b88e02a..53f99222a 100644 --- a/cyberdrop_dl/managers/realdebrid_manager.py +++ b/cyberdrop_dl/managers/realdebrid_manager.py @@ -24,7 +24,7 @@ class RealDebridManager: def __init__(self, manager: Manager) -> None: self.manager = manager - self.__api_token = self.manager.config_manager.authentication_data["RealDebrid"]["realdebrid_api_key"] + self.__api_token = self.manager.config_manager.authentication_data.realdebrid.api_key self.enabled = bool(self.__api_token) self.file_regex: Pattern = field(init=False) self.folder_regex: Pattern = field(init=False) diff --git a/cyberdrop_dl/scraper/crawler.py b/cyberdrop_dl/scraper/crawler.py index 705871dbd..3503dd9cb 100644 --- a/cyberdrop_dl/scraper/crawler.py +++ b/cyberdrop_dl/scraper/crawler.py @@ -134,20 +134,20 @@ async def handle_file( async def check_skip_by_config(self, media_item: MediaItem) -> bool: skip = False - if 
self.manager.config_manager.settings_data["Download_Options"]["skip_referer_seen_before"]: + if self.manager.config_manager.settings_data.download_options.skip_referer_seen_before: skip = await self.manager.db_manager.temp_referer_table.check_referer(media_item.referer) if skip: log(f"Download skip {media_item.url} as referer has been seen before", 10) - if not skip and self.manager.config_manager.settings_data["Ignore_Options"]["skip_hosts"]: - skip_hosts = self.manager.config_manager.settings_data["Ignore_Options"]["skip_hosts"] + if not skip and self.manager.config_manager.settings_data.ignore_options.skip_hosts: + skip_hosts = self.manager.config_manager.settings_data.ignore_options.skip_hosts if any(host in media_item.url.host for host in skip_hosts): log(f"Download skip {media_item.url} due to skip_hosts config", 10) skip = True - if not skip and self.manager.config_manager.settings_data["Ignore_Options"]["only_hosts"]: - only_hosts = self.manager.config_manager.settings_data["Ignore_Options"]["only_hosts"] + if not skip and self.manager.config_manager.settings_data.ignore_options.only_hosts: + only_hosts = self.manager.config_manager.settings_data.ignore_options.only_hosts if not any(host in media_item.url.host for host in only_hosts): log(f"Download skip {media_item.url} due to only_hosts config", 10) skip = True @@ -157,9 +157,7 @@ async def check_skip_by_config(self, media_item: MediaItem) -> bool: def check_post_number(self, post_number: int, current_post_number: int) -> tuple[bool, bool]: """Checks if the program should scrape the current post.""" """Returns (scrape_post, continue_scraping)""" - scrape_single_forum_post = self.manager.config_manager.settings_data["Download_Options"][ - "scrape_single_forum_post" - ] + scrape_single_forum_post = self.manager.config_manager.settings_data.download_options.scrape_single_forum_post if scrape_single_forum_post: if not post_number: @@ -258,7 +256,7 @@ def check_album_results(self, url: URL, album_results: dict[Any, Any]) -> bool: def create_scrape_item( parent_scrape_item: ScrapeItem, url: URL, - new_title_part: str, + new_title_part: str = "", part_of_album: bool = False, album_id: str | None = None, possible_datetime: int | None = None, @@ -271,32 +269,25 @@ def create_scrape_item( scrape_item.parents.append(add_parent) if new_title_part: scrape_item.add_to_parent_title(new_title_part) - scrape_item.part_of_album = part_of_album if part_of_album else scrape_item.part_of_album - if possible_datetime: - scrape_item.possible_datetime = possible_datetime - if album_id: - scrape_item.album_id = album_id + scrape_item.part_of_album = part_of_album or scrape_item.part_of_album + scrape_item.possible_datetime = possible_datetime or scrape_item.possible_datetime + scrape_item.album_id = album_id or scrape_item.album_id return scrape_item def create_title(self, title: str, album_id: str | None, thread_id: str | None) -> str: """Creates the title for the scrape item.""" + download_options = self.manager.config_manager.settings_data.download_options if not title: title = "Untitled" title = title.strip() - if ( - self.manager.config_manager.settings_data["Download_Options"]["include_album_id_in_folder_name"] - and album_id - ): + if download_options.include_album_id_in_folder_name and album_id: title = f"{title} {album_id}" - if ( - self.manager.config_manager.settings_data["Download_Options"]["include_thread_id_in_folder_name"] - and thread_id - ): + if download_options.include_thread_id_in_folder_name and thread_id: title = f"{title} 
{thread_id}" - if not self.manager.config_manager.settings_data["Download_Options"]["remove_domains_from_folder_names"]: + if not download_options.remove_domains_from_folder_names: title = f"{title} ({self.folder_domain})" return title diff --git a/cyberdrop_dl/scraper/crawlers/bunkrr_crawler.py b/cyberdrop_dl/scraper/crawlers/bunkrr_crawler.py index d3450ad4e..3ce01184c 100644 --- a/cyberdrop_dl/scraper/crawlers/bunkrr_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/bunkrr_crawler.py @@ -84,9 +84,9 @@ async def album(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) async with self.request_limiter: soup: BeautifulSoup = await self.client.get_soup(self.domain, scrape_item.url, origin=scrape_item) @@ -107,6 +107,15 @@ async def album(self, scrape_item: ScrapeItem) -> None: link = URL("https://" + scrape_item.url.host + link) link = URL(link) link = self.get_stream_link(link) + new_scrape_item = self.create_scrape_item( + scrape_item, + link, + "", + True, + album_id, + date, + add_parent=scrape_item.url, + ) # Try to get final file URL valid_extensions = FILE_FORMATS["Images"] | FILE_FORMATS["Videos"] @@ -124,24 +133,12 @@ async def album(self, scrape_item: ScrapeItem) -> None: msg = "No image found, reverting to parent" raise FileNotFoundError(msg) - new_scrape_item = self.create_scrape_item( - scrape_item, - link, - "", - True, - album_id, - date, - add_parent=scrape_item.url, - ) - src_filename, ext = get_filename_and_ext(src.name) if not self.check_album_results(src, results): await self.handle_file(src, new_scrape_item, src_filename, ext, custom_filename=filename) except FileNotFoundError: - self.manager.task_group.create_task( - self.run(ScrapeItem(link, scrape_item.parent_title, True, album_id, date)), - ) + self.manager.task_group.create_task(self.run(new_scrape_item)) scrape_item.children += 1 if scrape_item.children_limit and scrape_item.children >= scrape_item.children_limit: diff --git a/cyberdrop_dl/scraper/crawlers/celebforum_crawler.py b/cyberdrop_dl/scraper/crawlers/celebforum_crawler.py index 194e58cf8..07c8dc275 100644 --- a/cyberdrop_dl/scraper/crawlers/celebforum_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/celebforum_crawler.py @@ -62,10 +62,10 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: host_cookies = self.client.client_manager.cookies._cookies.get((self.primary_base_domain.host, ""), {}) session_cookie = host_cookies.get("xf_user").value if "xf_user" in host_cookies else None if not session_cookie: - session_cookie = self.manager.config_manager.authentication_data["Forums"]["celebforum_xf_user_cookie"] + session_cookie = self.manager.config_manager.authentication_data.forums.celebforum_xf_user_cookie - username = self.manager.config_manager.authentication_data["Forums"]["celebforum_username"] - password = self.manager.config_manager.authentication_data["Forums"]["celebforum_password"] + username = self.manager.config_manager.authentication_data.forums.celebforum_username + password = self.manager.config_manager.authentication_data.forums.celebforum_password wait_time = 5 self.login_attempts += 1 @@ -89,9 +89,9 @@ async def forum(self, scrape_item: ScrapeItem) -> None: 
scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) post_sections = (scrape_item.url.parts[3], scrape_item.url.fragment) if len(scrape_item.url.parts) > 3 and any("post-" in sec for sec in post_sections): @@ -166,7 +166,7 @@ async def forum(self, scrape_item: ScrapeItem) -> None: @error_handling_wrapper async def post(self, scrape_item: ScrapeItem, post_content: Tag, post_number: int) -> None: """Scrapes a post.""" - if self.manager.config_manager.settings_data["Download_Options"]["separate_posts"]: + if self.manager.config_manager.settings_data.download_options.separate_posts: scrape_item = self.create_scrape_item(scrape_item, scrape_item.url, "") scrape_item.add_to_parent_title("post-" + str(post_number)) @@ -174,9 +174,9 @@ async def post(self, scrape_item: ScrapeItem, post_content: Tag, post_number: in scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) posts_scrapers = [self.links, self.images, self.videos, self.embeds, self.attachments] diff --git a/cyberdrop_dl/scraper/crawlers/coomer_crawler.py b/cyberdrop_dl/scraper/crawlers/coomer_crawler.py index 1fc902bdc..1e144ac4e 100644 --- a/cyberdrop_dl/scraper/crawlers/coomer_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/coomer_crawler.py @@ -50,7 +50,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: @error_handling_wrapper async def favorites(self, scrape_item: ScrapeItem) -> None: """Scrapes the users' favourites and creates scrape items for each artist found.""" - if not self.manager.config_manager.authentication_data["Coomer"]["session"]: + if not self.manager.config_manager.authentication_data.coomer.session: raise ScrapeError( 401, message="No session cookie found in the config file, cannot scrape favorites", @@ -59,7 +59,7 @@ async def favorites(self, scrape_item: ScrapeItem) -> None: async with self.request_limiter: # Use the session cookie to get the user's favourites self.client.client_manager.cookies.update_cookies( - {"session": self.manager.config_manager.authentication_data["Coomer"]["session"]}, + {"session": self.manager.config_manager.authentication_data.coomer.session}, response_url=self.primary_base_domain, ) favourites_api_url = (self.api_url / "account/favorites").with_query({"type": "artist"}) @@ -82,9 +82,9 @@ async def profile(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) while True: async with self.request_limiter: @@ -116,19 +116,16 @@ async def post(self, scrape_item: ScrapeItem) -> None: @error_handling_wrapper async def handle_post_content(self, 
scrape_item: ScrapeItem, post: dict, user: str, user_str: str) -> None: """Handles the content of a post.""" - if ( - "#ad" in post["content"] - and self.manager.config_manager.settings_data["Ignore_Options"]["ignore_coomer_ads"] - ): + if "#ad" in post["content"] and self.manager.config_manager.settings_data.ignore_options.ignore_coomer_ads: return scrape_item.type = FILE_HOST_ALBUM scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) date = post.get("published") or post.get("added") date = date.replace("T", " ") @@ -189,9 +186,9 @@ async def create_new_scrape_item( ) -> None: """Creates a new scrape item with the same parent as the old scrape item.""" post_title = None - if self.manager.config_manager.settings_data["Download_Options"]["separate_posts"]: + if self.manager.config_manager.settings_data.download_options.separate_posts: post_title = f"{date} - {title}" - if self.manager.config_manager.settings_data["Download_Options"]["include_album_id_in_folder_name"]: + if self.manager.config_manager.settings_data.download_options.include_album_id_in_folder_name: post_title = post_id + " - " + post_title new_title = self.create_title(user, None, None) diff --git a/cyberdrop_dl/scraper/crawlers/cyberdrop_crawler.py b/cyberdrop_dl/scraper/crawlers/cyberdrop_crawler.py index 81c57fc07..f68f22b94 100644 --- a/cyberdrop_dl/scraper/crawlers/cyberdrop_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/cyberdrop_crawler.py @@ -53,9 +53,9 @@ async def album(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) scrape_item.album_id = scrape_item.url.parts[2] scrape_item.part_of_album = True diff --git a/cyberdrop_dl/scraper/crawlers/cyberfile_crawler.py b/cyberdrop_dl/scraper/crawlers/cyberfile_crawler.py index 0ae9d6804..6c8ff1f83 100644 --- a/cyberdrop_dl/scraper/crawlers/cyberfile_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/cyberfile_crawler.py @@ -63,9 +63,11 @@ async def folder(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[ + scrape_item.type + ] + ) page = 1 while True: @@ -120,9 +122,11 @@ async def shared(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[ + scrape_item.type + 
] + ) page = 1 while True: diff --git a/cyberdrop_dl/scraper/crawlers/ehentai_crawler.py b/cyberdrop_dl/scraper/crawlers/ehentai_crawler.py index 35d1b31da..4dd6669bf 100644 --- a/cyberdrop_dl/scraper/crawlers/ehentai_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/ehentai_crawler.py @@ -56,9 +56,9 @@ async def album(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) images = soup.select("div[class=gdtm] div a") for image in images: diff --git a/cyberdrop_dl/scraper/crawlers/erome_crawler.py b/cyberdrop_dl/scraper/crawlers/erome_crawler.py index 375792609..d35073e8d 100644 --- a/cyberdrop_dl/scraper/crawlers/erome_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/erome_crawler.py @@ -48,9 +48,9 @@ async def profile(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) for album in albums: link = URL(album["href"]) @@ -81,9 +81,9 @@ async def album(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) async with self.request_limiter: soup: BeautifulSoup = await self.client.get_soup(self.domain, scrape_item.url, origin=scrape_item) diff --git a/cyberdrop_dl/scraper/crawlers/f95zone_crawler.py b/cyberdrop_dl/scraper/crawlers/f95zone_crawler.py index 7936f14ad..7cea515de 100644 --- a/cyberdrop_dl/scraper/crawlers/f95zone_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/f95zone_crawler.py @@ -62,10 +62,10 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: host_cookies = self.client.client_manager.cookies._cookies.get((self.primary_base_domain.host, ""), {}) session_cookie = host_cookies.get("xf_user").value if "xf_user" in host_cookies else None if not session_cookie: - session_cookie = self.manager.config_manager.authentication_data["Forums"]["f95zone_xf_user_cookie"] + session_cookie = self.manager.config_manager.authentication_data.forums.f95zone_xf_user_cookie - username = self.manager.config_manager.authentication_data["Forums"]["f95zone_username"] - password = self.manager.config_manager.authentication_data["Forums"]["f95zone_password"] + username = self.manager.config_manager.authentication_data.forums.f95zone_username + password = self.manager.config_manager.authentication_data.forums.f95zone_password wait_time = 5 self.login_attempts += 1 @@ -89,9 +89,9 @@ async def forum(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = 
self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) post_sections = (scrape_item.url.parts[3], scrape_item.url.fragment) if len(scrape_item.url.parts) > 3 and any("post-" in sec for sec in post_sections): @@ -165,7 +165,7 @@ async def forum(self, scrape_item: ScrapeItem) -> None: @error_handling_wrapper async def post(self, scrape_item: ScrapeItem, post_content: Tag, post_number: int) -> None: """Scrapes a post.""" - if self.manager.config_manager.settings_data["Download_Options"]["separate_posts"]: + if self.manager.config_manager.settings_data.download_options.separate_posts: scrape_item = self.create_scrape_item(scrape_item, scrape_item.url, "") scrape_item.add_to_parent_title("post-" + str(post_number)) @@ -173,9 +173,9 @@ async def post(self, scrape_item: ScrapeItem, post_content: Tag, post_number: in scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) posts_scrapers = [self.links, self.images, self.videos, self.embeds, self.attachments] diff --git a/cyberdrop_dl/scraper/crawlers/fapello_crawler.py b/cyberdrop_dl/scraper/crawlers/fapello_crawler.py index f3104678e..56e0d6895 100644 --- a/cyberdrop_dl/scraper/crawlers/fapello_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/fapello_crawler.py @@ -53,9 +53,9 @@ async def profile(self, scrape_item: ScrapeItem) -> None: scrape_item.type = FILE_HOST_PROFILE with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) title = self.create_title( soup.select_one('h2[class="font-semibold lg:text-2xl text-lg mb-2 mt-4"]').get_text(), @@ -109,9 +109,9 @@ async def post(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) async with self.request_limiter: soup = await self.client.get_soup(self.domain, scrape_item.url) diff --git a/cyberdrop_dl/scraper/crawlers/gofile_crawler.py b/cyberdrop_dl/scraper/crawlers/gofile_crawler.py index 5b0c28606..faa703417 100644 --- a/cyberdrop_dl/scraper/crawlers/gofile_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/gofile_crawler.py @@ -108,9 +108,11 @@ async def album(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + 
self.manager.config_manager.settings_data.download_options.maximum_number_of_children[ + scrape_item.type + ] + ) contents = JSON_Resp["children"] for content_id in contents: @@ -156,7 +158,7 @@ async def get_token(self, create_acct_address: URL, session: ScraperClient) -> N self.headers["Authorization"] = f"Bearer {self.token}" return - api_token = self.manager.config_manager.authentication_data["GoFile"]["gofile_api_key"] + api_token = self.manager.config_manager.authentication_data.gofile.api_key if api_token: self.token = api_token self.headers["Authorization"] = f"Bearer {self.token}" diff --git a/cyberdrop_dl/scraper/crawlers/hotpic_crawler.py b/cyberdrop_dl/scraper/crawlers/hotpic_crawler.py index 78c0873ca..826ee0cf1 100644 --- a/cyberdrop_dl/scraper/crawlers/hotpic_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/hotpic_crawler.py @@ -54,9 +54,9 @@ async def album(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) files = soup.select("a[class*=spotlight]") for file in files: diff --git a/cyberdrop_dl/scraper/crawlers/imageban_crawler.py b/cyberdrop_dl/scraper/crawlers/imageban_crawler.py index c04fadc00..cac6e45bd 100644 --- a/cyberdrop_dl/scraper/crawlers/imageban_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/imageban_crawler.py @@ -61,9 +61,9 @@ async def album(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) for image in images: link_path = image.get("href") @@ -100,9 +100,9 @@ async def compilation(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) for image in images: link = URL(image.get("src")) diff --git a/cyberdrop_dl/scraper/crawlers/imgbb_crawler.py b/cyberdrop_dl/scraper/crawlers/imgbb_crawler.py index cf05569ae..707c0ea89 100644 --- a/cyberdrop_dl/scraper/crawlers/imgbb_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/imgbb_crawler.py @@ -57,9 +57,9 @@ async def album(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) title = self.create_title( soup.select_one("a[data-text=album-name]").get_text(), diff --git a/cyberdrop_dl/scraper/crawlers/imgbox_crawler.py 
b/cyberdrop_dl/scraper/crawlers/imgbox_crawler.py index 6356a944e..3a77d73af 100644 --- a/cyberdrop_dl/scraper/crawlers/imgbox_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/imgbox_crawler.py @@ -60,9 +60,9 @@ async def album(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) title = self.create_title( soup.select_one("div[id=gallery-view] h1").get_text().strip().rsplit(" - ", 1)[0], diff --git a/cyberdrop_dl/scraper/crawlers/imgur_crawler.py b/cyberdrop_dl/scraper/crawlers/imgur_crawler.py index 46ab1082b..2478a217f 100644 --- a/cyberdrop_dl/scraper/crawlers/imgur_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/imgur_crawler.py @@ -20,7 +20,7 @@ class ImgurCrawler(Crawler): def __init__(self, manager: Manager) -> None: super().__init__(manager, "imgur", "Imgur") self.imgur_api = URL("https://api.imgur.com/3/") - self.imgur_client_id = self.manager.config_manager.authentication_data["Imgur"]["imgur_client_id"] + self.imgur_client_id = self.manager.config_manager.authentication_data.imgur.client_id self.imgur_client_remaining = 12500 self.headers = {"Authorization": f"Client-ID {self.imgur_client_id}"} self.request_limiter = AsyncLimiter(10, 1) @@ -51,9 +51,9 @@ async def album(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) album_id = scrape_item.url.parts[-1] scrape_item.album_id = album_id diff --git a/cyberdrop_dl/scraper/crawlers/kemono_crawler.py b/cyberdrop_dl/scraper/crawlers/kemono_crawler.py index db82b8a0d..964ba421a 100644 --- a/cyberdrop_dl/scraper/crawlers/kemono_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/kemono_crawler.py @@ -58,9 +58,9 @@ async def profile(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) while True: async with self.request_limiter: JSON_Resp = await self.client.get_json( @@ -88,9 +88,9 @@ async def discord(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) while True: async with self.request_limiter: JSON_Resp = await self.client.get_json( @@ -125,9 +125,9 @@ async def handle_post_content(self, scrape_item: ScrapeItem, post: dict, user: s 
scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) date = post.get("published") or post.get("added") date = date.replace("T", " ") @@ -170,9 +170,9 @@ async def get_content_links(self, scrape_item: ScrapeItem, post: dict, user: str title = post.get("title", "") post_title = None - if self.manager.config_manager.settings_data["Download_Options"]["separate_posts"]: + if self.manager.config_manager.settings_data.download_options.separate_posts: post_title = f"{date} - {title}" - if self.manager.config_manager.settings_data["Download_Options"]["include_album_id_in_folder_name"]: + if self.manager.config_manager.settings_data.download_options.include_album_id_in_folder_name: post_title = post_id + " - " + post_title new_title = self.create_title(user, None, None) @@ -241,9 +241,9 @@ async def create_new_scrape_item( ) -> None: """Creates a new scrape item with the same parent as the old scrape item.""" post_title = None - if self.manager.config_manager.settings_data["Download_Options"]["separate_posts"]: + if self.manager.config_manager.settings_data.download_options.separate_posts: post_title = f"{date} - {title}" - if self.manager.config_manager.settings_data["Download_Options"]["include_album_id_in_folder_name"]: + if self.manager.config_manager.settings_data.download_options.include_album_id_in_folder_name: post_title = post_id + " - " + post_title new_title = self.create_title(user, None, None) diff --git a/cyberdrop_dl/scraper/crawlers/leakedmodels_crawler.py b/cyberdrop_dl/scraper/crawlers/leakedmodels_crawler.py index d0c58f1ff..1689a0407 100644 --- a/cyberdrop_dl/scraper/crawlers/leakedmodels_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/leakedmodels_crawler.py @@ -63,11 +63,10 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: host_cookies = self.client.client_manager.cookies._cookies.get((self.primary_base_domain.host, ""), {}) session_cookie = host_cookies.get("xf_user").value if "xf_user" in host_cookies else None if not session_cookie: - session_cookie = self.manager.config_manager.authentication_data["Forums"][ - "leakedmodels_xf_user_cookie" - ] - username = self.manager.config_manager.authentication_data["Forums"]["leakedmodels_username"] - password = self.manager.config_manager.authentication_data["Forums"]["leakedmodels_password"] + session_cookie = self.manager.config_manager.authentication_data.forums.leakedmodels_xf_user_cookie + + username = self.manager.config_manager.authentication_data.forums.leakedmodels_username + password = self.manager.config_manager.authentication_data.forums.leakedmodels_password wait_time = 5 self.login_attempts += 1 @@ -97,9 +96,9 @@ async def forum(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) post_sections = (scrape_item.url.parts[3], scrape_item.url.fragment) if len(scrape_item.url.parts) > 3 and any("post-" in sec 
for sec in post_sections): url_parts = str(scrape_item.url).rsplit("post-", 1) @@ -172,7 +171,7 @@ async def forum(self, scrape_item: ScrapeItem) -> None: @error_handling_wrapper async def post(self, scrape_item: ScrapeItem, post_content: Tag, post_number: int) -> None: """Scrapes a post.""" - if self.manager.config_manager.settings_data["Download_Options"]["separate_posts"]: + if self.manager.config_manager.settings_data.download_options.separate_posts: scrape_item = self.create_scrape_item(scrape_item, scrape_item.url, "") scrape_item.add_to_parent_title("post-" + str(post_number)) @@ -180,9 +179,9 @@ async def post(self, scrape_item: ScrapeItem, post_content: Tag, post_number: in scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) posts_scrapers = [self.links, self.images, self.videos, self.embeds, self.attachments] diff --git a/cyberdrop_dl/scraper/crawlers/mediafire_crawler.py b/cyberdrop_dl/scraper/crawlers/mediafire_crawler.py index c22c3b043..ec6decde4 100644 --- a/cyberdrop_dl/scraper/crawlers/mediafire_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/mediafire_crawler.py @@ -53,9 +53,9 @@ async def folder(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) scrape_item.album_id = folder_key scrape_item.part_of_album = True diff --git a/cyberdrop_dl/scraper/crawlers/nekohouse_crawler.py b/cyberdrop_dl/scraper/crawlers/nekohouse_crawler.py index d62448202..6f8e94082 100644 --- a/cyberdrop_dl/scraper/crawlers/nekohouse_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/nekohouse_crawler.py @@ -264,9 +264,9 @@ async def create_new_scrape_item( ) -> None: """Creates a new scrape item with the same parent as the old scrape item.""" post_title = None - if self.manager.config_manager.settings_data["Download_Options"]["separate_posts"]: + if self.manager.config_manager.settings_data.download_options.separate_posts: post_title = f"{date} - {title}" - if self.manager.config_manager.settings_data["Download_Options"]["include_album_id_in_folder_name"]: + if self.manager.config_manager.settings_data.download_options.include_album_id_in_folder_name: post_title = post_id + " - " + post_title new_title = self.create_title(user, None, None) diff --git a/cyberdrop_dl/scraper/crawlers/nudostar_crawler.py b/cyberdrop_dl/scraper/crawlers/nudostar_crawler.py index b27a83edb..449ed64d2 100644 --- a/cyberdrop_dl/scraper/crawlers/nudostar_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/nudostar_crawler.py @@ -63,10 +63,10 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: host_cookies = self.client.client_manager.cookies._cookies.get((self.primary_base_domain.host, ""), {}) session_cookie = host_cookies.get("xf_user").value if "xf_user" in host_cookies else None if not session_cookie: - session_cookie = self.manager.config_manager.authentication_data["Forums"]["nudostar_xf_user_cookie"] + 
session_cookie = self.manager.config_manager.authentication_data.forums.nudostar_xf_user_cookie - username = self.manager.config_manager.authentication_data["Forums"]["nudostar_username"] - password = self.manager.config_manager.authentication_data["Forums"]["nudostar_password"] + username = self.manager.config_manager.authentication_data.forums.nudostar_username + password = self.manager.config_manager.authentication_data.forums.nudostar_password wait_time = 5 self.login_attempts += 1 @@ -90,9 +90,9 @@ async def forum(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) post_sections = (scrape_item.url.parts[3], scrape_item.url.fragment) if len(scrape_item.url.parts) > 3 and any("post-" in sec for sec in post_sections): url_parts = str(scrape_item.url).rsplit("post-", 1) @@ -165,7 +165,7 @@ async def forum(self, scrape_item: ScrapeItem) -> None: @error_handling_wrapper async def post(self, scrape_item: ScrapeItem, post_content: Tag, post_number: int) -> None: """Scrapes a post.""" - if self.manager.config_manager.settings_data["Download_Options"]["separate_posts"]: + if self.manager.config_manager.settings_data.download_options.separate_posts: scrape_item = self.create_scrape_item(scrape_item, scrape_item.url, "") scrape_item.add_to_parent_title("post-" + str(post_number)) @@ -173,9 +173,9 @@ async def post(self, scrape_item: ScrapeItem, post_content: Tag, post_number: in scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) posts_scrapers = [self.links, self.images, self.videos, self.embeds, self.attachments] diff --git a/cyberdrop_dl/scraper/crawlers/omegascans_crawler.py b/cyberdrop_dl/scraper/crawlers/omegascans_crawler.py index 85960c638..c1df9650d 100644 --- a/cyberdrop_dl/scraper/crawlers/omegascans_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/omegascans_crawler.py @@ -52,9 +52,9 @@ async def series(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) scripts = soup.select("script") series_id = None diff --git a/cyberdrop_dl/scraper/crawlers/pimpandhost_crawler.py b/cyberdrop_dl/scraper/crawlers/pimpandhost_crawler.py index 5b4e77cbe..d893abf44 100644 --- a/cyberdrop_dl/scraper/crawlers/pimpandhost_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/pimpandhost_crawler.py @@ -50,9 +50,9 @@ async def album(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = 
self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) title = self.create_title( soup.select_one("span[class=author-header__album-name]").get_text(), diff --git a/cyberdrop_dl/scraper/crawlers/pixeldrain_crawler.py b/cyberdrop_dl/scraper/crawlers/pixeldrain_crawler.py index 5d7895314..84e48e06c 100644 --- a/cyberdrop_dl/scraper/crawlers/pixeldrain_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/pixeldrain_crawler.py @@ -47,9 +47,9 @@ async def folder(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) async with self.request_limiter: JSON_Resp = await self.client.get_json( diff --git a/cyberdrop_dl/scraper/crawlers/postimg_crawler.py b/cyberdrop_dl/scraper/crawlers/postimg_crawler.py index 3bf6d81e1..58271910b 100644 --- a/cyberdrop_dl/scraper/crawlers/postimg_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/postimg_crawler.py @@ -48,9 +48,9 @@ async def album(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) for i in itertools.count(1): data["page"] = i async with self.request_limiter: diff --git a/cyberdrop_dl/scraper/crawlers/realbooru_crawler.py b/cyberdrop_dl/scraper/crawlers/realbooru_crawler.py index 0c6a7afff..2fbe060dc 100644 --- a/cyberdrop_dl/scraper/crawlers/realbooru_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/realbooru_crawler.py @@ -56,9 +56,9 @@ async def tag(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) content = soup.select("div[class=items] div a") for file_page in content: diff --git a/cyberdrop_dl/scraper/crawlers/reddit_crawler.py b/cyberdrop_dl/scraper/crawlers/reddit_crawler.py index 85bafa811..7462904c1 100644 --- a/cyberdrop_dl/scraper/crawlers/reddit_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/reddit_crawler.py @@ -28,10 +28,8 @@ class RedditCrawler(Crawler): def __init__(self, manager: Manager, site: str) -> None: super().__init__(manager, site, "Reddit") - self.reddit_personal_use_script = self.manager.config_manager.authentication_data["Reddit"][ - "reddit_personal_use_script" - ] - self.reddit_secret = self.manager.config_manager.authentication_data["Reddit"]["reddit_secret"] + self.reddit_personal_use_script = self.manager.config_manager.authentication_data.reddit.personal_use_script + self.reddit_secret = 
self.manager.config_manager.authentication_data.reddit.secret self.request_limiter = AsyncLimiter(5, 1) """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" @@ -109,9 +107,9 @@ async def get_posts( scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) for submission in submissions_list: await self.post(scrape_item, submission, reddit) @@ -170,9 +168,9 @@ async def gallery(self, scrape_item: ScrapeItem, submission: Submission, reddit: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) for link in links: new_scrape_item = await self.create_new_scrape_item( link, @@ -227,6 +225,6 @@ async def create_new_scrape_item( date, add_parent=add_parent, ) - if self.manager.config_manager.settings_data["Download_Options"]["separate_posts"]: + if self.manager.config_manager.settings_data.download_options.separate_posts: new_scrape_item.add_to_parent_title(title) return new_scrape_item diff --git a/cyberdrop_dl/scraper/crawlers/redgifs_crawler.py b/cyberdrop_dl/scraper/crawlers/redgifs_crawler.py index b4e18f358..f96eee9ed 100644 --- a/cyberdrop_dl/scraper/crawlers/redgifs_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/redgifs_crawler.py @@ -52,9 +52,9 @@ async def user(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) while page <= total_pages: async with self.request_limiter: JSON_Resp = await self.client.get_json( diff --git a/cyberdrop_dl/scraper/crawlers/rule34vault_crawler.py b/cyberdrop_dl/scraper/crawlers/rule34vault_crawler.py index 60f17fb27..cafda9120 100644 --- a/cyberdrop_dl/scraper/crawlers/rule34vault_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/rule34vault_crawler.py @@ -52,9 +52,9 @@ async def tag(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) content_block = soup.select_one('div[class="box-grid ng-star-inserted"]') content = content_block.select('a[class="box ng-star-inserted"]') @@ -91,9 +91,9 @@ async def playlist(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = 
self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) title_str = soup.select_one("div[class*=title]").text scrape_item.part_of_album = True diff --git a/cyberdrop_dl/scraper/crawlers/rule34xxx_crawler.py b/cyberdrop_dl/scraper/crawlers/rule34xxx_crawler.py index 9a7a8dc7e..2eae9fea6 100644 --- a/cyberdrop_dl/scraper/crawlers/rule34xxx_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/rule34xxx_crawler.py @@ -54,9 +54,9 @@ async def tag(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) title_portion = scrape_item.url.query["tags"].strip() title = self.create_title(title_portion, None, None) diff --git a/cyberdrop_dl/scraper/crawlers/rule34xyz_crawler.py b/cyberdrop_dl/scraper/crawlers/rule34xyz_crawler.py index 015755ac5..9850cc1da 100644 --- a/cyberdrop_dl/scraper/crawlers/rule34xyz_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/rule34xyz_crawler.py @@ -48,9 +48,9 @@ async def tag(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) title = self.create_title(scrape_item.url.parts[1], None, None) scrape_item.part_of_album = True diff --git a/cyberdrop_dl/scraper/crawlers/scrolller_crawler.py b/cyberdrop_dl/scraper/crawlers/scrolller_crawler.py index d7be87a6f..30bc612ea 100644 --- a/cyberdrop_dl/scraper/crawlers/scrolller_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/scrolller_crawler.py @@ -49,9 +49,9 @@ async def subreddit(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) request_body = { "query": """ diff --git a/cyberdrop_dl/scraper/crawlers/simpcity_crawler.py b/cyberdrop_dl/scraper/crawlers/simpcity_crawler.py index 7a2beade4..532e6f252 100644 --- a/cyberdrop_dl/scraper/crawlers/simpcity_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/simpcity_crawler.py @@ -63,13 +63,11 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: host_cookies = self.client.client_manager.cookies._cookies.get((self.primary_base_domain.host, ""), {}) session_cookie = host_cookies.get("xf_user").value if "xf_user" in host_cookies else None if not session_cookie: - session_cookie = self.manager.config_manager.authentication_data["Forums"].get( - "simpcity_xf_user_cookie" - ) + session_cookie = self.manager.config_manager.authentication_data.forums.simpcity_xf_user_cookie - 
session_cookie = self.manager.config_manager.authentication_data["Forums"]["simpcity_xf_user_cookie"] - username = self.manager.config_manager.authentication_data["Forums"]["simpcity_username"] - password = self.manager.config_manager.authentication_data["Forums"]["simpcity_password"] + session_cookie = self.manager.config_manager.authentication_data.forums.simpcity_xf_user_cookie + username = self.manager.config_manager.authentication_data.forums.simpcity_username + password = self.manager.config_manager.authentication_data.forums.simpcity_password wait_time = 5 if session_cookie or (username and password): @@ -94,9 +92,9 @@ async def forum(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) post_sections = (scrape_item.url.parts[3], scrape_item.url.fragment) if len(scrape_item.url.parts) > 3 and any("post-" in sec for sec in post_sections): url_parts = str(scrape_item.url).rsplit("post-", 1) @@ -172,7 +170,7 @@ async def forum(self, scrape_item: ScrapeItem) -> None: @error_handling_wrapper async def post(self, scrape_item: ScrapeItem, post_content: Tag, post_number: int) -> None: """Scrapes a post.""" - if self.manager.config_manager.settings_data["Download_Options"]["separate_posts"]: + if self.manager.config_manager.settings_data.download_options.separate_posts: scrape_item = self.create_scrape_item(scrape_item, scrape_item.url, "") scrape_item.add_to_parent_title("post-" + str(post_number)) @@ -180,9 +178,9 @@ async def post(self, scrape_item: ScrapeItem, post_content: Tag, post_number: in scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) posts_scrapers = [self.links, self.images, self.videos, self.embeds, self.attachments] diff --git a/cyberdrop_dl/scraper/crawlers/socialmediagirls_crawler.py b/cyberdrop_dl/scraper/crawlers/socialmediagirls_crawler.py index f1a649910..61ca8d3b7 100644 --- a/cyberdrop_dl/scraper/crawlers/socialmediagirls_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/socialmediagirls_crawler.py @@ -63,12 +63,10 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: host_cookies = self.client.client_manager.cookies._cookies.get((self.primary_base_domain.host, ""), {}) session_cookie = host_cookies.get("xf_user").value if "xf_user" in host_cookies else None if not session_cookie: - session_cookie = self.manager.config_manager.authentication_data["Forums"][ - "socialmediagirls_xf_user_cookie" - ] + session_cookie = self.manager.config_manager.authentication_data.forums.socialmediagirls_xf_user_cookie - username = self.manager.config_manager.authentication_data["Forums"]["socialmediagirls_username"] - password = self.manager.config_manager.authentication_data["Forums"]["socialmediagirls_password"] + username = self.manager.config_manager.authentication_data.forums.socialmediagirls_username + password = 
self.manager.config_manager.authentication_data.forums.socialmediagirls_password wait_time = 10 self.login_attempts += 1 @@ -92,9 +90,9 @@ async def forum(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) thread_index = scrape_item.url.parts.index("threads") post_sections = (scrape_item.url.parts[thread_index + 1], scrape_item.url.fragment) if len(scrape_item.url.parts) > 3 and any("post-" in sec for sec in post_sections): @@ -167,7 +165,7 @@ async def forum(self, scrape_item: ScrapeItem) -> None: @error_handling_wrapper async def post(self, scrape_item: ScrapeItem, post_content: Tag, post_number: int) -> None: """Scrapes a post.""" - if self.manager.config_manager.settings_data["Download_Options"]["separate_posts"]: + if self.manager.config_manager.settings_data.download_options.separate_posts: scrape_item = self.create_scrape_item(scrape_item, scrape_item.url, "") scrape_item.add_to_parent_title("post-" + str(post_number)) @@ -175,9 +173,9 @@ async def post(self, scrape_item: ScrapeItem, post_content: Tag, post_number: in scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) posts_scrapers = [self.links, self.images, self.videos, self.embeds, self.attachments] diff --git a/cyberdrop_dl/scraper/crawlers/toonily_crawler.py b/cyberdrop_dl/scraper/crawlers/toonily_crawler.py index 0ff41bbef..19c788e86 100644 --- a/cyberdrop_dl/scraper/crawlers/toonily_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/toonily_crawler.py @@ -50,9 +50,9 @@ async def series(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) chapters = soup.select("li[class*=wp-manga-chapter] a") for chapter in chapters: @@ -86,9 +86,9 @@ async def chapter(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) title_parts = soup.select_one("title").get_text().split(" - ") series_name = title_parts[0] diff --git a/cyberdrop_dl/scraper/crawlers/xbunker_crawler.py b/cyberdrop_dl/scraper/crawlers/xbunker_crawler.py index eac67005d..35225e413 100644 --- a/cyberdrop_dl/scraper/crawlers/xbunker_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/xbunker_crawler.py @@ -67,10 +67,10 @@ async 
def fetch(self, scrape_item: ScrapeItem) -> None: host_cookies = self.client.client_manager.cookies._cookies.get((self.primary_base_domain.host, ""), {}) session_cookie = host_cookies.get("xf_user").value if "xf_user" in host_cookies else None if not session_cookie: - session_cookie = self.manager.config_manager.authentication_data["Forums"]["xbunker_xf_user_cookie"] + session_cookie = self.manager.config_manager.authentication_data.forums.xbunker_xf_user_cookie - username = self.manager.config_manager.authentication_data["Forums"]["xbunker_username"] - password = self.manager.config_manager.authentication_data["Forums"]["xbunker_password"] + username = self.manager.config_manager.authentication_data.forums.xbunker_username + password = self.manager.config_manager.authentication_data.forums.xbunker_password wait_time = 5 self.login_attempts += 1 @@ -94,9 +94,9 @@ async def forum(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) post_sections = (scrape_item.url.parts[3], scrape_item.url.fragment) if len(scrape_item.url.parts) > 3 and any("post-" in sec for sec in post_sections): url_parts = str(scrape_item.url).rsplit("post-", 1) @@ -169,7 +169,7 @@ async def forum(self, scrape_item: ScrapeItem) -> None: @error_handling_wrapper async def post(self, scrape_item: ScrapeItem, post_content: Tag, post_number: int) -> None: """Scrapes a post.""" - if self.manager.config_manager.settings_data["Download_Options"]["separate_posts"]: + if self.manager.config_manager.settings_data.download_options.separate_posts: scrape_item = self.create_scrape_item(scrape_item, scrape_item.url, "") scrape_item.add_to_parent_title("post-" + str(post_number)) @@ -177,9 +177,9 @@ async def post(self, scrape_item: ScrapeItem, post_content: Tag, post_number: in scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) posts_scrapers = [self.links, self.images, self.videos, self.embeds, self.attachments] diff --git a/cyberdrop_dl/scraper/crawlers/xbunkr_crawler.py b/cyberdrop_dl/scraper/crawlers/xbunkr_crawler.py index 83ac62c28..dfe6a2664 100644 --- a/cyberdrop_dl/scraper/crawlers/xbunkr_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/xbunkr_crawler.py @@ -50,9 +50,9 @@ async def album(self, scrape_item: ScrapeItem) -> None: scrape_item.children = scrape_item.children_limit = 0 with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = self.manager.config_manager.settings_data["Download_Options"][ - "maximum_number_of_children" - ][scrape_item.type] + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) title = self.create_title(soup.select_one("h1[id=title]").text, scrape_item.album_id, None) diff --git a/cyberdrop_dl/scraper/crawlers/xxxbunker_crawler.py b/cyberdrop_dl/scraper/crawlers/xxxbunker_crawler.py 
index f513e000d..93d13a637 100644 --- a/cyberdrop_dl/scraper/crawlers/xxxbunker_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/xxxbunker_crawler.py @@ -224,7 +224,7 @@ async def parse_relative_date(relative_date: timedelta | str) -> int: async def check_session_cookie(self) -> None: """Get Cookie from config file.""" - self.session_cookie = self.manager.config_manager.authentication_data["XXXBunker"]["PHPSESSID"] + self.session_cookie = self.manager.config_manager.authentication_data.xxxbunker.PHPSESSID if not self.session_cookie: self.session_cookie = "" return diff --git a/cyberdrop_dl/scraper/jdownloader.py b/cyberdrop_dl/scraper/jdownloader.py index 2c5603e24..0ad0d99b7 100644 --- a/cyberdrop_dl/scraper/jdownloader.py +++ b/cyberdrop_dl/scraper/jdownloader.py @@ -45,16 +45,14 @@ class JDownloader: """Class that handles connecting and passing links to JDownloader.""" def __init__(self, manager: Manager) -> None: - self.enabled = manager.config_manager.settings_data["Runtime_Options"]["send_unsupported_to_jdownloader"] - self.jdownloader_device = manager.config_manager.authentication_data["JDownloader"]["jdownloader_device"] - self.jdownloader_username = manager.config_manager.authentication_data["JDownloader"]["jdownloader_username"] - self.jdownloader_password = manager.config_manager.authentication_data["JDownloader"]["jdownloader_password"] - self.jdownloader_download_dir = manager.config_manager.settings_data["Runtime_Options"][ - "jdownloader_download_dir" - ] - self.jdownloader_autostart = manager.config_manager.settings_data["Runtime_Options"]["jdownloader_autostart"] + self.enabled = manager.config_manager.settings_data.runtime_options.send_unsupported_to_jdownloader + self.jdownloader_device = manager.config_manager.authentication_data.jdownloader.device + self.jdownloader_username = manager.config_manager.authentication_data.jdownloader.username + self.jdownloader_password = manager.config_manager.authentication_data.jdownloader.password + self.jdownloader_download_dir = manager.config_manager.settings_data.runtime_options.jdownloader_download_dir + self.jdownloader_autostart = manager.config_manager.settings_data.runtime_options.jdownloader_autostart if not self.jdownloader_download_dir: - self.jdownloader_download_dir = manager.path_manager.download_dir + self.jdownloader_download_dir = manager.path_manager.download_folder self.jdownloader_download_dir = Path(self.jdownloader_download_dir) self.jdownloader_agent = field(init=False) diff --git a/cyberdrop_dl/scraper/scraper.py b/cyberdrop_dl/scraper/scraper.py index 2b2cd9cf2..939b115b6 100644 --- a/cyberdrop_dl/scraper/scraper.py +++ b/cyberdrop_dl/scraper/scraper.py @@ -40,9 +40,7 @@ def __init__(self, manager: Manager) -> None: self.existing_crawlers: dict[str, Crawler] = {} self.no_crawler_downloader = Downloader(self.manager, "no_crawler") self.jdownloader = JDownloader(self.manager) - self.jdownloader_whitelist = self.manager.config_manager.settings_data["Runtime_Options"][ - "jdownloader_whitelist" - ] + self.jdownloader_whitelist = self.manager.config_manager.settings_data.runtime_options.jdownloader_whitelist self.lock = asyncio.Lock() self.count = 0 @@ -92,11 +90,11 @@ async def start(self) -> None: self.no_crawler_downloader.startup() - if self.manager.args_manager.retry_failed: + if self.manager.parsed_args.cli_only_args.retry_failed: await self.load_failed_links() - elif self.manager.args_manager.retry_all: + elif self.manager.parsed_args.cli_only_args.retry_all: await self.load_all_links() - elif 
self.manager.args_manager.retry_maintenance: + elif self.manager.parsed_args.cli_only_args.retry_maintenance: await self.load_all_bunkr_failed_links_via_hash() else: await self.load_links() @@ -150,11 +148,11 @@ async def load_links(self) -> None: input_file.touch(exist_ok=True) links = {"": []} - if not self.manager.args_manager.other_links: + if not self.manager.parsed_args.cli_only_args.links: links = await self.parse_input_file_groups() else: - links[""].extend(self.manager.args_manager.other_links) + links[""].extend(self.manager.parsed_args.cli_only_args.links) links = {k: list(filter(None, v)) for k, v in links.items()} items = [] @@ -179,24 +177,24 @@ async def load_failed_links(self) -> None: item = self.create_item_from_entry(entry) if self.filter_items(item): items.append(item) - if self.manager.args_manager.max_items: - items = items[: self.manager.args_manager.max_items] + if self.manager.parsed_args.cli_only_args.max_items_retry: + items = items[: self.manager.parsed_args.cli_only_args.max_items_retry] for item in items: self.manager.task_group.create_task(self.send_to_crawler(item)) async def load_all_links(self) -> None: """Loads all links from database.""" entries = await self.manager.db_manager.history_table.get_all_items( - self.manager.args_manager.after, - self.manager.args_manager.before, + self.manager.parsed_args.cli_only_args.completed_after, + self.manager.parsed_args.cli_only_args.completed_before, ) items = [] for entry in entries: item = self.create_item_from_entry(entry) if self.filter_items(item): items.append(item) - if self.manager.args_manager.max_items: - items = items[: self.manager.args_manager.max_items] + if self.manager.parsed_args.cli_only_args.max_items_retry: + items = items[: self.manager.parsed_args.cli_only_args.max_items_retry] for item in items: self.manager.task_group.create_task(self.send_to_crawler(item)) @@ -209,8 +207,8 @@ async def load_all_bunkr_failed_links_via_hash(self) -> None: item = self.create_item_from_entry(entry) if self.filter_items(item): items.append(item) - if self.manager.args_manager.max_items: - items = items[: self.manager.args_manager.max_items] + if self.manager.parsed_args.cli_only_args.max_items_retry: + items = items[: self.manager.parsed_args.cli_only_args.max_items_retry] for item in items: self.manager.task_group.create_task(self.send_to_crawler(item)) @@ -225,16 +223,14 @@ async def filter_and_send_to_crawler(self, scrape_item: ScrapeItem) -> None: @staticmethod def create_item_from_link(link: URL) -> ScrapeItem: - item = ScrapeItem(url=link, parent_title="") - item.completed_at = None - item.created_at = None + item = ScrapeItem(url=link) return item @staticmethod def create_item_from_entry(entry: list) -> ScrapeItem: - link = URL(entry[0]) + url = URL(entry[0]) retry_path = Path(entry[1]) - scrape_item = ScrapeItem(link, parent_title="", part_of_album=True, retry=True, retry_path=retry_path) + scrape_item = ScrapeItem(url=url, part_of_album=True, retry=True, retry_path=retry_path) completed_at = entry[2] created_at = entry[3] if not isinstance(scrape_item.url, URL): @@ -284,7 +280,7 @@ async def send_to_crawler(self, scrape_item: ScrapeItem) -> None: success = False try: download_folder = get_download_path(self.manager, scrape_item, "jdownloader") - relative_download_dir = download_folder.relative_to(self.manager.path_manager.download_dir) + relative_download_dir = download_folder.relative_to(self.manager.path_manager.download_folder) self.jdownloader.direct_unsupported_to_jdownloader( scrape_item.url, 
scrape_item.parent_title, @@ -316,18 +312,18 @@ def filter_items(self, scrape_item: ScrapeItem) -> bool: log(f"Skipping {scrape_item.url} as it is a blocked domain", 10) return False - before = self.manager.args_manager.before - after = self.manager.args_manager.after + before = self.manager.parsed_args.cli_only_args.completed_before + after = self.manager.parsed_args.cli_only_args.completed_after if is_outside_date_range(scrape_item, before, after): log(f"Skipping {scrape_item.url} as it is outside of the desired date range", 10) return False - skip_hosts = self.manager.config_manager.settings_data["Ignore_Options"]["skip_hosts"] + skip_hosts = self.manager.config_manager.settings_data.ignore_options.skip_hosts if skip_hosts and is_in_domain_list(scrape_item, skip_hosts): log(f"Skipping URL by skip_hosts config: {scrape_item.url}", 10) return False - only_hosts = self.manager.config_manager.settings_data["Ignore_Options"]["only_hosts"] + only_hosts = self.manager.config_manager.settings_data.ignore_options.only_hosts if only_hosts and not is_in_domain_list(scrape_item, only_hosts): log(f"Skipping URL by only_hosts config: {scrape_item.url}", 10) return False @@ -347,7 +343,7 @@ async def skip_no_crawler_by_config(self, scrape_item: ScrapeItem) -> bool: posible_referer = scrape_item.parents[-1] if scrape_item.parents else scrape_item.url check_referer = False - if self.manager.config_manager.settings_data["Download_Options"]["skip_referer_seen_before"]: + if self.manager.config_manager.settings_data.download_options.skip_referer_seen_before: check_referer = await self.manager.db_manager.temp_referer_table.check_referer(posible_referer) if check_referer: diff --git a/cyberdrop_dl/ui/program_ui.py b/cyberdrop_dl/ui/program_ui.py index 2ae079544..77f15d972 100644 --- a/cyberdrop_dl/ui/program_ui.py +++ b/cyberdrop_dl/ui/program_ui.py @@ -84,7 +84,7 @@ def _download(self) -> True: def _retry_failed_download(self) -> True: """Sets retry failed and starts download process.""" - self.manager.args_manager.retry_failed = True + self.manager.parsed_args.cli_only_args.retry_failed = True return True def _scan_and_create_hashes(self) -> None: @@ -129,6 +129,9 @@ def _import_v4_download_history(self) -> None: def _change_config(self) -> None: configs = self.manager.config_manager.get_configs() selected_config = user_prompts.select_config(configs) + if selected_config.casefold() == "all": + self.manager.multiconfig = True + return self.manager.config_manager.change_config(selected_config) def _view_changelog(self) -> None: @@ -159,15 +162,20 @@ def _delete_cached_responses(self) -> None: self.print_error("function reserved for future version") def _edit_auth_config(self) -> None: - config_file = self.manager.path_manager.config_dir / "authentication.yaml" + config_file = self.manager.path_manager.config_folder / "authentication.yaml" self._open_in_text_editor(config_file) def _edit_global_config(self) -> None: - config_file = self.manager.path_manager.config_dir / "global_settings.yaml" + config_file = self.manager.path_manager.config_folder / "global_settings.yaml" self._open_in_text_editor(config_file) def _edit_config(self) -> None: - config_file = self.manager.path_manager.config_dir / self.manager.config_manager.loaded_config / "settings.yaml" + if self.manager.multiconfig: + self.print_error("Cannot edit 'ALL' config") + return + config_file = ( + self.manager.path_manager.config_folder / self.manager.config_manager.loaded_config / "settings.yaml" + ) self._open_in_text_editor(config_file) def
_create_new_config(self) -> None: @@ -175,7 +183,7 @@ def _create_new_config(self) -> None: if not config_name: return self.manager.config_manager.change_config(config_name) - config_file = self.manager.path_manager.config_dir / config_name / "settings.yaml" + config_file = self.manager.path_manager.config_folder / config_name / "settings.yaml" self._open_in_text_editor(config_file) def _edit_urls(self) -> None: @@ -245,7 +253,7 @@ def _process_answer(self, answer: Any, options_map=dict) -> Choice | None: def _get_changelog(self) -> str: """Get latest changelog file from github. Returns its content.""" - path = self.manager.path_manager.config_dir.parent / "CHANGELOG.md" + path = self.manager.path_manager.config_folder.parent / "CHANGELOG.md" url = "https://raw.githubusercontent.com/jbsparrow/CyberDropDownloader/refs/heads/master/CHANGELOG.md" _, latest_version = check_latest_pypi(log_to_console=False) name = f"{path.stem}_{latest_version}{path.suffix}" diff --git a/cyberdrop_dl/ui/progress/hash_progress.py b/cyberdrop_dl/ui/progress/hash_progress.py index c367e89ce..e50472a4f 100644 --- a/cyberdrop_dl/ui/progress/hash_progress.py +++ b/cyberdrop_dl/ui/progress/hash_progress.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING -from humanfriendly import format_size +from pydantic import ByteSize from rich.console import Group from rich.panel import Panel from rich.progress import BarColumn, Progress @@ -68,10 +68,10 @@ def get_removed_progress(self) -> Panel: def update_currently_hashing(self, file: Path) -> None: self.current_hashing_text.update(self.currently_hashing_task_id, description=f"[blue]{file}") - + file_size = ByteSize(file.stat().st_size) self.current_hashing_text.update( self.currently_hashing_size_task_id, - description=f"[blue]{format_size(file.stat().st_size)}", + description=f"[blue]{file_size.human_readable(decimal=True)}", ) def add_new_completed_hash(self) -> None: diff --git a/cyberdrop_dl/ui/prompts/user_prompts.py b/cyberdrop_dl/ui/prompts/user_prompts.py index 662d4c3a9..4e1997105 100644 --- a/cyberdrop_dl/ui/prompts/user_prompts.py +++ b/cyberdrop_dl/ui/prompts/user_prompts.py @@ -102,7 +102,7 @@ def _check_valid_new_config_name(answer: str, manager: Manager) -> str | None: if answer.casefold() in RESERVED_CONFIG_NAMES: msg = f"[bold red]ERROR:[/bold red] Config name '{answer}' is a reserved internal name" - elif manager.path_manager.config_dir.joinpath(answer).is_dir(): + elif manager.path_manager.config_folder.joinpath(answer).is_dir(): msg = f"[bold red]ERROR:[/bold red] Config with name '{answer}' already exists!" 
if msg: console.print(msg) @@ -117,7 +117,7 @@ def _check_valid_new_config_name(answer: str, manager: Manager) -> str | None: def auto_cookie_extraction(manager: Manager): answer = basic_prompts.ask_toggle("Enable auto cookies import:") - manager.config_manager.settings_data["Browser_Cookies"]["auto_import"] = answer + manager.config_manager.settings_data.browser_cookies.auto_import = answer if answer: extract_cookies(manager, dry_run=True) manager.config_manager.write_updated_settings_config() @@ -144,8 +144,8 @@ def extract_cookies(manager: Manager, *, dry_run: bool = False) -> None: browsers = list(map(str.capitalize, BROWSERS)) if dry_run: - manager.config_manager.settings_data["Browser_Cookies"]["browsers"] = browsers - manager.config_manager.settings_data["Browser_Cookies"]["sites"] = domains + manager.config_manager.settings_data.browser_cookies.browsers = browsers + manager.config_manager.settings_data.browser_cookies.sites = domains return get_cookies_from_browsers(manager, browsers=browsers, domains=domains) diff --git a/cyberdrop_dl/utils/args.py b/cyberdrop_dl/utils/args.py new file mode 100644 index 000000000..8a225a198 --- /dev/null +++ b/cyberdrop_dl/utils/args.py @@ -0,0 +1,200 @@ +import sys +from argparse import SUPPRESS, ArgumentDefaultsHelpFormatter, ArgumentParser, BooleanOptionalAction +from argparse import _ArgumentGroup as ArgGroup +from pathlib import Path +from typing import Self + +import arrow +from pydantic import BaseModel, Field, ValidationError, computed_field, field_validator, model_validator + +from cyberdrop_dl import __version__ +from cyberdrop_dl.config_definitions import ConfigSettings, GlobalSettings +from cyberdrop_dl.config_definitions.custom_types import AliasModel, HttpURL +from cyberdrop_dl.utils.utilities import handle_validation_error + + +def _check_mutually_exclusive(group: set, msg: str) -> None: + if sum(1 for value in group if value) >= 2: + raise ValueError(msg) + + +class CommandLineOnlyArgs(BaseModel): + links: list[HttpURL] = Field([], description="link(s) to content to download (passing multiple links is supported)") + appdata_folder: Path | None = Field(None, description="AppData folder path") + completed_after: int | None = Field(None, description="only download completed downloads at or after this date") + completed_before: int | None = Field(None, description="only download completed downloads at or before this date") + config: str | None = Field(None, description="name of config to load") + config_file: Path | None = Field(None, description="path to the CDL settings.yaml file to load") + download: bool = Field(False, description="skips UI, starts download immediately") + max_items_retry: int = Field(0, description="max number of links to retry") + no_ui: bool = Field(False, description="disables the UI/progress view entirely") + retry_all: bool = Field(False, description="retry all downloads") + retry_failed: bool = Field(False, description="retry failed downloads") + retry_maintenance: bool = Field( + False, description="retry download of maintenance files (bunkr).
Requires files to be hashed" ) + + @computed_field + @property + def retry_any(self) -> bool: + return any((self.retry_all, self.retry_failed, self.retry_maintenance)) + + @computed_field + @property + def multiconfig(self) -> bool: + return self.config and self.config.casefold() == "all" + + @field_validator("completed_after", "completed_before", mode="after") + @staticmethod + def arrow_date(value: int) -> arrow.Arrow | None: + return None if not value else arrow.get(value) + + @model_validator(mode="after") + def mutually_exclusive(self) -> Self: + group1 = {self.retry_all, self.retry_failed, self.retry_maintenance} + msg1 = "'--retry-all', '--retry-maintenance' and '--retry-failed' are mutually exclusive" + _check_mutually_exclusive(group1, msg1) + group2 = {self.config, self.config_file} + msg2 = "'--config' and '--config-file' are mutually exclusive" + _check_mutually_exclusive(group2, msg2) + return self + + +class DeprecatedArgs(BaseModel): + download_all_configs: bool = Field( + False, + description="Skip the UI and go straight to downloading (runs all configs sequentially)", + deprecated="'--download-all-configs' is deprecated and may be removed in the future. Use '--download --config all'", + ) + sort_all_configs: bool = Field( + False, + description="Sort all configs sequentially", + deprecated="'--sort-all-configs' is deprecated and may be removed in the future. Use '--sort-downloads --config all'", + ) + sort_all_downloads: bool = Field( + False, + description="sort all downloads, not just those downloaded by Cyberdrop-DL", + deprecated="'--sort-all-downloads' is deprecated and may be removed in the future. Use '--no-sort-cdl-only'", + ) + + +class ParsedArgs(AliasModel): + cli_only_args: CommandLineOnlyArgs = CommandLineOnlyArgs() + config_settings: ConfigSettings = ConfigSettings() + deprecated_args: DeprecatedArgs = DeprecatedArgs() + global_settings: GlobalSettings = GlobalSettings() + + def model_post_init(self, _) -> None: + if self.cli_only_args.retry_all or self.cli_only_args.retry_maintenance: + self.config_settings.runtime_options.ignore_history = True + if self.deprecated_args.sort_all_configs: + self.config_settings.sorting.sort_downloads = True + self.cli_only_args.download = True + self.cli_only_args.config = "ALL" + if self.deprecated_args.sort_all_downloads: + self.config_settings.sorting.sort_cdl_only = False + if self.deprecated_args.download_all_configs: + self.cli_only_args.download = True + self.cli_only_args.config = "ALL" + if self.cli_only_args.no_ui: + self.cli_only_args.download = True + if self.cli_only_args.retry_any: + self.cli_only_args.download = True + if self.cli_only_args.config_file: + self.cli_only_args.download = True + + @staticmethod + def parse_args() -> Self: + """Parses the command line arguments passed into the program.
Returns an instance of `ParsedArgs`""" + return parse_args() + + +def _add_args_from_model( + parser: ArgumentParser, model: type[BaseModel], *, cli_args: bool = False, deprecated: bool = False +) -> None: + for name, field in model.model_fields.items(): + cli_name = name.replace("_", "-") + arg_type = type(field.default) + if arg_type not in (list, set, bool): + arg_type = str + help_text = field.description or "" + default = field.default if cli_args else SUPPRESS + default_options = {"default": default, "dest": name, "help": help_text} + name_or_flags = [f"--{cli_name}"] + alias = field.alias or field.validation_alias or field.serialization_alias + if alias and len(alias) == 1: + name_or_flags.insert(0, f"-{alias}") + if arg_type is bool: + action = BooleanOptionalAction + default_options.pop("default") + if cli_args: + action = "store_false" if default else "store_true" + if deprecated: + default_options = default_options | {"default": SUPPRESS} + parser.add_argument(*name_or_flags, action=action, **default_options) + continue + if cli_name == "links": + default_options.pop("dest") + parser.add_argument(cli_name, metavar="LINK(S)", nargs="*", **default_options) + continue + if arg_type in (list, set): + parser.add_argument(*name_or_flags, nargs="*", **default_options) + continue + parser.add_argument(*name_or_flags, type=arg_type, **default_options) + + +def _create_groups_from_nested_models(parser: ArgumentParser, model: type[BaseModel]) -> list[ArgGroup]: + groups: list[ArgGroup] = [] + for name, field in model.model_fields.items(): + submodel = field.annotation + submodel_group = parser.add_argument_group(name) + _add_args_from_model(submodel_group, submodel) + groups.append(submodel_group) + return groups + + +def parse_args() -> ParsedArgs: + """Parses the command line arguments passed into the program.""" + parser = ArgumentParser( + description="Bulk asynchronous downloader for multiple file hosts", + usage="cyberdrop-dl [OPTIONS] URL [URL...]", + epilog="Visit the wiki for additional details: https://script-ware.gitbook.io/cyberdrop-dl", + formatter_class=ArgumentDefaultsHelpFormatter, + ) + parser.add_argument("-V", "--version", action="version", version=f"%(prog)s {__version__}") + + cli_only = parser.add_argument_group("CLI-only Options") + _add_args_from_model(cli_only, CommandLineOnlyArgs, cli_args=True) + + group_lists = { + "config_settings": _create_groups_from_nested_models(parser, ConfigSettings), + "global_settings": _create_groups_from_nested_models(parser, GlobalSettings), + "cli_only_args": [cli_only], + } + + deprecated = parser.add_argument_group("Deprecated") + _add_args_from_model(deprecated, DeprecatedArgs, cli_args=True, deprecated=True) + group_lists["deprecated_args"] = [deprecated] + + args = parser.parse_args() + parsed_args = {} + for name, groups in group_lists.items(): + parsed_args[name] = {} + for group in groups: + group_dict = { + arg.dest: getattr(args, arg.dest) + for arg in group._group_actions + if getattr(args, arg.dest, None) is not None + } + if group_dict: + parsed_args[name][group.title] = group_dict + + parsed_args["deprecated_args"] = parsed_args["deprecated_args"].get("Deprecated") or {} + parsed_args["cli_only_args"] = parsed_args["cli_only_args"]["CLI-only Options"] + try: + parsed_args = ParsedArgs.model_validate(parsed_args) + + except ValidationError as e: + handle_validation_error(e, title="CLI arguments") + sys.exit(1) + return parsed_args diff --git a/cyberdrop_dl/utils/args/__init__.py b/cyberdrop_dl/utils/args/__init__.py
deleted file mode 100644 index e69de29bb..000000000 diff --git a/cyberdrop_dl/utils/args/args.py b/cyberdrop_dl/utils/args/args.py deleted file mode 100644 index f675c6b86..000000000 --- a/cyberdrop_dl/utils/args/args.py +++ /dev/null @@ -1,347 +0,0 @@ -import argparse - -import arrow - -from cyberdrop_dl import __version__ -from cyberdrop_dl.utils.data_enums_classes.supported_domains import SupportedDomains - - -def parse_args() -> argparse.Namespace: - """Parses the command line arguments passed into the program.""" - parser = argparse.ArgumentParser(description="Bulk downloader for multiple file hosts") - general = parser.add_argument_group("General") - general.add_argument("-V", "--version", action="version", version=f"%(prog)s {__version__}") - general.add_argument("--config", type=str, help="name of config to load", default="") - general.add_argument("--proxy", type=str, help="manually specify proxy string", default="") - general.add_argument("--flaresolverr", type=str, help="IP:PORT for flaresolverr", default="") - general.add_argument("--no-ui", action="store_true", help="Disables the UI/Progress view entirely", default=False) - general.add_argument( - "--download", - action="store_true", - help="Skip the UI and go straight to downloading", - default=False, - ) - general.add_argument( - "--download-all-configs", - action="store_true", - help="Skip the UI and go straight to downloading (runs all configs sequentially)", - default=False, - ) - general.add_argument("--sort-all-configs", action="store_true", help="Sort all configs sequentially", default=False) - general.add_argument("--retry-failed", action="store_true", help="retry failed downloads", default=False) - general.add_argument("--retry-all", action="store_true", help="retry all downloads", default=False) - general.add_argument( - "--retry-maintenance", - action="store_true", - help="retry all failed downloads due to maintenance, only supports bunkr and requires files to be hashed", - default=False, - ) - general.add_argument( - "--completed-after", - help="only download completed downloads at or after this date", - default=None, - type=lambda x: None if not x else arrow.get(x), - ) - general.add_argument( - "--completed-before", - help="only download completed downloads at or before this date", - default=None, - type=lambda x: None if not x else arrow.get(x), - ) - general.add_argument("--max-items-retry", help="max number of links to retry", type=int) - - # File Paths - file_paths = parser.add_argument_group("File_Paths") - file_paths.add_argument( - "-i", - "--input-file", - type=str, - help="path to txt file containing urls to download", - default="", - ) - file_paths.add_argument( - "-d", - "--output-folder", - dest="download_dir", - type=str, - help="path to download folder", - default="", - ) - file_paths.add_argument("--config-file", type=str, help="path to the CDL settings.yaml file to load", default="") - file_paths.add_argument( - "--appdata-folder", - dest="appdata_dir", - type=str, - help="path to where you want CDL to store it's AppData folder", - default="", - ) - file_paths.add_argument( - "--log-folder", - dest="log_dir", - type=str, - help="path to where you want CDL to store it's log files", - default="", - ) - file_paths.add_argument("--main-log-filename", type=str, help="filename for the main log file", default="") - file_paths.add_argument( - "--last-forum-post-filename", - type=str, - help="filename for the last forum post log file", - default="", - ) - file_paths.add_argument( - 
"--unsupported-urls-filename", - type=str, - help="filename for the unsupported urls log file", - default="", - ) - file_paths.add_argument( - "--download-error-urls-filename", - type=str, - help="filename for the download error urls log file", - default="", - ) - file_paths.add_argument( - "--scrape-error-urls-filename", - type=str, - help="filename for the scrape error urls log file", - default="", - ) - file_paths.add_argument("--webhook_url", help="Discord webhook url to send download recap to", default="") - - # Settings - download_options = parser.add_argument_group("Download_Options") - download_options.add_argument( - "--block-download-sub-folders", - action=argparse.BooleanOptionalAction, - help="block sub folder creation", - ) - download_options.add_argument( - "--disable-download-attempt-limit", - action=argparse.BooleanOptionalAction, - help="disable download attempt limit", - ) - download_options.add_argument( - "--disable-file-timestamps", - action=argparse.BooleanOptionalAction, - help="disable file timestamps", - ) - download_options.add_argument( - "--include-album-id-in-folder-name", - action=argparse.BooleanOptionalAction, - help="include album id in folder name", - ) - download_options.add_argument( - "--include-thread-id-in-folder-name", - action=argparse.BooleanOptionalAction, - help="include thread id in folder name", - ) - download_options.add_argument( - "--remove-domains-from-folder-names", - action=argparse.BooleanOptionalAction, - help="remove website domains from folder names", - ) - download_options.add_argument( - "--remove-generated-id-from-filenames", - action=argparse.BooleanOptionalAction, - help="remove site generated id from filenames", - ) - download_options.add_argument( - "--scrape-single-forum-post", - action=argparse.BooleanOptionalAction, - help="scrape single forum post", - ) - download_options.add_argument( - "--separate-posts", - action=argparse.BooleanOptionalAction, - help="separate posts into folders", - ) - download_options.add_argument( - "--skip-download-mark-completed", - action=argparse.BooleanOptionalAction, - help="skip download and mark as completed in history", - ) - download_options.add_argument( - "--skip-referer-seen-before", - action=argparse.BooleanOptionalAction, - help="skip download if referer has been seen before", - ) - download_options.add_argument( - "--maximum-number-of-children", - nargs="*", - type=int, - help="max number of children an scrape item can have", - default=[], - ) - - file_size_limits = parser.add_argument_group("File_Size_Limits") - file_size_limits.add_argument( - "--maximum-image-size", - type=int, - help="maximum image size in bytes (default: %(default)s)", - default=0, - ) - file_size_limits.add_argument( - "--maximum-video-size", - type=int, - help="maximum video size in bytes (default: %(default)s)", - default=0, - ) - file_size_limits.add_argument( - "--maximum-other-size", - type=int, - help="maximum other size in bytes (default: %(default)s)", - default=0, - ) - file_size_limits.add_argument( - "--minimum-image-size", - type=int, - help="minimum image size in bytes (default: %(default)s)", - default=0, - ) - file_size_limits.add_argument( - "--minimum-video-size", - type=int, - help="minimum video size in bytes (default: %(default)s)", - default=0, - ) - file_size_limits.add_argument( - "--minimum-other-size", - type=int, - help="minimum other size in bytes (default: %(default)s)", - default=0, - ) - - ignore_options = parser.add_argument_group("Ignore_Options") - ignore_options.add_argument( - 
"--exclude-videos", - action=argparse.BooleanOptionalAction, - help="exclude videos from downloading", - ) - ignore_options.add_argument( - "--exclude-images", - action=argparse.BooleanOptionalAction, - help="exclude images from downloading", - ) - ignore_options.add_argument( - "--exclude-audio", - action=argparse.BooleanOptionalAction, - help="exclude images from downloading", - ) - ignore_options.add_argument( - "--exclude-other", - action=argparse.BooleanOptionalAction, - help="exclude other files from downloading", - ) - ignore_options.add_argument( - "--ignore-coomer-ads", - action=argparse.BooleanOptionalAction, - help="ignore coomer ads when scraping", - ) - ignore_options.add_argument( - "--skip-hosts", - choices=SupportedDomains.supported_hosts, - action="append", - help="skip these domains when scraping", - default=[], - ) - ignore_options.add_argument( - "--only-hosts", - choices=SupportedDomains.supported_hosts, - action="append", - help="only scrape these domains", - default=[], - ) - - runtime_options = parser.add_argument_group("Runtime_Options") - runtime_options.add_argument( - "--ignore-history", - action=argparse.BooleanOptionalAction, - help="ignore history when scraping", - ) - runtime_options.add_argument("--log-level", type=int, help="set the log level (default: %(default)s)", default=10) - runtime_options.add_argument( - "--skip-check-for-partial-files", - action=argparse.BooleanOptionalAction, - help="skip check for partial downloads", - ) - runtime_options.add_argument( - "--skip-check-for-empty-folders", - action=argparse.BooleanOptionalAction, - help="skip check (and removal) for empty folders", - ) - runtime_options.add_argument( - "--delete-partial-files", - action=argparse.BooleanOptionalAction, - help="delete partial downloads", - ) - runtime_options.add_argument( - "--send-unsupported-to-jdownloader", - action=argparse.BooleanOptionalAction, - help="send unsupported urls to jdownloader", - ) - runtime_options.add_argument( - "--update-last-forum-post", - action=argparse.BooleanOptionalAction, - help="update the last forum post", - ) - - sorting_options = parser.add_argument_group("Sorting") - sorting_options.add_argument( - "--sort-downloads", - action=argparse.BooleanOptionalAction, - help="sort downloads into folders", - ) - sorting_options.add_argument( - "--sort-all-downloads", - action=argparse.BooleanOptionalAction, - help="sort all downloads, not just those downloaded by Cyberdrop-DL", - ) - sorting_options.add_argument( - "--sort-folder", - type=str, - help="path to where you want CDL to store it's log files", - default="", - ) - sorting_options.add_argument( - "--scan-folder", - type=str, - help="path to scan for files, if not set then the download_dir is used", - default="", - ) - - ui_options = parser.add_argument_group("UI_Options") - ui_options.add_argument("--vi-mode", action="store_true", help="enable VIM keybindings for UI", default=None) - ui_options.add_argument( - "--refresh-rate", - type=int, - help="refresh rate for the UI (default: %(default)s)", - default=10, - ) - ui_options.add_argument( - "--scraping-item-limit", - type=int, - help="number of lines to allow for scraping items before overflow (default: %(default)s)", - default=5, - ) - ui_options.add_argument( - "--downloading-item-limit", - type=int, - help="number of lines to allow for downloading items before overflow (default: %(default)s)", - default=5, - ) - - # Links - parser.add_argument( - "links", - metavar="link", - nargs="*", - help="link to content to download (passing 
multiple links is supported)", - default=[], - ) - - args = parser.parse_args() - # set ignore history on retry_all - if args.retry_all or args.retry_maintenance: - args.ignore_history = True - return args diff --git a/cyberdrop_dl/utils/args/config_definitions.py b/cyberdrop_dl/utils/args/config_definitions.py deleted file mode 100644 index 346fa403c..000000000 --- a/cyberdrop_dl/utils/args/config_definitions.py +++ /dev/null @@ -1,168 +0,0 @@ -from __future__ import annotations - -from cyberdrop_dl.utils import constants - -authentication_settings: dict = { - "Forums": { - "celebforum_xf_user_cookie": "", - "celebforum_username": "", - "celebforum_password": "", - "f95zone_xf_user_cookie": "", - "f95zone_username": "", - "f95zone_password": "", - "leakedmodels_xf_user_cookie": "", - "leakedmodels_username": "", - "leakedmodels_password": "", - "nudostar_xf_user_cookie": "", - "nudostar_username": "", - "nudostar_password": "", - "simpcity_xf_user_cookie": "", - "simpcity_username": "", - "simpcity_password": "", - "socialmediagirls_xf_user_cookie": "", - "socialmediagirls_username": "", - "socialmediagirls_password": "", - "xbunker_xf_user_cookie": "", - "xbunker_username": "", - "xbunker_password": "", - }, - "Coomer": { - "session": "", - }, - "XXXBunker": { - "PHPSESSID": "", - }, - "GoFile": { - "gofile_api_key": "", - }, - "Imgur": { - "imgur_client_id": "", - }, - "JDownloader": { - "jdownloader_username": "", - "jdownloader_password": "", - "jdownloader_device": "", - }, - "PixelDrain": { - "pixeldrain_api_key": "", - }, - "RealDebrid": { - "realdebrid_api_key": "", - }, - "Reddit": { - "reddit_personal_use_script": "", - "reddit_secret": "", - }, -} - -settings: dict = { - "Download_Options": { - "block_download_sub_folders": False, - "disable_download_attempt_limit": False, - "disable_file_timestamps": False, - "include_album_id_in_folder_name": False, - "include_thread_id_in_folder_name": False, - "remove_domains_from_folder_names": False, - "remove_generated_id_from_filenames": False, - "scrape_single_forum_post": False, - "separate_posts": False, - "skip_download_mark_completed": False, - "skip_referer_seen_before": False, - "maximum_number_of_children": [], - }, - "Files": { - "input_file": str(constants.APP_STORAGE / "Configs" / "{config}" / "URLs.txt"), - "download_folder": str(constants.DOWNLOAD_STORAGE), - }, - "Logs": { - "log_folder": str(constants.APP_STORAGE / "Configs" / "{config}" / "Logs"), - "webhook_url": "", - "main_log_filename": "downloader.log", - "last_forum_post_filename": "Last_Scraped_Forum_Posts.csv", - "unsupported_urls_filename": "Unsupported_URLs.csv", - "download_error_urls_filename": "Download_Error_URLs.csv", - "scrape_error_urls_filename": "Scrape_Error_URLs.csv", - "rotate_logs": False, - }, - "File_Size_Limits": { - "maximum_image_size": 0, - "maximum_other_size": 0, - "maximum_video_size": 0, - "minimum_image_size": 0, - "minimum_other_size": 0, - "minimum_video_size": 0, - }, - "Ignore_Options": { - "exclude_videos": False, - "exclude_images": False, - "exclude_audio": False, - "exclude_other": False, - "ignore_coomer_ads": False, - "skip_hosts": [], - "only_hosts": [], - }, - "Runtime_Options": { - "ignore_history": False, - "log_level": 10, - "console_log_level": 100, - "skip_check_for_partial_files": False, - "skip_check_for_empty_folders": False, - "delete_partial_files": False, - "update_last_forum_post": True, - "send_unsupported_to_jdownloader": False, - "jdownloader_download_dir": None, - "jdownloader_autostart": False, - 
"jdownloader_whitelist": [], - }, - "Sorting": { - "sort_downloads": False, - "sort_folder": str(constants.DOWNLOAD_STORAGE / "Cyberdrop-DL Sorted Downloads"), - "scan_folder": None, - "sort_cdl_only": True, - "sort_incremementer_format": " ({i})", - "sorted_audio": "{sort_dir}/{base_dir}/Audio/{filename}{ext}", - "sorted_image": "{sort_dir}/{base_dir}/Images/{filename}{ext}", - "sorted_other": "{sort_dir}/{base_dir}/Other/{filename}{ext}", - "sorted_video": "{sort_dir}/{base_dir}/Videos/{filename}{ext}", - }, - "Browser_Cookies": { - "browsers": ["Chrome"], - "auto_import": False, - "sites": [], - }, - "Dupe_Cleanup_Options": { - "hashing": "IN_PLACE", - "auto_dedupe": True, - "add_md5_hash": False, - "add_sha256_hash": False, - "send_deleted_to_trash": True, - }, -} - -global_settings: dict = { - "General": { - "allow_insecure_connections": False, - "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0", - "proxy": "", - "flaresolverr": "", - "max_file_name_length": 95, - "max_folder_name_length": 60, - "required_free_space": 5, - }, - "Rate_Limiting_Options": { - "connection_timeout": 15, - "download_attempts": 5, - "read_timeout": 300, - "rate_limit": 50, - "download_delay": 0.5, - "max_simultaneous_downloads": 15, - "max_simultaneous_downloads_per_domain": 3, - "download_speed_limit": 0, - }, - "UI_Options": { - "vi_mode": False, - "refresh_rate": 10, - "scraping_item_limit": 5, - "downloading_item_limit": 5, - }, -} diff --git a/cyberdrop_dl/utils/constants.py b/cyberdrop_dl/utils/constants.py index dac156444..da77ce8fb 100644 --- a/cyberdrop_dl/utils/constants.py +++ b/cyberdrop_dl/utils/constants.py @@ -1,5 +1,5 @@ import re -from enum import IntEnum +from enum import IntEnum, StrEnum, auto from pathlib import Path from rich.text import Text @@ -48,7 +48,19 @@ class CustomHTTPStatus(IntEnum): APP_STORAGE = Path("./AppData") DOWNLOAD_STORAGE = Path("./Downloads") RESERVED_CONFIG_NAMES = ["all", "default"] -BROWSERS = ["chrome", "firefox", "safari", "edge", "opera", "brave", "librewolf", "opera_gx", "vivaldi", "chromium"] + + +class BROWSERS(StrEnum): + chrome = auto() + firefox = auto() + safari = auto() + edge = auto() + opera = auto() + brave = auto() + librewolf = auto() + opera_gx = auto() + vivaldi = auto() + chromium = auto() # Pypi diff --git a/cyberdrop_dl/utils/cookie_extraction.py b/cyberdrop_dl/utils/cookie_extraction.py index ae9c3a235..3833c1917 100644 --- a/cyberdrop_dl/utils/cookie_extraction.py +++ b/cyberdrop_dl/utils/cookie_extraction.py @@ -1,7 +1,6 @@ from __future__ import annotations import contextlib -import re from functools import wraps from http.cookiejar import MozillaCookieJar from textwrap import dedent @@ -62,13 +61,10 @@ def get_cookies_from_browsers( msg = "No domains selected" raise ValueError(msg) - browsers = browsers or manager.config_manager.settings_data["Browser_Cookies"]["browsers"] - if browsers: - browsers = list(map(str.lower, re.split(r"[ ,]+", browsers))) - if domains: - domains = list(map(str.lower, re.split(r"[ ,]+", domains))) - else: - domains = list(SupportedDomains.supported_hosts) + browsers = browsers or manager.config_manager.settings_data.browser_cookies.browsers + domains = domains or manager.config_manager.settings_data.browser_cookies.sites + browsers = list(map(str.lower, browsers)) + domains = list(map(str.lower, domains)) extractors = [getattr(browser_cookie3, b) for b in browsers if hasattr(browser_cookie3, b)] diff --git a/cyberdrop_dl/utils/data_enums_classes/url_objects.py 
b/cyberdrop_dl/utils/data_enums_classes/url_objects.py index 466e5ef61..ca2e37448 100644 --- a/cyberdrop_dl/utils/data_enums_classes/url_objects.py +++ b/cyberdrop_dl/utils/data_enums_classes/url_objects.py @@ -1,6 +1,7 @@ from __future__ import annotations -from dataclasses import field +from dataclasses import dataclass, field +from enum import IntEnum from pathlib import Path from typing import TYPE_CHECKING @@ -11,69 +12,61 @@ from yarl import URL -FORUM = 0 -FORUM_POST = 1 -FILE_HOST_PROFILE = 2 -FILE_HOST_ALBUM = 3 -SCRAPE_ITEM_TYPES = [FORUM, FORUM_POST, FILE_HOST_PROFILE, FILE_HOST_ALBUM] +class ScrapeItemType(IntEnum): + FORUM = 0 + FORUM_POST = 1 + FILE_HOST_PROFILE = 2 + FILE_HOST_ALBUM = 3 +FORUM = ScrapeItemType.FORUM +FORUM_POST = ScrapeItemType.FORUM_POST +FILE_HOST_PROFILE = ScrapeItemType.FILE_HOST_PROFILE +FILE_HOST_ALBUM = ScrapeItemType.FILE_HOST_ALBUM + + +@dataclass(unsafe_hash=True) class MediaItem: - def __init__( - self, - url: URL, - origin: ScrapeItem, - download_folder: Path, - filename: Path | str, - original_filename: Path | str | None = None, - debrid_link: URL | None = None, - ) -> None: - self.url: URL = url - self.referer: URL = origin.url - self.debrid_link: URL | None = debrid_link - self.album_id: str | None = origin.album_id - self.download_folder: Path = download_folder - self.filename: str = str(filename) - self.ext: str = Path(filename).suffix - self.download_filename: str = field(init=False) - self.original_filename: str = str(original_filename) if original_filename else self.filename - self.file_lock_reference_name: str = field(init=False) - self.datetime: str = field(init=False) - self.parents = origin.parents.copy() + url: URL + origin: ScrapeItem + download_folder: Path + filename: str + original_filename: str | None = None + debrid_link: URL | None = None - self.filesize: int = field(init=False) - self.current_attempt: int = field(init=False) + file_lock_reference_name: str | None = field(default=None, init=False) + download_filename: str | None = field(default=None, init=False) + datetime: str | None = field(default=None, init=False) + filesize: int | None = field(default=None, init=False) + current_attempt: int = field(default=0, init=False) + partial_file: Path | None = field(default=None, init=False) + complete_file: Path | None = field(default=None, init=False) + task_id: TaskID | None = field(default=None, init=False) - self.partial_file: Path | None = field(init=False) - self.complete_file: Path = field(init=False) - self.task_id: TaskID = field(init=False) + def __post_init__(self) -> None: + self.referer = self.origin.url + self.album_id = self.origin.album_id + self.ext = Path(self.filename).suffix + self.original_filename = self.original_filename or self.filename + self.parents = self.origin.parents.copy() +@dataclass(kw_only=True) class ScrapeItem: - def __init__( - self, - url: URL, - parent_title: str, - part_of_album: bool = False, - album_id: str | None = None, - possible_datetime: int | None = None, - retry: bool = False, - retry_path: Path | None = None, - ) -> None: - self.url: URL = url - self.parent_title: str = parent_title - # WARNING: unsafe but deepcopy is used when a new child item is created - self.parents: list[URL] = [] - self.children: int = 0 - self.children_limit: int = 0 - self.type: int | None = None - self.part_of_album: bool = part_of_album - self.album_id: str | None = album_id - self.possible_datetime: int = possible_datetime - self.retry: bool = retry - self.retry_path: Path = retry_path - 
self.completed_at = None - self.created_at = None + url: URL + parent_title: str = "" + part_of_album: bool = False + album_id: str | None = None + possible_datetime: int | None = None + retry: bool = False + retry_path: Path | None = None + + parents: list[URL] = field(default_factory=list, init=False) + children: int = field(default=0, init=False) + children_limit: int = field(default=0, init=False) + type: int | None = field(default=None, init=False) + completed_at: int | None = field(default=None, init=False) + created_at: int | None = field(default=None, init=False) def add_to_parent_title(self, title: str) -> None: """Adds a title to the parent title.""" diff --git a/cyberdrop_dl/utils/database/tables/history_table.py b/cyberdrop_dl/utils/database/tables/history_table.py index 4094c8667..949e151f3 100644 --- a/cyberdrop_dl/utils/database/tables/history_table.py +++ b/cyberdrop_dl/utils/database/tables/history_table.py @@ -128,7 +128,7 @@ async def insert_incompleted(self, domain: str, media_item: MediaItem) -> None: """Inserts an uncompleted file into the database.""" domain = get_db_domain(domain) url_path = get_db_path(media_item.url, str(media_item.referer)) - download_filename = media_item.download_filename if isinstance(media_item.download_filename, str) else "" + download_filename = media_item.download_filename or "" try: await self.db_conn.execute( """UPDATE media SET domain = ?, album_id = ? WHERE domain = 'no_crawler' and url_path = ? and referer = ?""", diff --git a/cyberdrop_dl/utils/logger.py b/cyberdrop_dl/utils/logger.py index 02cb73f4d..7faffd498 100644 --- a/cyberdrop_dl/utils/logger.py +++ b/cyberdrop_dl/utils/logger.py @@ -11,9 +11,12 @@ logger_debug = logging.getLogger("cyberdrop_dl_debug") console = Console() +ERROR_PREFIX = "\n[bold red]ERROR: [/bold red]" -def print_to_console(text: Text | str) -> None: - console.print(text) + +def print_to_console(text: Text | str, *, error: bool = False, **kwargs) -> None: + msg = (ERROR_PREFIX + text) if error else text + console.print(msg, **kwargs) def log(message: Exception | str, level: int = 10, *, sleep: int | None = None, **kwargs) -> None: diff --git a/cyberdrop_dl/utils/sorting.py b/cyberdrop_dl/utils/sorting.py index 744944c7f..d7b5262cc 100644 --- a/cyberdrop_dl/utils/sorting.py +++ b/cyberdrop_dl/utils/sorting.py @@ -29,18 +29,18 @@ def get_file_date_in_us_ca_formats(file: Path) -> tuple[str, str]: class Sorter: def __init__(self, manager: Manager) -> None: self.manager = manager - self.download_dir = manager.path_manager.scan_dir or manager.path_manager.download_dir - self.sorted_downloads = manager.path_manager.sorted_dir - self.incrementer_format: str = manager.config_manager.settings_data["Sorting"]["sort_incremementer_format"] - self.sort_cdl_only = manager.config_manager.settings_data["Sorting"]["sort_cdl_only"] - if manager.config_manager.settings_data["Download_Options"]["skip_download_mark_completed"]: + self.download_dir = manager.path_manager.scan_folder or manager.path_manager.download_folder + self.sorted_downloads = manager.path_manager.sorted_folder + self.incrementer_format: str = manager.config_manager.settings_data.sorting.sort_incremementer_format + self.sort_cdl_only = manager.config_manager.settings_data.sorting.sort_cdl_only + if manager.config_manager.settings_data.download_options.skip_download_mark_completed: self.sort_cdl_only = False self.db_manager = manager.db_manager - self.audio_format: str = manager.config_manager.settings_data["Sorting"]["sorted_audio"] - self.image_format: str = 
manager.config_manager.settings_data["Sorting"]["sorted_image"] - self.video_format: str = manager.config_manager.settings_data["Sorting"]["sorted_video"] - self.other_format: str = manager.config_manager.settings_data["Sorting"]["sorted_other"] + self.audio_format: str = manager.config_manager.settings_data.sorting.sorted_audio + self.image_format: str = manager.config_manager.settings_data.sorting.sorted_image + self.video_format: str = manager.config_manager.settings_data.sorting.sorted_video + self.other_format: str = manager.config_manager.settings_data.sorting.sorted_other self.audio_count = 0 self.image_count = 0 diff --git a/cyberdrop_dl/utils/transfer/first_time_setup.py b/cyberdrop_dl/utils/transfer/first_time_setup.py index cac9927f6..d8b2000d1 100644 --- a/cyberdrop_dl/utils/transfer/first_time_setup.py +++ b/cyberdrop_dl/utils/transfer/first_time_setup.py @@ -1,14 +1,12 @@ from __future__ import annotations -import copy import shutil from pathlib import Path from typing import TYPE_CHECKING import platformdirs -import yaml -from cyberdrop_dl.utils import constants +from cyberdrop_dl.utils import constants, yaml from cyberdrop_dl.utils.transfer.transfer_v4_db import transfer_v4_db if TYPE_CHECKING: @@ -72,53 +70,41 @@ def startup(self) -> None: if Path("./Unsupported_URLs.csv").is_file(): Path("./Unsupported_URLs.csv").rename(OLD_FILES / "Unsupported_URLs.csv") - self.update_cache(constants.APP_STORAGE / "Cache" / "cache.yaml") + self.set_first_startup_completed(constants.APP_STORAGE / "Cache" / "cache.yaml") @staticmethod def check_cache_for_moved(cache_file: Path) -> bool: """Checks the cache for moved files.""" - with cache_file.open() as yaml_file: - cache = yaml.load(yaml_file.read(), Loader=yaml.FullLoader) - if cache is None: - with cache_file.open("w") as yaml_file: - cache = {"first_startup_completed": False} - yaml.dump(cache, yaml_file) + cache = yaml.load(cache_file, create=True) + if not cache: + cache = {"first_startup_completed": False} + yaml.save(cache_file, cache) return bool(cache.get("first_startup_completed", False)) @staticmethod - def update_cache(cache_file: Path) -> None: + def set_first_startup_completed(cache_file: Path) -> None: """Updates the cache to reflect the new location.""" - cache_file.parent.mkdir(parents=True, exist_ok=True) - cache_file.touch(exist_ok=True) - with cache_file.open() as yaml_file: - cache = yaml.load(yaml_file.read(), Loader=yaml.FullLoader) - if cache is None: - cache = {"first_startup_completed": False} + cache = yaml.load(cache_file, create=True) cache["first_startup_completed"] = True - with cache_file.open("w") as yaml_file: - yaml.dump(cache, yaml_file) + yaml.save(cache_file, cache) @staticmethod def update_default_config(cache_file: Path, config_name: str) -> None: """Updates the default config in the cache.""" - cache_file.parent.mkdir(parents=True, exist_ok=True) - cache_file.touch(exist_ok=True) - with cache_file.open() as yaml_file: - cache = yaml.load(yaml_file.read(), Loader=yaml.FullLoader) - if cache is None: + cache = yaml.load(cache_file, create=True) + if not cache: cache = {"first_startup_completed": False} cache["default_config"] = config_name - with cache_file.open("w") as yaml_file: - yaml.dump(cache, yaml_file) + yaml.save(cache_file, cache) @staticmethod def transfer_v4_config(old_config_path: Path, new_config_name: str) -> None: """Transfers a V4 config into V5 possession.""" - from cyberdrop_dl.utils.args.config_definitions import authentication_settings, global_settings, settings + from 
cyberdrop_dl.config_definitions import AuthSettings, ConfigSettings, GlobalSettings - new_auth_data = copy.deepcopy(authentication_settings) - new_user_data = copy.deepcopy(settings) - new_global_settings_data = copy.deepcopy(global_settings) + new_auth_data = AuthSettings().model_dump() + new_user_data = ConfigSettings().model_dump() + new_global_settings_data = GlobalSettings().model_dump() if Path("./URLs.txt").is_file(): new_user_data["Files"]["input_file"] = Path("./URLs.txt") @@ -128,8 +114,7 @@ def transfer_v4_config(old_config_path: Path, new_config_name: str) -> None: new_user_data["Logs"]["log_folder"] = constants.APP_STORAGE / "Configs" / new_config_name / "Logs" new_user_data["Sorting"]["sort_folder"] = constants.DOWNLOAD_STORAGE / "Cyberdrop-DL Sorted Downloads" - with old_config_path.open() as yaml_file: - old_data = yaml.load(yaml_file.read(), Loader=yaml.FullLoader) + old_data = yaml.load(old_config_path) old_data = old_data["Configuration"] # Auth data transfer @@ -224,12 +209,9 @@ def transfer_v4_config(old_config_path: Path, new_config_name: str) -> None: # Write config new_config_path = constants.APP_STORAGE / "Configs" / new_config_name / "settings.yaml" - new_config_path.parent.mkdir(parents=True, exist_ok=True) - with new_config_path.open("w") as yaml_file: - yaml.dump(new_user_data, yaml_file) new_auth_path = constants.APP_STORAGE / "Configs" / "authentication.yaml" - with new_auth_path.open("w") as yaml_file: - yaml.dump(new_auth_data, yaml_file) new_global_settings_path = constants.APP_STORAGE / "Configs" / "global_settings.yaml" - with new_global_settings_path.open("w") as yaml_file: - yaml.dump(new_global_settings_data, yaml_file) + new_config_path.parent.mkdir(parents=True, exist_ok=True) + yaml.save(new_config_path, new_user_data) + yaml.save(new_auth_path, new_auth_data) + yaml.save(new_global_settings_path, new_global_settings_data) diff --git a/cyberdrop_dl/utils/transfer/transfer_v4_config.py b/cyberdrop_dl/utils/transfer/transfer_v4_config.py index fbaa81f04..c79c26242 100644 --- a/cyberdrop_dl/utils/transfer/transfer_v4_config.py +++ b/cyberdrop_dl/utils/transfer/transfer_v4_config.py @@ -1,29 +1,17 @@ -import copy from pathlib import Path -import yaml - +from cyberdrop_dl.config_definitions import AuthSettings, ConfigSettings, GlobalSettings from cyberdrop_dl.managers.manager import Manager -from cyberdrop_dl.utils.args.config_definitions import settings - - -def _save_yaml(file: Path, data: dict) -> None: - """Saves a dict to a yaml file.""" - file.parent.mkdir(parents=True, exist_ok=True) - with file.open("w") as yaml_file: - yaml.dump(data, yaml_file) - - -def _load_yaml(file: Path) -> dict: - """Loads a yaml file and returns it as a dict.""" - with file.open() as yaml_file: - return yaml.load(yaml_file.read(), Loader=yaml.FullLoader) +from cyberdrop_dl.utils import yaml def transfer_v4_config(manager: Manager, new_config_name: str, old_config_path: Path) -> None: """Transfers a V4 config into V5 possession.""" - new_auth_data = manager.config_manager.authentication_data - new_user_data = copy.deepcopy(settings) + new_auth_data = AuthSettings().model_dump() + new_user_data = ConfigSettings().model_dump() + new_global_data = GlobalSettings().model_dump() + old_data = yaml.load(old_config_path) + old_data = old_data["Configuration"] from cyberdrop_dl.managers.path_manager import constants @@ -32,10 +20,6 @@ def transfer_v4_config(manager: Manager, new_config_name: str, old_config_path: new_user_data["Logs"]["log_folder"] = constants.APP_STORAGE / 
"Configs" / new_config_name / "Logs" new_user_data["Sorting"]["sort_folder"] = constants.DOWNLOAD_STORAGE / "Cyberdrop-DL Sorted Downloads" - new_global_data = manager.config_manager.global_settings_data - old_data = _load_yaml(old_config_path) - old_data = old_data["Configuration"] - # Auth data transfer new_auth_data["Forums"]["nudostar_username"] = old_data["Authentication"]["nudostar_username"] new_auth_data["Forums"]["nudostar_password"] = old_data["Authentication"]["nudostar_password"] @@ -111,15 +95,15 @@ def transfer_v4_config(manager: Manager, new_config_name: str, old_config_path: ] # Save Data - new_settings = manager.path_manager.config_dir / new_config_name / "settings.yaml" - new_logs = manager.path_manager.config_dir / new_config_name / "Logs" + new_settings = manager.path_manager.config_folder / new_config_name / "settings.yaml" + new_logs = manager.path_manager.config_folder / new_config_name / "Logs" new_settings.parent.mkdir(parents=True, exist_ok=True) new_logs.mkdir(parents=True, exist_ok=True) old_config_path = Path(old_config_path).parent old_urls_path = Path(old_data["Files"]["input_file"]) - new_urls = manager.path_manager.config_dir / new_config_name / "URLs.txt" + new_urls = manager.path_manager.config_folder / new_config_name / "URLs.txt" new_urls.touch(exist_ok=True) if old_urls_path.is_absolute(): @@ -137,7 +121,9 @@ def transfer_v4_config(manager: Manager, new_config_name: str, old_config_path: else: new_urls.touch(exist_ok=True) - manager.config_manager.create_new_config(new_settings, new_user_data) + manager.config_manager.authentication_data = AuthSettings.model_validate(new_auth_data) + manager.config_manager.global_settings_data = GlobalSettings.model_validate(new_global_data) + manager.config_manager.save_as_new_config(new_settings, ConfigSettings.model_validate(new_user_data)) manager.config_manager.write_updated_authentication_config() manager.config_manager.write_updated_global_settings_config() manager.config_manager.change_config(new_config_name) diff --git a/cyberdrop_dl/utils/utilities.py b/cyberdrop_dl/utils/utilities.py index 44d79dc5f..61253aa0b 100644 --- a/cyberdrop_dl/utils/utilities.py +++ b/cyberdrop_dl/utils/utilities.py @@ -13,6 +13,7 @@ import apprise import rich from aiohttp import ClientSession, FormData +from pydantic import ValidationError from rich.text import Text from yarl import URL @@ -20,6 +21,7 @@ from cyberdrop_dl.managers.real_debrid.errors import RealDebridError from cyberdrop_dl.utils import constants from cyberdrop_dl.utils.logger import log, log_with_color +from cyberdrop_dl.utils.yaml import handle_validation_error if TYPE_CHECKING: from collections.abc import Callable @@ -112,7 +114,7 @@ def get_filename_and_ext(filename: str, forum: bool = False) -> tuple[str, str]: def get_download_path(manager: Manager, scrape_item: ScrapeItem, domain: str) -> Path: """Returns the path to the download folder.""" - download_dir = manager.path_manager.download_dir + download_dir = manager.path_manager.download_folder if scrape_item.retry: return scrape_item.retry_path @@ -126,7 +128,7 @@ def get_download_path(manager: Manager, scrape_item: ScrapeItem, domain: str) -> def remove_file_id(manager: Manager, filename: str, ext: str) -> tuple[str, str]: """Removes the additional string some websites adds to the end of every filename.""" original_filename = filename - if not manager.config_manager.settings_data["Download_Options"]["remove_generated_id_from_filenames"]: + if not 
manager.config_manager.settings_data.download_options.remove_generated_id_from_filenames: return original_filename, filename filename = filename.rsplit(ext, 1)[0] @@ -187,15 +189,15 @@ def purge_dir_tree(dirname: Path) -> None: async def check_partials_and_empty_folders(manager: Manager) -> None: """Checks for partial downloads and empty folders.""" - if manager.config_manager.settings_data["Runtime_Options"]["delete_partial_files"]: + if manager.config_manager.settings_data.runtime_options.delete_partial_files: log_with_color("Deleting partial downloads...", "bold_red", 20) - partial_downloads = manager.path_manager.download_dir.rglob("*.part") + partial_downloads = manager.path_manager.download_folder.rglob("*.part") for file in partial_downloads: file.unlink(missing_ok=True) - elif not manager.config_manager.settings_data["Runtime_Options"]["skip_check_for_partial_files"]: + elif not manager.config_manager.settings_data.runtime_options.skip_check_for_partial_files: log_with_color("Checking for partial downloads...", "yellow", 20) - partial_downloads = any(f.is_file() for f in manager.path_manager.download_dir.rglob("*.part")) + partial_downloads = any(f.is_file() for f in manager.path_manager.download_folder.rglob("*.part")) if partial_downloads: log_with_color("There are partial downloads in the downloads folder", "yellow", 20) temp_downloads = any(Path(f).is_file() for f in await manager.db_manager.temp_table.get_temp_names()) @@ -203,11 +205,11 @@ async def check_partials_and_empty_folders(manager: Manager) -> None: msg = "There are partial downloads from the previous run, please re-run the program." log_with_color(msg, "yellow", 20) - if not manager.config_manager.settings_data["Runtime_Options"]["skip_check_for_empty_folders"]: + if not manager.config_manager.settings_data.runtime_options.skip_check_for_empty_folders: log_with_color("Checking for empty folders...", "yellow", 20) - purge_dir_tree(manager.path_manager.download_dir) - if isinstance(manager.path_manager.sorted_dir, Path): - purge_dir_tree(manager.path_manager.sorted_dir) + purge_dir_tree(manager.path_manager.download_folder) + if isinstance(manager.path_manager.sorted_folder, Path): + purge_dir_tree(manager.path_manager.sorted_folder) def check_latest_pypi(log_to_console: bool = True, call_from_ui: bool = False) -> tuple[str, str]: @@ -280,29 +282,30 @@ def check_prelease_version(current_version: str, releases: list[str]) -> tuple[s def sent_apprise_notifications(manager: Manager) -> None: - apprise_file = manager.path_manager.config_dir / manager.config_manager.loaded_config / "apprise.txt" + apprise_file = manager.path_manager.config_folder / manager.config_manager.loaded_config / "apprise.txt" text: Text = constants.LOG_OUTPUT_TEXT constants.LOG_OUTPUT_TEXT = Text("") if not apprise_file.is_file(): return - with apprise_file.open(encoding="utf8") as file: - lines = [line.strip() for line in file] + from cyberdrop_dl.config_definitions.custom_types import AppriseURL - if not lines: + try: + with apprise_file.open(encoding="utf8") as file: + apprise_urls = [AppriseURL(line.strip()) for line in file] + except ValidationError as e: + sources = {"AppriseURLModel": apprise_file} + handle_validation_error(e, sources=sources) + return + + if not apprise_urls: return rich.print("\nSending notifications.. 
") apprise_obj = apprise.Apprise() - for line in lines: - parts = line.split("://", 1)[0].split("=", 1) - url = line - tags = "no_logs" - if len(parts) == 2: - tags, url = line.split("=", 1) - tags = tags.split(",") - apprise_obj.add(url, tag=tags) + for apprise_url in apprise_urls: + apprise_obj.add(apprise_url.url, tag=apprise_url.tags) results = [] result = apprise_obj.notify( @@ -340,26 +343,19 @@ def sent_apprise_notifications(manager: Manager) -> None: async def send_webhook_message(manager: Manager) -> None: """Outputs the stats to a code block for webhook messages.""" - webhook_url: str = manager.config_manager.settings_data["Logs"]["webhook_url"] + webhook = manager.config_manager.settings_data.logs.webhook - if not webhook_url: + if not webhook: return - url = webhook_url.strip() - parts = url.split("://", 1)[0].split("=", 1) - tags = ["no_logs"] - if len(parts) == 2: - tags, url = url.split("=", 1) - tags = tags.split(",") - - url = URL(url) + url = webhook.url.get_secret_value() text: Text = constants.LOG_OUTPUT_TEXT plain_text = parse_rich_text_by_style(text, constants.STYLE_TO_DIFF_FORMAT_MAP) main_log = manager.path_manager.main_log form = FormData() - if "attach_logs" in tags and main_log.is_file(): + if "attach_logs" in webhook.tags and main_log.is_file(): if main_log.stat().st_size <= 25 * 1024 * 1024: async with aiofiles.open(main_log, "rb") as f: form.add_field("file", await f.read(), filename=main_log.name) diff --git a/cyberdrop_dl/utils/yaml.py b/cyberdrop_dl/utils/yaml.py new file mode 100644 index 000000000..c2b842f39 --- /dev/null +++ b/cyberdrop_dl/utils/yaml.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +from enum import Enum +from pathlib import Path, PurePath + +import yaml +from pydantic import BaseModel, ValidationError +from yarl import URL + +from cyberdrop_dl.clients.errors import InvalidYamlError +from cyberdrop_dl.utils.logger import print_to_console + + +def _save_as_str(dumper: yaml.Dumper, value): + if isinstance(value, Enum): + return dumper.represent_str(value.name) + return dumper.represent_str(str(value)) + + +yaml.add_multi_representer(PurePath, _save_as_str) +yaml.add_multi_representer(Enum, _save_as_str) +yaml.add_representer(URL, _save_as_str) + +VALIDATION_ERROR_FOOTER = """ +Read the documentation for guidance on how to resolve this error: https://script-ware.gitbook.io/cyberdrop-dl/reference/configuration-options +Please note, this is not a bug. 
Do not open issues related to this""" + + +def save(file: Path, data: BaseModel | dict) -> None: + """Saves a dict to a yaml file.""" + if isinstance(data, BaseModel): + data = data.model_dump() + file.parent.mkdir(parents=True, exist_ok=True) + with file.open("w", encoding="utf8") as yaml_file: + yaml.dump(data, yaml_file) + + +def load(file: Path, *, create: bool = False) -> dict: + """Loads a yaml file and returns it as a dict.""" + if create: + file.parent.mkdir(parents=True, exist_ok=True) + if not file.is_file(): + file.touch() + try: + with file.open(encoding="utf8") as yaml_file: + yaml_values = yaml.safe_load(yaml_file.read()) + return yaml_values if yaml_values else {} + except yaml.constructor.ConstructorError as e: + raise InvalidYamlError(file, e) from None + + +def handle_validation_error(e: ValidationError, *, title: str | None = None, sources: dict | None = None): + error_count = e.error_count() + source: Path = sources.get(e.title, None) if sources else None + title = title or e.title + source = f"from {source.resolve()}" if source else "" + msg = f"found {error_count} error{'s' if error_count>1 else ''} parsing {title} {source}" + print_to_console(msg, error=True) + for error in e.errors(include_url=False): + loc = ".".join(map(str, error["loc"])) + if title == "CLI arguments": + loc = error["loc"][-1] + if isinstance(error["loc"][-1], int): + loc = ".".join(map(str, error["loc"][-2:])) + loc = f"--{loc}" + msg = f"\nValue of '{loc}' is invalid:" + print_to_console(msg, markup=False) + print_to_console( + f" {error['msg']} (input_value='{error['input']}', input_type='{error['type']}')", style="bold red" + ) + print_to_console(VALIDATION_ERROR_FOOTER) diff --git a/poetry.lock b/poetry.lock index f64c8c93e..3f88570e9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -594,20 +594,6 @@ files = [ {file = "get_video_properties-0.1.1-py3-none-any.whl", hash = "sha256:04d4f478a5211917e2a7e87ddfcb1c17734cddf8374494c3993bf825b7ad4192"}, ] -[[package]] -name = "humanfriendly" -version = "10.0" -description = "Human friendly output for text interfaces using Python" -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" -files = [ - {file = "humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477"}, - {file = "humanfriendly-10.0.tar.gz", hash = "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc"}, -] - -[package.dependencies] -pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_version >= \"3.8\""} - [[package]] name = "identify" version = "2.6.2" @@ -1301,20 +1287,6 @@ files = [ [package.extras] windows-terminal = ["colorama (>=0.4.6)"] -[[package]] -name = "pyreadline3" -version = "3.5.4" -description = "A python implementation of GNU readline." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6"}, - {file = "pyreadline3-3.5.4.tar.gz", hash = "sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7"}, -] - -[package.extras] -dev = ["build", "flake8", "mypy", "pytest", "twine"] - [[package]] name = "python-dateutil" version = "2.9.0.post0" @@ -1847,4 +1819,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.13" -content-hash = "29b7d6f3aa7f841efebc9c7b253a37018edccb332e2baf1303c60d4db88abdb7" +content-hash = "bdb9e96c3c4448c862be5e47e5ed7e69338a139d693e96c6a9201947eed72ee5" diff --git a/pyproject.toml b/pyproject.toml index f42edd9f3..f51316e57 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,7 +28,6 @@ mediafire = "^0.6.1" mutagen = "^1.47.0" pillow = "^10.2.0" get-video-properties = "^0.1.1" -humanfriendly = "^10.0" send2trash = "^1.8.3" arrow = "^1.3.0" apprise = "^1.9.0"