Skip to content

Commit

Permalink
refactor: use pydantic models for config validation (jbsparrow#316)
Browse files Browse the repository at this point in the history
* refactor: use pydantic for config validation

* refactor: replace every config setting reference

* refactor: use yaml manager for every read/write

* refactor:  add AppriseURL type

* refactor: use proper types

* refactor: add default config values

* refactor: add aliases

* refactor: add yaml custom representers

* refactor: add pydantic ValidationError handle

* fix: AppriseURL Custom Model

* refactor: remove SecretStr from auth data

They are annoying to work with and we never log them so it's fine to use regular str

* refactor: add HttpAppriseURL validator

* refactor: use StrEnum for ScrapeItemType

* refactor: replace yaml_manager with a module

* refactor: use dataclasses for url_objects

* refactor: update configs if some fields were missing

* refactor: use pydantic models to create CLI args dynamically

* refactor: update parsed_args references

delete args_manager and use an instance of ParsedArgs

* refactor: add aliases for input_file and download_folder

* refactor: do args consolidation before path_startup

* fix: incorrect merge of cli and config settings

* fix: MediaItem references

* sync: rebase from master

* refactor: update config to pydantic config if necessary

* refactor: remove dedupe options from global settings

* refactor: add footer to InvalidYamlError

* fix: deprecated warnings

* refactor: add "ALL" config

* fix: circular import

* fix: download_speed_limit

* fix: validation error handle for apprise.txt

* fix: optional deprecated arguments

* refactor: remove humanfriendly dependency
  • Loading branch information
NTFSvolume authored Nov 28, 2024
1 parent 3ff244e commit 1947b52
Show file tree
Hide file tree
Showing 82 changed files with 1,363 additions and 1,682 deletions.
52 changes: 27 additions & 25 deletions cyberdrop_dl/clients/download_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
from collections.abc import Callable, Coroutine
from typing import Any

from yarl import URL

from cyberdrop_dl.managers.client_manager import ClientManager
from cyberdrop_dl.managers.manager import Manager
from cyberdrop_dl.utils.data_enums_classes.url_objects import MediaItem
Expand Down Expand Up @@ -90,6 +92,15 @@ async def on_request_end(*args):

"""~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"""

def add_api_key_headers(self, domain: str, referer: URL):
    """Build the headers for a download request.

    Copies the client's base headers, sets ``Referer`` and, for pixeldrain
    downloads when an API key is configured, adds a basic-auth
    ``Authorization`` header.

    Args:
        domain: The domain the download belongs to (e.g. ``"pixeldrain"``).
        referer: The referer URL to send with the request.

    Returns:
        dict: The headers dict to use for this download request.
    """
    download_headers = copy.deepcopy(self._headers)
    download_headers["Referer"] = str(referer)
    auth_data = self.manager.config_manager.authentication_data
    if domain == "pixeldrain" and auth_data.pixeldrain.api_key:
        download_headers["Authorization"] = self.manager.download_manager.basic_auth(
            "Cyberdrop-DL", auth_data.pixeldrain.api_key
        )
    # BUG FIX: the caller (`_download`) assigns this method's result to
    # `download_headers`; without an explicit return it received ``None``.
    return download_headers

@limiter
async def _download(
self,
Expand All @@ -100,23 +111,14 @@ async def _download(
client_session: ClientSession,
) -> bool:
"""Downloads a file."""
download_headers = copy.deepcopy(self._headers)
download_headers["Referer"] = str(media_item.referer)
if (
domain == "pixeldrain"
and self.manager.config_manager.authentication_data["PixelDrain"]["pixeldrain_api_key"]
):
download_headers["Authorization"] = self.manager.download_manager.basic_auth(
"Cyberdrop-DL",
self.manager.config_manager.authentication_data["PixelDrain"]["pixeldrain_api_key"],
)
download_headers = self.add_api_key_headers(domain, media_item.referer)

downloaded_filename = await self.manager.db_manager.history_table.get_downloaded_filename(domain, media_item)
download_dir = self.get_download_dir(media_item)
media_item.partial_file = download_dir / f"{downloaded_filename}.part"

resume_point = 0
if isinstance(media_item.partial_file, Path) and media_item.partial_file.exists():
if media_item.partial_file and media_item.partial_file.exists():
resume_point = media_item.partial_file.stat().st_size if media_item.partial_file.exists() else 0
download_headers["Range"] = f"bytes={resume_point}-"

Expand All @@ -136,7 +138,7 @@ async def _download(
content_type = resp.headers.get("Content-Type")

media_item.filesize = int(resp.headers.get("Content-Length", "0"))
if not isinstance(media_item.complete_file, Path):
if not media_item.complete_file:
proceed, skip = await self.get_final_file_info(media_item, domain)
await self.mark_incomplete(media_item, domain)
self.client_manager.check_bunkr_maint(resp.headers)
Expand Down Expand Up @@ -198,7 +200,7 @@ async def _append_content(

async def download_file(self, manager: Manager, domain: str, media_item: MediaItem) -> bool:
"""Starts a file."""
if self.manager.config_manager.settings_data["Download_Options"]["skip_download_mark_completed"]:
if self.manager.config_manager.settings_data.download_options.skip_download_mark_completed:
log(f"Download Skip {media_item.url} due to mark completed option", 10)
self.manager.progress_manager.download_progress.add_skipped()
# set completed path
Expand Down Expand Up @@ -235,7 +237,7 @@ async def mark_completed(self, domain: str, media_item: MediaItem) -> None:
await self.manager.db_manager.history_table.mark_complete(domain, media_item)

async def add_file_size(self, domain: str, media_item: MediaItem) -> None:
if not isinstance(media_item.complete_file, Path):
if not media_item.complete_file:
media_item.complete_file = self.get_file_location(media_item)
if media_item.complete_file.exists():
await self.manager.db_manager.history_table.add_filesize(domain, media_item)
Expand All @@ -253,11 +255,11 @@ async def handle_media_item_completion(self, media_item: MediaItem, downloaded:
def get_download_dir(self, media_item: MediaItem) -> Path:
"""Returns the download directory for the media item."""
download_folder = media_item.download_folder
if self.manager.args_manager.retry_any:
if self.manager.parsed_args.cli_only_args.retry_any:
return download_folder

if self.manager.config_manager.settings_data["Download_Options"]["block_download_sub_folders"]:
while download_folder.parent != self.manager.path_manager.download_dir:
if self.manager.config_manager.settings_data.download_options.block_download_sub_folders:
while download_folder.parent != self.manager.path_manager.download_folder:
download_folder = download_folder.parent
media_item.download_folder = download_folder
return download_folder
Expand All @@ -271,7 +273,7 @@ async def get_final_file_info(self, media_item: MediaItem, domain: str) -> tuple
media_item.complete_file = self.get_file_location(media_item)
media_item.partial_file = media_item.complete_file.with_suffix(media_item.complete_file.suffix + ".part")

expected_size = media_item.filesize if isinstance(media_item.filesize, int) else None
expected_size = media_item.filesize
proceed = True
skip = False

Expand Down Expand Up @@ -352,13 +354,13 @@ async def iterate_filename(self, complete_file: Path, media_item: MediaItem) ->

def check_filesize_limits(self, media: MediaItem) -> bool:
"""Checks if the file size is within the limits."""
file_size_limits = self.manager.config_manager.settings_data["File_Size_Limits"]
max_video_filesize = file_size_limits["maximum_video_size"] or float("inf")
min_video_filesize = file_size_limits["minimum_video_size"]
max_image_filesize = file_size_limits["maximum_image_size"] or float("inf")
min_image_filesize = file_size_limits["minimum_image_size"]
max_other_filesize = file_size_limits["maximum_other_size"] or float("inf")
min_other_filesize = file_size_limits["minimum_other_size"]
file_size_limits = self.manager.config_manager.settings_data.file_size_limits
max_video_filesize = file_size_limits.maximum_video_size or float("inf")
min_video_filesize = file_size_limits.minimum_video_size
max_image_filesize = file_size_limits.maximum_image_size or float("inf")
min_image_filesize = file_size_limits.minimum_image_size
max_other_filesize = file_size_limits.maximum_other_size or float("inf")
min_other_filesize = file_size_limits.minimum_other_size

if media.ext in FILE_FORMATS["Images"]:
proceed = min_image_filesize < media.filesize < max_image_filesize
Expand Down
7 changes: 5 additions & 2 deletions cyberdrop_dl/clients/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
from cyberdrop_dl.scraper.crawler import ScrapeItem
from cyberdrop_dl.utils.data_enums_classes.url_objects import MediaItem

# Shared footer appended to config-validation error messages, steering users
# to the configuration docs instead of the issue tracker.
VALIDATION_ERROR_FOOTER = """
Read the documentation for guidance on how to resolve this error: https://script-ware.gitbook.io/cyberdrop-dl/reference/configuration-options
Please note, this is not a bug. Do not open issues related to this"""


class CDLBaseError(Exception):
"""Base exception for cyberdrop-dl errors."""
Expand Down Expand Up @@ -130,6 +134,5 @@ class InvalidYamlError(CDLBaseError):
def __init__(self, file: Path, e: ConstructorError) -> None:
"""This error will be thrown when a yaml config file has invalid values."""
mark = e.problem_mark if hasattr(e, "problem_mark") else e
message = f"ERROR: File '{file}' has an invalid config. Please verify and edit it manually\n {mark}"
self.message_rich = message.replace("ERROR:", "[bold red]ERROR:[/bold red]")
message = f"File '{file.resolve()}' has an invalid config. Please verify and edit it manually\n {mark}\n\n{VALIDATION_ERROR_FOOTER}"
super().__init__("Invalid YAML", message=message, origin=file)
19 changes: 7 additions & 12 deletions cyberdrop_dl/clients/hash_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from send2trash import send2trash

from cyberdrop_dl.ui.prompts.basic_prompts import enter_to_continue
from cyberdrop_dl.utils.data_enums_classes.hash import Hashing
from cyberdrop_dl.utils.logger import log

if TYPE_CHECKING:
Expand Down Expand Up @@ -67,9 +68,9 @@ def _get_key_from_file(file: Path | str):

async def hash_item_helper(self, file: Path | str, original_filename: str, referer: URL):
hash = await self.hash_item(file, original_filename, referer, hash_type=self.xxhash)
if self.manager.config_manager.settings_data["Dupe_Cleanup_Options"]["add_md5_hash"]:
if self.manager.config_manager.settings_data.dupe_cleanup_options.add_md5_hash:
await self.hash_item(file, original_filename, referer, hash_type=self.md5)
if self.manager.config_manager.settings_data["Dupe_Cleanup_Options"]["add_sha256_hash"]:
if self.manager.config_manager.settings_data.dupe_cleanup_options.add_sha256_hash:
await self.hash_item(file, original_filename, referer, hash_type=self.sha256)
return hash

Expand Down Expand Up @@ -110,23 +111,17 @@ async def hash_item(self, file: Path | str, original_filename: str, referer: URL

async def hash_item_during_download(self, media_item: MediaItem) -> None:
try:
if (
self.manager.config_manager.settings_data["Dupe_Cleanup_Options"]["hashing"]
!= self.manager.config_manager.settings_data["Dupe_Cleanup_Options"]["hashing"].IN_PLACE
):
if self.manager.config_manager.settings_data.dupe_cleanup_options.hashing != Hashing.IN_PLACE:
return
await self.hash_item_helper(media_item.complete_file, media_item.original_filename, media_item.referer)
except Exception as e:
log(f"After hash processing failed: {media_item.complete_file} with error {e}", 40, exc_info=True)

async def cleanup_dupes_after_download(self) -> None:
with self.manager.live_manager.get_hash_live(stop=True):
if (
self.manager.config_manager.settings_data["Dupe_Cleanup_Options"]["hashing"]
== self.manager.config_manager.settings_data["Dupe_Cleanup_Options"]["hashing"].OFF
):
if self.manager.config_manager.settings_data.dupe_cleanup_options.hashing == Hashing.OFF:
return
if not self.manager.config_manager.settings_data["Dupe_Cleanup_Options"]["auto_dedupe"]:
if not self.manager.config_manager.settings_data.dupe_cleanup_options.auto_dedupe:
return
file_hashes_dict = await self.get_file_hashes_dict()
with self.manager.live_manager.get_remove_file_via_hash_live(stop=True):
Expand Down Expand Up @@ -172,7 +167,7 @@ async def get_file_hashes_dict(self) -> dict:
return hashes_dict

def send2trash(self, path: Path) -> None:
if not self.manager.config_manager.settings_data["Dupe_Cleanup_Options"]["send_deleted_to_trash"]:
if not self.manager.config_manager.settings_data.dupe_cleanup_options.send_deleted_to_trash:
Path(path).unlink(missing_ok=True)
log(f"permanently deleted file at {path}", 10)
return True
Expand Down
9 changes: 9 additions & 0 deletions cyberdrop_dl/config_definitions/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"""Public API of the ``config_definitions`` package."""

from .authentication_settings import AuthSettings
from .config_settings import ConfigSettings
from .global_settings import GlobalSettings

# BUG FIX: ``__all__`` must be a sequence of *strings*. The previous value was
# a set of the class objects themselves, which makes
# ``from cyberdrop_dl.config_definitions import *`` raise
# ``TypeError: Item in __all__ must be str``.
__all__ = [
    "AuthSettings",
    "ConfigSettings",
    "GlobalSettings",
]
74 changes: 74 additions & 0 deletions cyberdrop_dl/config_definitions/authentication_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
from pydantic import BaseModel, Field

from .custom_types import AliasModel


class ForumAuth(BaseModel):
    """Per-forum login credentials.

    For each supported forum there is a session-cookie value plus a
    username/password pair. The empty-string defaults mean "not configured".
    """

    # NOTE(review): the *_xf_user_cookie fields appear to hold the forum's
    # XenForo ``xf_user`` session cookie value -- confirm against the
    # crawlers that consume them.
    celebforum_xf_user_cookie: str = ""
    celebforum_username: str = ""
    celebforum_password: str = ""
    f95zone_xf_user_cookie: str = ""
    f95zone_username: str = ""
    f95zone_password: str = ""
    leakedmodels_xf_user_cookie: str = ""
    leakedmodels_username: str = ""
    leakedmodels_password: str = ""
    nudostar_xf_user_cookie: str = ""
    nudostar_username: str = ""
    nudostar_password: str = ""
    simpcity_xf_user_cookie: str = ""
    simpcity_username: str = ""
    simpcity_password: str = ""
    socialmediagirls_xf_user_cookie: str = ""
    socialmediagirls_username: str = ""
    socialmediagirls_password: str = ""
    xbunker_xf_user_cookie: str = ""
    xbunker_username: str = ""
    xbunker_password: str = ""


class CoomerAuth(BaseModel):
    """Coomer authentication."""

    # Session token/cookie value (empty when unset).
    session: str = ""


class XXXBunkerAuth(BaseModel):
    """XXXBunker authentication."""

    # Value of the site's PHPSESSID cookie (empty when unset).
    PHPSESSID: str = ""


class ImgurAuth(BaseModel):
    """Imgur API authentication."""

    # Imgur API client id (empty when unset).
    client_id: str = ""


class JDownloaderAuth(AliasModel):
    """JDownloader account credentials.

    The ``validation_alias`` values accept the old flat ``jdownloader_*``
    key names -- presumably for backward compatibility with pre-pydantic
    config files (confirm against the config-migration code).
    """

    username: str = Field("", validation_alias="jdownloader_username")
    password: str = Field("", validation_alias="jdownloader_password")
    device: str = Field("", validation_alias="jdownloader_device")


class RedditAuth(BaseModel):
    """Reddit API credentials (personal-use script id and secret)."""

    personal_use_script: str = ""
    secret: str = ""


class GoFileAuth(AliasModel):
    """GoFile API key; the alias accepts the legacy ``gofile_api_key`` name."""

    api_key: str = Field("", validation_alias="gofile_api_key")


class PixeldrainAuth(AliasModel):
    """Pixeldrain API key; the alias accepts the legacy ``pixeldrain_api_key`` name."""

    api_key: str = Field("", validation_alias="pixeldrain_api_key")


class RealDebridAuth(AliasModel):
    """Real-Debrid API key; the alias accepts the legacy ``realdebrid_api_key`` name."""

    api_key: str = Field("", validation_alias="realdebrid_api_key")


class AuthSettings(AliasModel):
    """Top-level authentication config grouping per-service credentials.

    Each field's ``validation_alias`` matches the capitalized section name
    used in the YAML auth file (e.g. ``PixelDrain``). ``default_factory``
    (instead of ``default=Model()``) is the idiomatic pydantic form: it
    builds a fresh sub-model per ``AuthSettings`` instance rather than
    relying on pydantic copying a single shared default instance.
    """

    coomer: CoomerAuth = Field(validation_alias="Coomer", default_factory=CoomerAuth)
    forums: ForumAuth = Field(validation_alias="Forums", default_factory=ForumAuth)
    gofile: GoFileAuth = Field(validation_alias="GoFile", default_factory=GoFileAuth)
    imgur: ImgurAuth = Field(validation_alias="Imgur", default_factory=ImgurAuth)
    jdownloader: JDownloaderAuth = Field(validation_alias="JDownloader", default_factory=JDownloaderAuth)
    pixeldrain: PixeldrainAuth = Field(validation_alias="PixelDrain", default_factory=PixeldrainAuth)
    realdebrid: RealDebridAuth = Field(validation_alias="RealDebrid", default_factory=RealDebridAuth)
    reddit: RedditAuth = Field(validation_alias="Reddit", default_factory=RedditAuth)
    xxxbunker: XXXBunkerAuth = Field(validation_alias="XXXBunker", default_factory=XXXBunkerAuth)
Loading

0 comments on commit 1947b52

Please sign in to comment.