
Commit

Fix scan_folder error when set to None (#274)
* Fix scan_folder error when set to None

Fix the error caused by `scan_folder` being stored in the config as the literal string `None` instead of YAML `null` (a normalization sketch follows the diffstat below).

Also pass `html.parser` explicitly to the BeautifulSoup instances that didn't specify a parser (sketched after the client_manager.py diff).

* Ruff formatting

* Automatic config verification

Not complete; it doesn't work right now. Will work on it more later today.

* Fix automatic verification

* Ruff fixes

* Use isinstance instead of type() (illustrated below the config_manager.py diff)

* Update config_manager.py

---------

Co-authored-by: NTFSvolume <[email protected]>
jbsparrow and NTFSvolume authored Nov 13, 2024
1 parent 2e39ab1 commit 861e2ce
Showing 4 changed files with 66 additions and 135 deletions.
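The scan_folder fix boils down to treating both YAML `null` and the literal string `None` as "unset". The sketch below is a hedged, standalone illustration of that normalization, not the project's actual code; `normalize_scan_folder`, `serialize_scan_folder`, and the inline `settings` dict are invented for the example.

```python
from pathlib import Path


def normalize_scan_folder(value: str | None) -> Path | None:
    """Treat YAML null and the literal string "None" as unset."""
    if value in (None, "None"):
        return None
    return Path(value)


def serialize_scan_folder(value: Path | None) -> str | None:
    """Write the folder back as a plain string, or keep it as null when unset."""
    return str(value) if value is not None else None


# A config that was previously saved with the string "None" no longer
# turns into Path("None") on load.
settings = {"Sorting": {"scan_folder": "None"}}
settings["Sorting"]["scan_folder"] = normalize_scan_folder(settings["Sorting"]["scan_folder"])
assert settings["Sorting"]["scan_folder"] is None
```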
2 changes: 1 addition & 1 deletion cyberdrop_dl/managers/client_manager.py
@@ -142,7 +142,7 @@ async def check_http_status(

with contextlib.suppress(UnicodeDecodeError):
response_text = await response.text()
soup = BeautifulSoup(response_text)
soup = BeautifulSoup(response_text, "html.parser")
if cls.check_ddos_guard(soup):
raise DDOSGuardError(origin=origin)

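Passing `"html.parser"` explicitly, as the hunk above does, pins BeautifulSoup to the stdlib parser instead of letting it pick whichever third-party parser happens to be installed, which can change parsing behavior between environments and typically triggers a "no parser was explicitly specified" warning. A minimal before/after sketch, independent of this codebase:

```python
from bs4 import BeautifulSoup

html = "<p>Cloudflare check</p>"

# Parser-less call that this commit replaces: bs4 guesses a parser
# (lxml, html5lib, or html.parser) based on what is installed.
implicit = BeautifulSoup(html)

# Explicit parser: identical behavior on every machine, no guessing.
explicit = BeautifulSoup(html, "html.parser")
print(explicit.p.text)  # "Cloudflare check"
```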
197 changes: 63 additions & 134 deletions cyberdrop_dl/managers/config_manager.py
@@ -120,6 +120,20 @@ def load_configs(self) -> None:
self.settings_data["Sorting"]["scan_folder"] = None
self.write_updated_settings_config()

def return_verified(self, value) -> any:
if isinstance(value, int):
return int(value)
elif isinstance(value, bool):
return bool(value)
elif isinstance(value, str):
return str(value)
elif isinstance(value, list):
return list(value)
elif isinstance(value, dict):
return dict(value)
else:
return value

def _verify_authentication_config(self) -> None:
"""Verifies the authentication config file and creates it if it doesn't exist."""
default_auth_data = copy.deepcopy(authentication_settings)
@@ -137,7 +151,57 @@ def _verify_settings_config(self) -> None:
default_settings_data = copy.deepcopy(settings)
existing_settings_data = _load_yaml(self.settings)
self.settings_data = _match_config_dicts(default_settings_data, existing_settings_data)
self.settings_data["Files"]["input_file"] = Path(self.settings_data["Files"]["input_file"])
self.settings_data["Files"]["download_folder"] = Path(self.settings_data["Files"]["download_folder"])
self.settings_data["Logs"]["log_folder"] = Path(self.settings_data["Logs"]["log_folder"])
self.settings_data["Logs"]["webhook_url"] = (
str(self.settings_data["Logs"]["webhook_url"]) if self.settings_data["Logs"]["webhook_url"] is not None else None
)
self.settings_data["Sorting"]["sort_folder"] = Path(self.settings_data["Sorting"]["sort_folder"])
self.settings_data["Sorting"]["scan_folder"] = (
Path(self.settings_data["Sorting"]["scan_folder"]) if self.settings_data["Sorting"]["scan_folder"] else None
)

# change to ints
self.settings_data["File_Size_Limits"]["maximum_image_size"] = int(
self.settings_data["File_Size_Limits"]["maximum_image_size"],
)

self.settings_data["File_Size_Limits"]["maximum_video_size"] = int(
self.settings_data["File_Size_Limits"]["maximum_video_size"],
)
self.settings_data["File_Size_Limits"]["maximum_other_size"] = int(
self.settings_data["File_Size_Limits"]["maximum_other_size"],
)
self.settings_data["File_Size_Limits"]["minimum_image_size"] = int(
self.settings_data["File_Size_Limits"]["minimum_image_size"],
)
self.settings_data["File_Size_Limits"]["minimum_video_size"] = int(
self.settings_data["File_Size_Limits"]["minimum_video_size"],
)
self.settings_data["File_Size_Limits"]["minimum_other_size"] = int(
self.settings_data["File_Size_Limits"]["minimum_other_size"],
)
paths = [
("Files", "input_file"),
("Files", "download_folder"),
("Logs", "log_folder"),
("Sorting", "sort_folder"),
("Sorting", "scan_folder"),
]

for key, value in default_settings_data.items():
for subkey, subvalue in value.items():
self.settings_data[key][subkey] = self.return_verified(subvalue)

for path_item in paths:
if key == path_item[0] and subkey == path_item[1]:
path = self.settings_data[key][subkey]
if (path == "None" or path is None) and subkey == "scan_folder":
self.settings_data[key][subkey] = None
else:
self.settings_data[key][subkey] = Path(path)

self.settings_data["Runtime_Options"]["log_level"] = int(self.settings_data["Runtime_Options"]["log_level"])
if get_keys(default_settings_data) == get_keys(existing_settings_data):
return

self.settings_data["Runtime_Options"]["console_log_level"] = int(
self.settings_data["Runtime_Options"]["console_log_level"],
)
# convert paths to str
settings_data = copy.deepcopy(self.settings_data)
settings_data["Files"]["input_file"] = (
str(self.settings_data["Files"]["input_file"])
if self.settings_data["Files"]["input_file"] is not None
else None
)
settings_data["Files"]["download_folder"] = (
str(self.settings_data["Files"]["download_folder"])
if self.settings_data["Files"]["download_folder"] is not None
else None
)
settings_data["Logs"]["log_folder"] = (
str(self.settings_data["Logs"]["log_folder"])
if self.settings_data["Logs"]["log_folder"] is not None
else None
)
settings_data["Logs"]["webhook_url"] = (
str(self.settings_data["Logs"]["webhook_url"])
if self.settings_data["Logs"]["webhook_url"] is not None
else None
)
settings_data["Sorting"]["sort_folder"] = (
str(self.settings_data["Sorting"]["sort_folder"])
if self.settings_data["Sorting"]["sort_folder"] is not None
else None
)
settings_data["Sorting"]["scan_folder"] = (
str(self.settings_data["Sorting"]["scan_folder"])
if self.settings_data["Sorting"]["scan_folder"] is not None
save_data = copy.deepcopy(self.settings_data)
save_data["Files"]["input_file"] = str(save_data["Files"]["input_file"])
save_data["Files"]["download_folder"] = str(save_data["Files"]["download_folder"])
save_data["Logs"]["log_folder"] = str(save_data["Logs"]["log_folder"])
save_data["Logs"]["webhook_url"] = str(save_data["Logs"]["webhook_url"])
save_data["Sorting"]["sort_folder"] = str(save_data["Sorting"]["sort_folder"])
save_data["Sorting"]["scan_folder"] = (
str(save_data["Sorting"]["scan_folder"])
if save_data["Sorting"]["scan_folder"] not in ["None", None]
else None
)
if get_keys(default_settings_data) == get_keys(existing_settings_data):
return
_save_yaml(self.settings, settings_data)
_save_yaml(self.settings, save_data)

def _verify_global_settings_config(self) -> None:
default_global_data = copy.deepcopy(global_settings)
existing_global_data = _load_yaml(self.global_settings)
self.global_settings_data = _match_config_dicts(default_global_data, existing_global_data)
"""Verifies the global settings config file and creates it if it doesn't exist."""
default_global_settings_data = copy.deepcopy(global_settings)
existing_global_settings_data = _load_yaml(self.global_settings)

self.global_settings_data["General"]["max_file_name_length"] = int(
self.global_settings_data["General"]["max_file_name_length"],
)
self.global_settings_data["General"]["max_folder_name_length"] = int(
self.global_settings_data["General"]["max_folder_name_length"],
)
self.global_settings_data["Rate_Limiting_Options"]["connection_timeout"] = int(
self.global_settings_data["Rate_Limiting_Options"]["connection_timeout"],
)
self.global_settings_data["Rate_Limiting_Options"]["download_attempts"] = int(
self.global_settings_data["Rate_Limiting_Options"]["download_attempts"],
)
self.global_settings_data["Rate_Limiting_Options"]["download_delay"] = int(
self.global_settings_data["Rate_Limiting_Options"]["download_delay"],
)
self.global_settings_data["Rate_Limiting_Options"]["max_simultaneous_downloads"] = int(
self.global_settings_data["Rate_Limiting_Options"]["max_simultaneous_downloads"],
)
self.global_settings_data["Rate_Limiting_Options"]["max_simultaneous_downloads_per_domain"] = int(
self.global_settings_data["Rate_Limiting_Options"]["max_simultaneous_downloads_per_domain"],
)
self.global_settings_data["Rate_Limiting_Options"]["rate_limit"] = int(
self.global_settings_data["Rate_Limiting_Options"]["rate_limit"],
)
if get_keys(default_global_settings_data) == get_keys(existing_global_settings_data):
self.global_settings_data = existing_global_settings_data
return

self.global_settings_data["Rate_Limiting_Options"]["download_speed_limit"] = int(
self.global_settings_data["Rate_Limiting_Options"]["download_speed_limit"],
)
self.global_settings_data["Rate_Limiting_Options"]["read_timeout"] = int(
self.global_settings_data["Rate_Limiting_Options"]["read_timeout"],
)
self.global_settings_data["UI_Options"]["refresh_rate"] = int(
self.global_settings_data["UI_Options"]["refresh_rate"],
)
self.global_settings_data["UI_Options"]["scraping_item_limit"] = int(
self.global_settings_data["UI_Options"]["scraping_item_limit"],
)
self.global_settings_data["UI_Options"]["downloading_item_limit"] = int(
self.global_settings_data["UI_Options"]["downloading_item_limit"],
)
for key, value in default_global_settings_data.items():
for subkey, subvalue in value.items():
self.global_settings_data[key][subkey] = self.return_verified(subvalue)

self.global_settings_data = _match_config_dicts(default_global_settings_data, existing_global_settings_data)

# other changes

self.global_settings_data["Dupe_Cleanup_Options"]["delete_after_download"] = self.global_settings_data[
"Dupe_Cleanup_Options"
]["delete_after_download"]
self.global_settings_data["Dupe_Cleanup_Options"]["hash_while_downloading"] = self.global_settings_data[
"Dupe_Cleanup_Options"
]["hash_while_downloading"]
self.global_settings_data["Dupe_Cleanup_Options"]["dedupe_already_downloaded"] = self.global_settings_data[
"Dupe_Cleanup_Options"
]["dedupe_already_downloaded"]
self.global_settings_data["Dupe_Cleanup_Options"]["keep_prev_download"] = self.global_settings_data[
"Dupe_Cleanup_Options"
]["keep_prev_download"]
self.global_settings_data["Dupe_Cleanup_Options"]["keep_new_download"] = self.global_settings_data[
"Dupe_Cleanup_Options"
]["keep_new_download"]
self.global_settings_data["Dupe_Cleanup_Options"]["delete_off_disk"] = self.global_settings_data[
"Dupe_Cleanup_Options"
]["delete_off_disk"]
if get_keys(default_global_data) == get_keys(existing_global_data):
return
_save_yaml(self.global_settings, self.global_settings_data)

"""~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"""
@@ -305,8 +229,13 @@ def create_new_config(new_settings: Path, settings_data: dict) -> None:
else None
)
settings_data["Sorting"]["scan_folder"] = (
str(settings_data["Sorting"]["scan_folder"]) if settings_data["Sorting"]["scan_folder"] is not None else None

str(settings_data["Sorting"]["scan_folder"])
if settings_data["Sorting"]["scan_folder"] not in ["None", None]
else None

)

_save_yaml(new_settings, settings_data)

def write_updated_authentication_config(self) -> None:
@@ -337,7 +266,7 @@ def write_updated_settings_config(self) -> None:
)
settings_data["Sorting"]["scan_folder"] = (
str(settings_data["Sorting"]["scan_folder"])
if settings_data["Sorting"]["scan_folder"] is not None
if settings_data["Sorting"]["scan_folder"] not in ["None", None]
else None
)

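One of the commit's cleanup points ("Use isinstance instead of type()") shows up in `return_verified` above. The difference matters because `isinstance()` also accepts subclasses, while a `type()` comparison is an exact match, and it is the form linters like Ruff typically suggest. A tiny standalone illustration (the `LimitedStr` subclass is hypothetical, purely for the example):

```python
class LimitedStr(str):
    """Hypothetical str subclass, only here to show the difference."""


value = LimitedStr("downloads")

print(type(value) == str)      # False: exact type comparison misses subclasses
print(isinstance(value, str))  # True: subclasses count as the base type
```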
1 change: 1 addition & 0 deletions cyberdrop_dl/scraper/crawlers/chevereto_crawler.py
@@ -143,6 +143,7 @@ async def album(self, scrape_item: ScrapeItem) -> None:
raw=True,
origin=scrape_item,
),
"html.parser",
)

if "This content is password protected" in sub_albums_soup.text:
1 change: 1 addition & 0 deletions cyberdrop_dl/scraper/crawlers/cyberfile_crawler.py
@@ -239,6 +239,7 @@ async def file(self, scrape_item: ScrapeItem) -> None:
raw=True,
origin=scrape_item,
),
"html.parser",
)
if "File password is invalid" in soup.text:
raise PasswordProtectedError(origin=scrape_item)
