Skip to content
This repository has been archived by the owner on Jul 5, 2024. It is now read-only.

Commit

Permalink
Try dealing with some unknown errors.
Browse files Browse the repository at this point in the history
  • Loading branch information
Jules-WinnfieldX committed Dec 15, 2023
1 parent 52d0b1c commit b21f2c1
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 12 deletions.
2 changes: 1 addition & 1 deletion cyberdrop_dl/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "5.0.111"
__version__ = "5.0.112"
19 changes: 9 additions & 10 deletions cyberdrop_dl/scraper/crawlers/jpgchurch_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,12 @@

from cyberdrop_dl.scraper.crawler import Crawler
from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem
from cyberdrop_dl.utils.utilities import error_handling_wrapper, get_filename_and_ext
from cyberdrop_dl.utils.utilities import error_handling_wrapper, get_filename_and_ext, FILE_FORMATS

if TYPE_CHECKING:
from cyberdrop_dl.managers.manager import Manager


def check_direct_link(url: URL) -> bool:
    """Determines if the url is a direct link or not

    Only ``url.host`` is inspected. The host is matched from its start
    against the known JPG Church CDN host patterns.

    Args:
        url: The URL to classify.

    Returns:
        True when the host matches one of the CDN patterns, False otherwise
        (including when the URL carries no host at all).
    """
    host = url.host
    # re.match raises TypeError on None, so a host-less URL is simply
    # treated as "not a direct link" instead of crashing the caller.
    if host is None:
        return False

    # NOTE(review): the alternatives containing "\/images\/" require a slash
    # in the matched text; since only the host is matched here they look
    # unreachable -- confirm whether the path was meant to be included.
    cdn_possibilities = r"(?:(jpg.church\/images\/...)|(simp..jpg.church)|(jpg.fish\/images\/...)|(simp..jpg.fish)|(jpg.fishing\/images\/...)|(simp..jpg.fishing))"
    return re.match(cdn_possibilities, host) is not None


class JPGChurchCrawler(Crawler):
def __init__(self, manager: Manager):
super().__init__(manager, "sharex", "JPGChurch")
Expand All @@ -36,7 +28,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None:
"""Determines where to send the scrape item based on the url"""
task_id = await self.scraping_progress.add_task(scrape_item.url)

if check_direct_link(scrape_item.url):
if await self.check_direct_link(scrape_item.url) or scrape_item.url.suffix in FILE_FORMATS['Images']:
await self.handle_direct_link(scrape_item)
else:
scrape_item.url = self.primary_base_domain / scrape_item.url.path[1:]
Expand Down Expand Up @@ -143,3 +135,10 @@ async def parse_datetime(self, date: str) -> int:
"""Parses a datetime string into a unix timestamp"""
date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
return calendar.timegm(date.timetuple())

async def check_direct_link(self, url: URL) -> bool:
    """Determines if the url is a direct link or not

    Only ``url.host`` is inspected. The host is matched from its start
    against the known JPG Church CDN host patterns. No instance state is
    read.

    Args:
        url: The URL to classify.

    Returns:
        True when the host matches one of the CDN patterns, False otherwise
        (including when the URL carries no host at all).
    """
    host = url.host
    # re.match raises TypeError on None, so a host-less URL is simply
    # treated as "not a direct link" instead of surfacing an unrelated error.
    if host is None:
        return False

    # NOTE(review): the alternatives containing "\/images\/" require a slash
    # in the matched text; since only the host is matched here they look
    # unreachable -- confirm whether the path was meant to be included.
    cdn_possibilities = r"^(?:(jpg.church\/images\/...)|(simp..jpg.church)|(jpg.fish\/images\/...)|(simp..jpg.fish)|(jpg.fishing\/images\/...)|(simp..jpg.fishing))"
    return re.match(cdn_possibilities, host) is not None
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cyberdrop-dl"
version = "5.0.111"
version = "5.0.112"
description = "Bulk downloader for multiple file hosts"
authors = ["Jules Winnfield <[email protected]>"]
readme = "README.md"
Expand Down

0 comments on commit b21f2c1

Please sign in to comment.