Skip to content
This repository has been archived by the owner on Jul 5, 2024. It is now read-only.

Commit

Permalink
Try dealing with some unknown errors.
Browse files Browse the repository at this point in the history
  • Loading branch information
Jules-WinnfieldX committed Dec 15, 2023
1 parent fb56784 commit 52d0b1c
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 10 deletions.
2 changes: 1 addition & 1 deletion cyberdrop_dl/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "5.0.109"
__version__ = "5.0.111"
17 changes: 9 additions & 8 deletions cyberdrop_dl/scraper/crawlers/jpgchurch_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,14 @@
from cyberdrop_dl.managers.manager import Manager


def check_direct_link(url: URL) -> bool:
    """Determines if the url is a direct link or not.

    The host of ``url`` is tested against the known CDN patterns with
    ``re.match``, so the pattern is anchored at the start of the host only.

    Args:
        url: The URL to classify; only its ``host`` attribute is read.

    Returns:
        True when the host matches one of the CDN patterns, False otherwise
        (including when the URL has no host at all).
    """
    # NOTE(review): the pattern is applied to the host only, so the
    # alternatives containing "/images/" presumably can never match, and the
    # unescaped dots match any character - confirm before tightening.
    cdn_possibilities = r"(?:(jpg.church\/images\/...)|(simp..jpg.church)|(jpg.fish\/images\/...)|(simp..jpg.fish)|(jpg.fishing\/images\/...)|(simp..jpg.fishing))"
    host = url.host
    if host is None:
        # re.match raises TypeError on None; a URL without a host cannot be
        # a direct CDN link, so report it as "not direct" instead of crashing.
        return False
    return re.match(cdn_possibilities, host) is not None


class JPGChurchCrawler(Crawler):
def __init__(self, manager: Manager):
super().__init__(manager, "sharex", "JPGChurch")
Expand All @@ -28,7 +36,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None:
"""Determines where to send the scrape item based on the url"""
task_id = await self.scraping_progress.add_task(scrape_item.url)

if await self.check_direct_link(scrape_item.url):
if check_direct_link(scrape_item.url):
await self.handle_direct_link(scrape_item)
else:
scrape_item.url = self.primary_base_domain / scrape_item.url.path[1:]
Expand Down Expand Up @@ -135,10 +143,3 @@ async def parse_datetime(self, date: str) -> int:
"""Parses a datetime string into a unix timestamp"""
date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
return calendar.timegm(date.timetuple())

async def check_direct_link(self, url: URL) -> bool:
    """Determines if the url is a direct link or not.

    The host of ``url`` is tested against the known CDN patterns with
    ``re.match``, so the pattern is anchored at the start of the host only.
    No instance state is read.

    Args:
        url: The URL to classify; only its ``host`` attribute is read.

    Returns:
        True when the host matches one of the CDN patterns, False otherwise
        (including when the URL has no host at all).
    """
    # NOTE(review): the pattern is applied to the host only, so the
    # alternatives containing "/images/" presumably can never match, and the
    # unescaped dots match any character - confirm before tightening.
    cdn_possibilities = r"(?:(jpg.church\/images\/...)|(simp..jpg.church)|(jpg.fish\/images\/...)|(simp..jpg.fish)|(jpg.fishing\/images\/...)|(simp..jpg.fishing))"
    host = url.host
    if host is None:
        # re.match raises TypeError on None; a URL without a host cannot be
        # a direct CDN link, so report it as "not direct" instead of crashing.
        return False
    return re.match(cdn_possibilities, host) is not None
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cyberdrop-dl"
version = "5.0.109"
version = "5.0.111"
description = "Bulk downloader for multiple file hosts"
authors = ["Jules Winnfield <[email protected]>"]
readme = "README.md"
Expand Down

0 comments on commit 52d0b1c

Please sign in to comment.