Skip to content

Commit

Permalink
fix: chevereto CDN regex
Browse files Browse the repository at this point in the history
img.kiwi uses the `/images` subpath to serve its assets (direct links). Matching by host is not enough; the regex needs to match the entire URL.
  • Loading branch information
NTFSvolume committed Nov 19, 2024
1 parent b512732 commit 7f597de
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 8 deletions.
12 changes: 6 additions & 6 deletions cyberdrop_dl/scraper/crawlers/chevereto_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@
from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem

# Direct-link (CDN) URL patterns, keyed by the site's primary domain.
#
# Every pattern is written to match the full URL string rather than only the host,
# because some sites serve their assets from a sub-path of the main host
# (e.g. ``img.kiwi/images/...``), so the host alone cannot identify a direct link.
#
# The scheme is optional in all patterns. Literal dots in hostnames are escaped;
# an unescaped "." is kept only where the original pattern used it as a placeholder
# for one arbitrary character (the ``simp.`` and ``jpg.`` host prefixes).
CDN_PATTERNS = {
    "jpg.church": (
        r"^(?:https?://)?"
        r"(?:jpg\.church/images|simp.\.jpg\.church"
        r"|jpg\.fish/images|simp.\.jpg\.fish"
        r"|jpg\.fishing/images|simp.\.jpg\.fishing"
        r"|simp.\.host\.church|simp.\.jpg.\.su)"
        r"/.*"
    ),
    "imagepond.net": r"^(?:https?://)?media\.imagepond\.net/.*",
    "img.kiwi": r"^(?:https?://)?img\.kiwi/images/.*",
}

# Single compiled alternation of all site patterns; each alternative carries its own "^".
CDN_POSSIBILITIES = re.compile("|".join(CDN_PATTERNS.values()))
Expand Down Expand Up @@ -74,7 +74,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None:
"""Determines where to send the scrape item based on the url."""
task_id = self.scraping_progress.add_task(scrape_item.url)

if await self.check_direct_link(scrape_item.url):
if self.check_direct_link(scrape_item.url):
await self.handle_direct_link(scrape_item)
else:
scrape_item.url = self.primary_base_domain.with_path(scrape_item.url.path[1:]).with_query(
Expand Down Expand Up @@ -251,6 +251,6 @@ def parse_datetime(date: str) -> int:
return calendar.timegm(date.timetuple())

@staticmethod
def check_direct_link(url: URL) -> bool:
    """Return True when ``url`` is a direct (CDN) link rather than a page to scrape.

    The complete URL string is tested against ``CDN_POSSIBILITIES`` instead of only
    the host, because some sites serve direct links from a sub-path of their main
    host. The check is pure computation, so it is a plain synchronous function.
    """
    return bool(CDN_POSSIBILITIES.match(str(url)))
2 changes: 0 additions & 2 deletions cyberdrop_dl/ui/prompts/user_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,5 +190,3 @@ def prompt_header(manager: Manager, title: str | None = None) -> None:
title = title or f"[bold]Cyberdrop Downloader ([blue]V{__version__!s}[/blue])[/bold]"
console.print(title)
console.print(f"[bold]Current config:[/bold] [blue]{manager.config_manager.loaded_config}[/blue]")


0 comments on commit 7f597de

Please sign in to comment.