Skip to content
This repository has been archived by the owner on Jul 5, 2024. It is now read-only.

Commit

Permalink
add user page handling for jpg.church
Browse files Browse the repository at this point in the history
  • Loading branch information
Jules-WinnfieldX committed Dec 7, 2023
1 parent dc1cf61 commit 41281bd
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 3 deletions.
2 changes: 1 addition & 1 deletion cyberdrop_dl/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "5.0.71"
__version__ = "5.0.72"
32 changes: 31 additions & 1 deletion cyberdrop_dl/scraper/crawlers/jpgchurch_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,41 @@ async def fetch(self, scrape_item: ScrapeItem) -> None:
scrape_item.url = self.primary_base_domain / scrape_item.url.path[1:]
if "a" in scrape_item.url.parts or "album" in scrape_item.url.parts:
await self.album(scrape_item)
else:
elif 'image' in scrape_item.url.parts or 'img' in scrape_item.url.parts or 'images' in scrape_item.url.parts:
await self.image(scrape_item)
else:
await self.profile(scrape_item)

await self.scraping_progress.remove_task(task_id)

@error_handling_wrapper
async def profile(self, scrape_item: ScrapeItem) -> None:
    """Scrapes a user profile.

    Loads the profile page, reads the title from its ``og:title`` meta tag
    and follows the ``list-most-recent-link`` anchor to the first listing
    page. For every listing page, each anchor whose href contains ``img``
    is turned into a new scrape item and pushed onto the scraper queue.
    Paging continues through the ``data-pagination=next`` anchor until that
    anchor, or its href, is absent.
    """
    async with self.request_limiter:
        profile_soup = await self.client.get_BS4(self.domain, scrape_item.url)

    og_title = profile_soup.select_one('meta[property="og:title"]').get("content")
    title = await self.create_title(og_title, None, None)
    page_url = URL(profile_soup.select_one("a[id=list-most-recent-link]").get("href"))

    while True:
        # Only the page request itself is made under the request limiter.
        async with self.request_limiter:
            page_soup = await self.client.get_BS4(self.domain, page_url)

        for anchor in page_soup.select("a[href*=img]"):
            image_url = URL(anchor.get('href'))
            child_item = await self.create_scrape_item(scrape_item, image_url, title, True)
            await self.scraper_queue.put(child_item)

        # Stop as soon as there is no usable "next" link on this page.
        next_anchor = page_soup.select_one('a[data-pagination=next]')
        if next_anchor is None:
            break
        next_href = next_anchor.get('href')
        if next_href is None:
            break
        page_url = URL(next_href)

@error_handling_wrapper
async def album(self, scrape_item: ScrapeItem) -> None:
"""Scrapes an album"""
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cyberdrop-dl"
version = "5.0.71"
version = "5.0.72"
description = "Bulk downloader for multiple file hosts"
authors = ["Jules Winnfield <[email protected]>"]
readme = "README.md"
Expand Down

0 comments on commit 41281bd

Please sign in to comment.