
Commit

Added Rule34Vault support
Jules-WinnfieldX committed Jun 15, 2024
1 parent 852342d commit 7769595
Showing 5 changed files with 137 additions and 5 deletions.
2 changes: 1 addition & 1 deletion cyberdrop_dl/__init__.py
@@ -1 +1 @@
__version__ = "5.3.27"
__version__ = "5.3.28"
127 changes: 127 additions & 0 deletions cyberdrop_dl/scraper/crawlers/rule34vault_crawler.py
@@ -0,0 +1,127 @@
from __future__ import annotations

import calendar
import datetime
from typing import TYPE_CHECKING

from aiolimiter import AsyncLimiter
from yarl import URL

from cyberdrop_dl.scraper.crawler import Crawler
from cyberdrop_dl.utils.dataclasses.url_objects import ScrapeItem
from cyberdrop_dl.utils.utilities import get_filename_and_ext, error_handling_wrapper

if TYPE_CHECKING:
from cyberdrop_dl.managers.manager import Manager


class Rule34VaultCrawler(Crawler):
    def __init__(self, manager: Manager):
        super().__init__(manager, "rule34vault", "Rule34Vault")
        self.primary_base_url = URL("https://rule34vault.com")
        self.request_limiter = AsyncLimiter(10, 1)

    """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"""

    async def fetch(self, scrape_item: ScrapeItem) -> None:
        """Determines where to send the scrape item based on the url"""
        task_id = await self.scraping_progress.add_task(scrape_item.url)

        if "post" in scrape_item.url.parts:
            await self.file(scrape_item)
        elif "playlists" in scrape_item.url.parts:
            await self.playlist(scrape_item)
        else:
            await self.tag(scrape_item)

        await self.scraping_progress.remove_task(task_id)

    @error_handling_wrapper
    async def tag(self, scrape_item: ScrapeItem) -> None:
        """Scrapes an album"""
        async with self.request_limiter:
            soup = await self.client.get_BS4(self.domain, scrape_item.url)

        title = await self.create_title(scrape_item.url.parts[1], None, None)

        content_block = soup.select_one('div[class="grid ng-star-inserted"]')
        content = content_block.select('a[class="box ng-star-inserted"]')
        for file_page in content:
            link = file_page.get('href')
            if link.startswith("/"):
                link = f"{self.primary_base_url}{link}"
            link = URL(link)
            new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True)
            self.manager.task_group.create_task(self.run(new_scrape_item))
        if not content:
            return

        if len(scrape_item.url.parts) > 2:
            page = int(scrape_item.url.parts[-1])
            next_page = scrape_item.url.with_path(f"/{scrape_item.url.parts[1]}/page/{page + 1}")
        else:
            next_page = scrape_item.url.with_path(f"/{scrape_item.url.parts[1]}/page/2")
        new_scrape_item = await self.create_scrape_item(scrape_item, next_page, "")
        self.manager.task_group.create_task(self.run(new_scrape_item))

    @error_handling_wrapper
    async def playlist(self, scrape_item: ScrapeItem) -> None:
        """Scrapes a playlist"""
        async with self.request_limiter:
            soup = await self.client.get_BS4(self.domain, scrape_item.url)

        title_str = soup.select_one('div[class*=title]').text
        title = await self.create_title(title_str, scrape_item.url.parts[-1], None)

        content_block = soup.select_one('div[class="grid ng-star-inserted"]')
        content = content_block.select('a[class="box ng-star-inserted"]')
        for file_page in content:
            link = file_page.get('href')
            if link.startswith("/"):
                link = f"{self.primary_base_url}{link}"
            link = URL(link)
            new_scrape_item = await self.create_scrape_item(scrape_item, link, title, True)
            self.manager.task_group.create_task(self.run(new_scrape_item))
        if not content:
            return

        if scrape_item.url.query:
            page = scrape_item.url.query.get("page")
            next_page = scrape_item.url.with_query({"page": int(page) + 1})
        else:
            next_page = scrape_item.url.with_query({"page": 2})
        new_scrape_item = await self.create_scrape_item(scrape_item, next_page, "")
        self.manager.task_group.create_task(self.run(new_scrape_item))

    @error_handling_wrapper
    async def file(self, scrape_item: ScrapeItem) -> None:
        """Scrapes an image"""
        async with self.request_limiter:
            soup = await self.client.get_BS4(self.domain, scrape_item.url)

        date = await self.parse_datetime(soup.select_one('div[class="text-primary ng-star-inserted"]').text.split("(")[1].split(")")[0])
        scrape_item.date = date

        image = soup.select_one('img[class*="img ng-star-inserted"]')
        if image:
            link = image.get('src')
            if link.startswith("/"):
                link = f"{self.primary_base_url}{link}"
            link = URL(link)
            filename, ext = await get_filename_and_ext(link.name)
            await self.handle_file(link, scrape_item, filename, ext)
        video = soup.select_one("video source")
        if video:
            link = video.get('src')
            if link.startswith("/"):
                link = f"{self.primary_base_url}{link}"
            link = URL(link)
            filename, ext = await get_filename_and_ext(link.name)
            await self.handle_file(link, scrape_item, filename, ext)

    """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"""

    async def parse_datetime(self, date: str) -> int:
        """Parses a datetime string into a unix timestamp"""
        date = datetime.datetime.strptime(date, "%b %d, %Y, %I:%M:%S %p")
        return calendar.timegm(date.timetuple())
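
Two details of the new crawler are easy to miss in the diff: tag listings are paginated through the URL path (/<tag>/page/<n>) while playlists are paginated through a page query parameter, both built with yarl. A minimal sketch of how those next-page URLs come out, using made-up tag and playlist paths rather than real site URLs:

from yarl import URL

base = URL("https://rule34vault.com")

# Tag listings paginate through the path: /<tag> -> /<tag>/page/2 -> ...
tag_url = base / "example_tag"  # hypothetical tag
print(tag_url.with_path(f"/{tag_url.parts[1]}/page/2"))
# https://rule34vault.com/example_tag/page/2

# Playlists paginate through the query string instead: ?page=2, ?page=3, ...
playlist_url = base / "playlists" / "view" / "1234"  # hypothetical playlist path
print(playlist_url.with_query({"page": 2}))
# https://rule34vault.com/playlists/view/1234?page=2
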
9 changes: 7 additions & 2 deletions cyberdrop_dl/scraper/scraper.py
@@ -36,8 +36,8 @@ def __init__(self, manager: Manager):
"mediafire": self.mediafire, "nudostar.com": self.nudostar, "nudostar.tv": self.nudostartv,
"omegascans": self.omegascans, "pimpandhost": self.pimpandhost, "pixeldrain": self.pixeldrain,
"postimg": self.postimg, "realbooru": self.realbooru, "reddit": self.reddit,
"redd.it": self.reddit, "redgifs": self.redgifs, "rule34.xxx": self.rule34xxx,
"rule34.xyz": self.rule34xyz, "saint": self.saint, "scrolller": self.scrolller,
"redd.it": self.reddit, "redgifs": self.redgifs, "rule34vault": self.rule34vault, "rule34.xxx": self.rule34xxx,
"rule34.xyz": self.rule34xyz, "saint": self.saint, "scrolller": self.scrolller,
"simpcity": self.simpcity, "socialmediagirls": self.socialmediagirls, "toonily": self.toonily,
"xbunker": self.xbunker, "xbunkr": self.xbunkr, "bunkr": self.bunkrr}
self.existing_crawlers = {}
@@ -201,6 +201,11 @@ async def redgifs(self) -> None:
        from cyberdrop_dl.scraper.crawlers.redgifs_crawler import RedGifsCrawler
        self.existing_crawlers['redgifs'] = RedGifsCrawler(self.manager)

+    async def rule34vault(self) -> None:
+        """Creates a Rule34Vault Crawler instance"""
+        from cyberdrop_dl.scraper.crawlers.rule34vault_crawler import Rule34VaultCrawler
+        self.existing_crawlers['rule34vault'] = Rule34VaultCrawler(self.manager)

    async def rule34xxx(self) -> None:
        """Creates a Rule34XXX Crawler instance"""
        from cyberdrop_dl.scraper.crawlers.rule34xxx_crawler import Rule34XXXCrawler
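
The scraper.py change follows the file's existing lazy-registration pattern: each supported-domain key maps to an async factory method, and the matching crawler is only imported and instantiated the first time a URL for that host is seen. A self-contained sketch of that pattern; the LazyRegistry class and crawler_for helper below are illustrative stand-ins, not cyberdrop-dl's real API:

import asyncio


class LazyRegistry:
    """Illustrative stand-in for the mapping in scraper.py: each supported
    domain points at an async factory that builds its crawler on first use."""

    def __init__(self) -> None:
        self.mapping = {"rule34vault": self.rule34vault}
        self.existing_crawlers: dict[str, object] = {}

    async def rule34vault(self) -> None:
        # cyberdrop-dl imports Rule34VaultCrawler here and passes its manager;
        # a plain object stands in for the crawler in this sketch.
        self.existing_crawlers["rule34vault"] = object()

    async def crawler_for(self, host: str):
        # Hypothetical dispatch helper: build the crawler on demand for a
        # matching host, then reuse the cached instance afterwards.
        for key, factory in self.mapping.items():
            if key in host:
                if key not in self.existing_crawlers:
                    await factory()
                return self.existing_crawlers[key]
        return None


async def main() -> None:
    registry = LazyRegistry()
    print(await registry.crawler_for("rule34vault.com"))  # crawler built on demand
    print(await registry.crawler_for("example.com"))      # None: unsupported host


asyncio.run(main())
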
2 changes: 1 addition & 1 deletion cyberdrop_dl/utils/dataclasses/supported_domains.py
@@ -12,7 +12,7 @@ class SupportedDomains:
"jpg1.su", "jpg2.su", "jpg3.su", "jpg4.su", "host.church", "kemono",
"leakedmodels", "mediafire", "nudostar.com", "nudostar.tv",
"omegascans", "pimpandhost", "pixeldrain", "postimg", "realbooru",
"reddit", "redd.it", "redgifs", "rule34.xxx", "rule34.xyz", "saint",
"reddit", "redd.it", "redgifs", "rule34.xxx", "rule34.xyz", "rule34vault", "saint",
"scrolller", "simpcity", "socialmediagirls", "toonily", "xbunker",
"xbunkr")

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cyberdrop-dl"
version = "5.3.27"
version = "5.3.28"
description = "Bulk downloader for multiple file hosts"
authors = ["Jules Winnfield <[email protected]>"]
readme = "README.md"
