Skip to content
This repository has been archived by the owner on Jul 5, 2024. It is now read-only.

Commit

Permalink
Update bunkr crawler to not scrape as many pages directly.
Browse files: browse the repository at this point in the history
  • Loading branch information
Jules-WinnfieldX committed Dec 22, 2023
1 parent f98a3d0 commit ed95718
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 18 deletions.
2 changes: 1 addition & 1 deletion cyberdrop_dl/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "5.1.21"
__version__ = "5.1.22"
2 changes: 1 addition & 1 deletion cyberdrop_dl/managers/client_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ async def check_http_status(self, response: ClientResponse, download: bool = Fal
if response_url in [URL("https://bnkr.b-cdn.net/maintenance-vid.mp4"),
URL("https://bnkr.b-cdn.net/maintenance.mp4"),
URL("https://bunkrr.su/magic/lovely.mp4"),]:
raise DownloadFailure(status=HTTPStatus.SERVICE_UNAVAILABLE, message="Bunkr under maintenance")
raise DownloadFailure(status="Bunkr Maintenance", message="Bunkr under maintenance")
if "imgur.com/removed" in str(response_url):
raise DownloadFailure(status=HTTPStatus.NOT_FOUND, message="Imgur image has been removed")

Expand Down
21 changes: 20 additions & 1 deletion cyberdrop_dl/scraper/crawlers/bunkrr_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,26 @@ async def album(self, scrape_item: ScrapeItem) -> None:
link = URL("https://" + scrape_item.url.host + link)
link = URL(link)
link = await self.get_stream_link(link)
self.manager.task_group.create_task(self.run(ScrapeItem(link, scrape_item.parent_title, True, date)))

try:
filename = card_listing.select_one("div[class*=details]").select_one("p").text
file_ext = "." + filename.split(".")[-1]
if file_ext.lower() not in FILE_FORMATS['Images'] and file_ext.lower() not in FILE_FORMATS['Videos']:
raise Exception()
image_obj = file.select_one("img")
src = image_obj.get("src")
src = src.replace("/thumbs", "")
src = URL(src)
src = src.with_suffix(file_ext).with_query("download=true")
new_scrape_item = await self.create_scrape_item(scrape_item, link, "", True, date)

if await self.check_complete_from_referer(scrape_item):
continue

filename, ext = await get_filename_and_ext(src.name)
await self.handle_file(src, new_scrape_item, filename, ext)
except Exception as e:
self.manager.task_group.create_task(self.run(ScrapeItem(link, scrape_item.parent_title, True, date)))

@error_handling_wrapper
async def video(self, scrape_item: ScrapeItem) -> None:
Expand Down
16 changes: 2 additions & 14 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cyberdrop-dl"
version = "5.1.21"
version = "5.1.22"
description = "Bulk downloader for multiple file hosts"
authors = ["Jules Winnfield <[email protected]>"]
readme = "README.md"
Expand Down

0 comments on commit ed95718

Please sign in to comment.