@error_handling_wrapper
async def series(self, scrape_item: ScrapeItem) -> None:
    """Scrape every chapter of an OmegaScans series.

    Fetches the series page, digs the numeric ``series_id`` out of an
    inline ``<script>`` JSON blob, then walks the chapter API page by
    page, queueing one scrape task per chapter.

    :param scrape_item: the series-page item being scraped; its URL is
        also used as the base for each chapter URL.
    """
    async with self.request_limiter:
        soup = await self.client.get_BS4(self.domain, scrape_item.url)

    # The series id only appears inside an inline script as escaped
    # JSON text, e.g. ... \"series_id\":123, ... — hence the '\\":'
    # split pattern below.
    series_id = None
    for script in soup.select("script"):
        script_text = script.get_text()  # hoisted: avoid calling get_text() twice
        if "series_id" in script_text:
            series_id = script_text.split('series_id\\":')[1].split(",")[0]
            break

    # Guard: without this, a page layout change that removes the script
    # blob would leave series_id unbound and raise NameError below.
    if series_id is None:
        return

    page_number = 1
    number_per_page = 30
    while True:
        api_url = URL(self.api_url.format(page_number, number_per_page, series_id))
        async with self.request_limiter:
            json_obj = await self.client.get_json(self.domain, api_url)
        if not json_obj:
            break

        for chapter in json_obj['data']:
            chapter_url = scrape_item.url / chapter['chapter_slug']
            new_scrape_item = await self.create_scrape_item(scrape_item, chapter_url, "", True)
            self.manager.task_group.create_task(self.run(new_scrape_item))

        # Stop once the API reports we are on the final page.
        if json_obj['meta']['current_page'] == json_obj['meta']['last_page']:
            break
        page_number += 1