From d23289acf363c678f227026b04e5da4fd4a27502 Mon Sep 17 00:00:00 2001 From: Jacob Date: Mon, 13 Jan 2025 15:09:36 -0500 Subject: [PATCH 01/12] Add crawler for members.luscious.net Add a crawler for https://members.luscious.net. Not fully complete yet. --- cyberdrop_dl/clients/scraper_client.py | 22 +-- cyberdrop_dl/scraper/__init__.py | 1 + .../scraper/crawlers/luscious_crawler.py | 134 ++++++++++++++++++ 3 files changed, 149 insertions(+), 8 deletions(-) create mode 100644 cyberdrop_dl/scraper/crawlers/luscious_crawler.py diff --git a/cyberdrop_dl/clients/scraper_client.py b/cyberdrop_dl/clients/scraper_client.py index f5f4b5bf9..16683e91d 100644 --- a/cyberdrop_dl/clients/scraper_client.py +++ b/cyberdrop_dl/clients/scraper_client.py @@ -206,19 +206,25 @@ async def post_data( domain: str, url: URL, client_session: CachedSession, - data: dict, + data: dict | str, req_resp: bool = True, raw: bool = False, origin: ScrapeItem | URL | None = None, + cache_disabled: bool = False, + headers_inc: dict | None = None, ) -> dict | bytes: """Returns a JSON object from the given URL when posting data. If raw == True, returns raw binary data of response.""" - async with client_session.post( - url, - headers=self._headers, - ssl=self.client_manager.ssl_context, - proxy=self.client_manager.proxy, - data=data, - ) as response: + headers = self._headers | headers_inc if headers_inc else self._headers + async with ( + cache_control_manager(client_session, disabled=cache_disabled), + client_session.post( + url, + headers=headers, + ssl=self.client_manager.ssl_context, + proxy=self.client_manager.proxy, + data=data, + ) as response, + ): await self.client_manager.check_http_status(response, origin=origin) if req_resp: content = await response.content.read() diff --git a/cyberdrop_dl/scraper/__init__.py b/cyberdrop_dl/scraper/__init__.py index d058eb04b..936290870 100644 --- a/cyberdrop_dl/scraper/__init__.py +++ b/cyberdrop_dl/scraper/__init__.py @@ -25,6 +25,7 @@ from cyberdrop_dl.scraper.crawlers.imgur_crawler import ImgurCrawler from cyberdrop_dl.scraper.crawlers.kemono_crawler import KemonoCrawler from cyberdrop_dl.scraper.crawlers.leakedmodels_crawler import LeakedModelsCrawler +from cyberdrop_dl.scraper.crawlers.luscious_crawler import LusciousCrawler from cyberdrop_dl.scraper.crawlers.mediafire_crawler import MediaFireCrawler from cyberdrop_dl.scraper.crawlers.nekohouse_crawler import NekohouseCrawler from cyberdrop_dl.scraper.crawlers.nudostar_crawler import NudoStarCrawler diff --git a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py new file mode 100644 index 000000000..1e1655593 --- /dev/null +++ b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py @@ -0,0 +1,134 @@ +from __future__ import annotations + +import contextlib +import json +from typing import TYPE_CHECKING + +from aiolimiter import AsyncLimiter +from yarl import URL + +from cyberdrop_dl.scraper.crawler import Crawler, create_task_id +from cyberdrop_dl.utils.data_enums_classes.url_objects import FILE_HOST_ALBUM, ScrapeItem +from cyberdrop_dl.utils.utilities import error_handling_wrapper, get_filename_and_ext + +if TYPE_CHECKING: + from collections.abc import AsyncGenerator + + from cyberdrop_dl.managers.manager import Manager + + +class LusciousCrawler(Crawler): + primary_base_domain = URL("https://members.luscious.net") + + def __init__(self, manager: Manager) -> None: + super().__init__(manager, "luscious", "Luscious") + self.request_limiter = AsyncLimiter(10, 1) + self.graphql_url 
= URL("https://members.luscious.net/graphql/nobatch/") + self.graphql_queries = { + "AlbumGet": "\n query AlbumGet($id: ID!) {\n album {\n get(id: $id) {\n ... on Album {\n ...AlbumStandard\n }\n ... on MutationError {\n errors {\n code\n message\n }\n }\n }\n }\n}\n \n fragment AlbumStandard on Album {\n __typename\n id\n title\n labels\n description\n created\n modified\n like_status\n number_of_favorites\n number_of_dislikes\n moderation_status\n marked_for_deletion\n marked_for_processing\n number_of_pictures\n number_of_animated_pictures\n number_of_duplicates\n slug\n is_manga\n url\n download_url\n permissions\n created_by {\n id\n url\n name\n display_name\n user_title\n avatar_url\n }\n content {\n id\n title\n url\n }\n language {\n id\n title\n url\n }\n tags {\n category\n text\n url\n count\n }\n genres {\n id\n title\n slug\n url\n }\n audiences {\n id\n title\n url\n }\n is_featured\n featured_date\n featured_by {\n id\n url\n name\n display_name\n user_title\n avatar_url\n }\n}\n ", + "AlbumListOwnPictures": "\n query AlbumListOwnPictures($input: PictureListInput!) {\n picture {\n list(input: $input) {\n info {\n ...FacetCollectionInfo\n }\n items {\n ...PictureStandardWithoutAlbum\n }\n }\n }\n}\n\nfragment FacetCollectionInfo on FacetCollectionInfo {\n page\n has_next_page\n has_previous_page\n total_items\n total_pages\n items_per_page\n url_complete\n url_filters_only\n}\n\nfragment PictureStandardWithoutAlbum on Picture {\n __typename\n id\n title\n created\n like_status\n number_of_comments\n number_of_favorites\n status\n width\n height\n resolution\n aspect_ratio\n url_to_original\n url_to_video\n is_animated\n position\n tags {\n id\n category\n text\n url\n }\n permissions\n url\n thumbnails {\n width\n height\n size\n url\n }\n}\n ", + "PictureListInsideAlbum": "\n query PictureListInsideAlbum($input: PictureListInput!) 
{\n picture {\n list(input: $input) {\n info {\n ...FacetCollectionInfo\n }\n items {\n __typename\n id\n title\n description\n created\n like_status\n number_of_comments\n number_of_favorites\n moderation_status\n width\n height\n resolution\n aspect_ratio\n url_to_original\n url_to_video\n is_animated\n position\n permissions\n url\n tags {\n category\n text\n url\n }\n thumbnails {\n width\n height\n size\n url\n }\n }\n }\n }\n}\n \n fragment FacetCollectionInfo on FacetCollectionInfo {\n page\n has_next_page\n has_previous_page\n total_items\n total_pages\n items_per_page\n url_complete\n}\n ", + } + + """~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~""" + + @create_task_id + async def fetch(self, scrape_item: ScrapeItem) -> None: + """Determines where to send the scrape item based on the url.""" + + if "albums" in scrape_item.url.parts and "read" not in scrape_item.url.parts: + await self.album(scrape_item) + + async def create_graphql_query(self, operation: str, scrape_item: ScrapeItem, page: int = 1) -> str: + """Creates a graphql query.""" + album_id = scrape_item.album_id + if operation == "PictureListInsideAlbum": + query = scrape_item.url.query + + sorting = query.get("sorting", "position") + only_animated = query.get("only_animated", "false") + + filters = [{"name": "album_id", "value": f"{album_id}"}] + if only_animated == "true": + filters.append({"name": "is_animated", "value": "1"}) + + data = { + "id": "1", + "operationName": operation, + "query": self.graphql_queries[operation], + "variables": { + "input": { + "display": sorting, + "filters": filters, + "items_per_page": 50, + "page": page, + } + }, + } + elif operation == "AlbumGet": + data = { + "id": "1", + "operationName": operation, + "query": self.graphql_queries[operation], + "variables": {"id": f"{album_id}"}, + } + + return json.dumps(data) + + async def album_pager(self, scrape_item: ScrapeItem) -> AsyncGenerator[dict]: + """Generator for album pages.""" + page = int(scrape_item.url.query.get("page", 1)) + while True: + query = await self.create_graphql_query("PictureListInsideAlbum", scrape_item, page) + async with self.request_limiter: + json_data = await self.client.post_data( + self.domain, + self.graphql_url.with_query({"operationName": "PictureListInsideAlbum"}), + data=query, + headers_inc={"Content-Type": "application/json"}, + origin=scrape_item, + ) + has_next_page = json_data["data"]["picture"]["list"]["info"]["has_next_page"] + yield json_data + if has_next_page: + page += 1 + continue + break + + @error_handling_wrapper + async def album(self, scrape_item: ScrapeItem) -> None: + """Scrapes an album.""" + album_id = int(scrape_item.url.parts[-1].split("_")[-1]) + results = await self.get_album_results(album_id) + scrape_item.album_id = album_id + scrape_item.part_of_album = True + scrape_item.type = FILE_HOST_ALBUM + scrape_item.children = scrape_item.children_limit = 0 + + with contextlib.suppress(IndexError, TypeError): + scrape_item.children_limit = ( + self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] + ) + + # Get album information + async with self.request_limiter: + query = await self.create_graphql_query("AlbumGet", scrape_item) + json_data = await self.client.post_data( + self.domain, + self.graphql_url.with_query({"operationName": "AlbumGet"}), + data=query, + headers_inc={"Content-Type": "application/json"}, + origin=scrape_item, + ) + + album_title = 
json_data["data"]["album"]["get"]["title"] + title = self.create_title(album_title, album_id, None) + scrape_item.add_to_parent_title(title) + + async for json_data in self.album_pager(scrape_item): + for item in json_data["data"]["picture"]["list"]["items"]: + link = URL(item["url_to_original"]) + filename, ext = get_filename_and_ext(link.name) + if not self.check_album_results(link, results): + await self.handle_file(link, scrape_item, filename, ext) + scrape_item.add_children() From adcf8997d0f391274a8a173a0d16b477da1eaf49 Mon Sep 17 00:00:00 2001 From: Jacob Date: Sat, 18 Jan 2025 12:34:27 -0500 Subject: [PATCH 02/12] Update cyberdrop_dl/scraper/crawlers/luscious_crawler.py Co-authored-by: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> --- cyberdrop_dl/scraper/crawlers/luscious_crawler.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py index 1e1655593..cb3c56688 100644 --- a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py @@ -36,8 +36,10 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - if "albums" in scrape_item.url.parts and "read" not in scrape_item.url.parts: - await self.album(scrape_item) + if not "albums" in scrape_item.url.parts or "read" in scrape_item.url.parts: + log(f"Scrape Failed: Unknown URL Path for {scrape_item.url}", 40) + return + await self.album(scrape_item) async def create_graphql_query(self, operation: str, scrape_item: ScrapeItem, page: int = 1) -> str: """Creates a graphql query.""" From 4bb6b4ce0296c7115efd82d1e53e09436fd21497 Mon Sep 17 00:00:00 2001 From: Jacob Date: Sat, 18 Jan 2025 12:34:34 -0500 Subject: [PATCH 03/12] Update cyberdrop_dl/scraper/crawlers/luscious_crawler.py Co-authored-by: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> --- cyberdrop_dl/scraper/crawlers/luscious_crawler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py index cb3c56688..2034053ec 100644 --- a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py @@ -9,6 +9,7 @@ from cyberdrop_dl.scraper.crawler import Crawler, create_task_id from cyberdrop_dl.utils.data_enums_classes.url_objects import FILE_HOST_ALBUM, ScrapeItem +from cyberdrop_dl.utils.logger import log from cyberdrop_dl.utils.utilities import error_handling_wrapper, get_filename_and_ext if TYPE_CHECKING: From 735842e29e368abbbc562072eeaa6303863f20a2 Mon Sep 17 00:00:00 2001 From: Jacob Date: Sat, 18 Jan 2025 12:35:15 -0500 Subject: [PATCH 04/12] Update cyberdrop_dl/scraper/crawlers/luscious_crawler.py Co-authored-by: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> --- cyberdrop_dl/scraper/crawlers/luscious_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py index 2034053ec..7c2d24737 100644 --- a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py @@ -125,7 +125,7 @@ async def album(self, scrape_item: ScrapeItem) -> None: ) album_title = json_data["data"]["album"]["get"]["title"] - title = self.create_title(album_title, album_id, None) + title = 
self.create_title(album_title, album_id) scrape_item.add_to_parent_title(title) async for json_data in self.album_pager(scrape_item): From f2dd683e566ee17d6f9422202114a16928ca8b27 Mon Sep 17 00:00:00 2001 From: Jacob Date: Sat, 18 Jan 2025 12:35:54 -0500 Subject: [PATCH 05/12] Update cyberdrop_dl/scraper/crawlers/luscious_crawler.py Co-authored-by: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> --- cyberdrop_dl/scraper/crawlers/luscious_crawler.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py index 7c2d24737..6f979c57a 100644 --- a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py @@ -105,13 +105,8 @@ async def album(self, scrape_item: ScrapeItem) -> None: results = await self.get_album_results(album_id) scrape_item.album_id = album_id scrape_item.part_of_album = True - scrape_item.type = FILE_HOST_ALBUM - scrape_item.children = scrape_item.children_limit = 0 + scrape_item.set_type(FILE_HOST_ALBUM, self.manager) - with contextlib.suppress(IndexError, TypeError): - scrape_item.children_limit = ( - self.manager.config_manager.settings_data.download_options.maximum_number_of_children[scrape_item.type] - ) # Get album information async with self.request_limiter: From 46d16405b7a31adf3cf7c330ee866b54b0708d9d Mon Sep 17 00:00:00 2001 From: Jacob Date: Sat, 18 Jan 2025 12:36:54 -0500 Subject: [PATCH 06/12] Update cyberdrop_dl/scraper/crawlers/luscious_crawler.py Co-authored-by: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> --- cyberdrop_dl/scraper/crawlers/luscious_crawler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py index 6f979c57a..581e3e43b 100644 --- a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py @@ -125,7 +125,8 @@ async def album(self, scrape_item: ScrapeItem) -> None: async for json_data in self.album_pager(scrape_item): for item in json_data["data"]["picture"]["list"]["items"]: - link = URL(item["url_to_original"]) + link_str: str = item["url_to_original"] + link = self.parse_url(link_str) filename, ext = get_filename_and_ext(link.name) if not self.check_album_results(link, results): await self.handle_file(link, scrape_item, filename, ext) From e962db8366ab701e586f7a485bb31648d93c27c3 Mon Sep 17 00:00:00 2001 From: Jacob Date: Sat, 18 Jan 2025 12:37:10 -0500 Subject: [PATCH 07/12] Update cyberdrop_dl/clients/scraper_client.py Co-authored-by: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> --- cyberdrop_dl/clients/scraper_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cyberdrop_dl/clients/scraper_client.py b/cyberdrop_dl/clients/scraper_client.py index 16683e91d..191594d2c 100644 --- a/cyberdrop_dl/clients/scraper_client.py +++ b/cyberdrop_dl/clients/scraper_client.py @@ -206,7 +206,7 @@ async def post_data( domain: str, url: URL, client_session: CachedSession, - data: dict | str, + data: dict, req_resp: bool = True, raw: bool = False, origin: ScrapeItem | URL | None = None, From 31ded00ba776f5dc51f8c7a5ca884b6dea32a5af Mon Sep 17 00:00:00 2001 From: Jacob Date: Sat, 18 Jan 2025 12:39:55 -0500 Subject: [PATCH 08/12] Update cyberdrop_dl/scraper/crawlers/luscious_crawler.py Co-authored-by: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> --- 
cyberdrop_dl/scraper/crawlers/luscious_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py index 581e3e43b..d2419231d 100644 --- a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py @@ -42,7 +42,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: return await self.album(scrape_item) - async def create_graphql_query(self, operation: str, scrape_item: ScrapeItem, page: int = 1) -> str: + async def create_graphql_query(self, operation: str, scrape_item: ScrapeItem, page: int = 1) -> dict: """Creates a graphql query.""" album_id = scrape_item.album_id if operation == "PictureListInsideAlbum": From 4d2e2964f1a7e1a9264efaf26bb352fffed39a73 Mon Sep 17 00:00:00 2001 From: Jacob Date: Sat, 18 Jan 2025 12:40:04 -0500 Subject: [PATCH 09/12] Update cyberdrop_dl/scraper/crawlers/luscious_crawler.py Co-authored-by: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> --- .../scraper/crawlers/luscious_crawler.py | 36 ++++++++----------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py index d2419231d..295e81e32 100644 --- a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py @@ -45,6 +45,11 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: async def create_graphql_query(self, operation: str, scrape_item: ScrapeItem, page: int = 1) -> dict: """Creates a graphql query.""" album_id = scrape_item.album_id + data = { + "id": "1", + "operationName": operation, + "query": self.graphql_queries[operation] + } if operation == "PictureListInsideAlbum": query = scrape_item.url.query @@ -55,28 +60,17 @@ async def create_graphql_query(self, operation: str, scrape_item: ScrapeItem, pa if only_animated == "true": filters.append({"name": "is_animated", "value": "1"}) - data = { - "id": "1", - "operationName": operation, - "query": self.graphql_queries[operation], - "variables": { - "input": { - "display": sorting, - "filters": filters, - "items_per_page": 50, - "page": page, - } - }, - } + data ["variables"] = { + "input": { + "display": sorting, + "filters": filters, + "items_per_page": 50, + "page": page, + } + } elif operation == "AlbumGet": - data = { - "id": "1", - "operationName": operation, - "query": self.graphql_queries[operation], - "variables": {"id": f"{album_id}"}, - } - - return json.dumps(data) + data["variables"] = {"id": f"{album_id}"} + return data async def album_pager(self, scrape_item: ScrapeItem) -> AsyncGenerator[dict]: """Generator for album pages.""" From 7102d55e92936ae0fa1c4cf388995aa52debc62d Mon Sep 17 00:00:00 2001 From: Jacob Date: Sat, 18 Jan 2025 12:40:41 -0500 Subject: [PATCH 10/12] Ruff fixes --- cyberdrop_dl/scraper/crawlers/luscious_crawler.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py index 295e81e32..35b3c82cd 100644 --- a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py @@ -1,7 +1,5 @@ from __future__ import annotations -import contextlib -import json from typing import TYPE_CHECKING from aiolimiter import AsyncLimiter @@ -37,7 +35,7 @@ def __init__(self, manager: Manager) -> None: async def fetch(self, scrape_item: 
ScrapeItem) -> None: """Determines where to send the scrape item based on the url.""" - if not "albums" in scrape_item.url.parts or "read" in scrape_item.url.parts: + if "albums" not in scrape_item.url.parts or "read" in scrape_item.url.parts: log(f"Scrape Failed: Unknown URL Path for {scrape_item.url}", 40) return await self.album(scrape_item) @@ -45,11 +43,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: async def create_graphql_query(self, operation: str, scrape_item: ScrapeItem, page: int = 1) -> dict: """Creates a graphql query.""" album_id = scrape_item.album_id - data = { - "id": "1", - "operationName": operation, - "query": self.graphql_queries[operation] - } + data = {"id": "1", "operationName": operation, "query": self.graphql_queries[operation]} if operation == "PictureListInsideAlbum": query = scrape_item.url.query @@ -60,14 +54,14 @@ async def create_graphql_query(self, operation: str, scrape_item: ScrapeItem, pa if only_animated == "true": filters.append({"name": "is_animated", "value": "1"}) - data ["variables"] = { + data["variables"] = { "input": { "display": sorting, "filters": filters, "items_per_page": 50, "page": page, } - } + } elif operation == "AlbumGet": data["variables"] = {"id": f"{album_id}"} return data @@ -101,7 +95,6 @@ async def album(self, scrape_item: ScrapeItem) -> None: scrape_item.part_of_album = True scrape_item.set_type(FILE_HOST_ALBUM, self.manager) - # Get album information async with self.request_limiter: query = await self.create_graphql_query("AlbumGet", scrape_item) From a3362d8f028f0897f69c294de511e13418111dae Mon Sep 17 00:00:00 2001 From: Jacob Date: Sat, 18 Jan 2025 12:43:32 -0500 Subject: [PATCH 11/12] Re-add json dumps for query as string We have to return the query as a string. If we return it as a dict, the POST request will return a 400 error. I tried looking into why but had no luck. 
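For context, the behaviour is consistent with how aiohttp handles the `data=` argument: a dict passed to `data=` is form-encoded (`application/x-www-form-urlencoded`), while the GraphQL endpoint expects a raw JSON body, which is presumably why it answers with a 400. Below is a minimal sketch (not part of this patch) showing the difference with plain aiohttp; the endpoint, operation name and payload are placeholders only:

```python
import asyncio
import json

import aiohttp

GRAPHQL_URL = "https://members.luscious.net/graphql/nobatch/"  # placeholder endpoint
payload = {"id": "1", "operationName": "AlbumGet", "query": "...", "variables": {"id": "123"}}


async def main() -> None:
    async with aiohttp.ClientSession() as session:
        # data=payload (a dict) would be form-encoded by aiohttp, so a GraphQL
        # server expecting JSON cannot parse the body and responds with HTTP 400.
        # Serializing the payload ourselves sends the raw JSON string it expects:
        async with session.post(
            GRAPHQL_URL,
            data=json.dumps(payload),
            headers={"Content-Type": "application/json"},
        ) as response:
            print(response.status)


asyncio.run(main())
```

aiohttp's `json=payload` would serialize and set the header automatically, but `post_data()` only forwards `data=`, so dumping the query to a string before the call is the simplest fix here.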
--- cyberdrop_dl/scraper/crawlers/luscious_crawler.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py index 35b3c82cd..58179e593 100644 --- a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py @@ -1,5 +1,6 @@ from __future__ import annotations +from json import dumps as dump_json from typing import TYPE_CHECKING from aiolimiter import AsyncLimiter @@ -40,7 +41,7 @@ async def fetch(self, scrape_item: ScrapeItem) -> None: return await self.album(scrape_item) - async def create_graphql_query(self, operation: str, scrape_item: ScrapeItem, page: int = 1) -> dict: + async def create_graphql_query(self, operation: str, scrape_item: ScrapeItem, page: int = 1) -> str: """Creates a graphql query.""" album_id = scrape_item.album_id data = {"id": "1", "operationName": operation, "query": self.graphql_queries[operation]} @@ -64,7 +65,7 @@ async def create_graphql_query(self, operation: str, scrape_item: ScrapeItem, pa } elif operation == "AlbumGet": data["variables"] = {"id": f"{album_id}"} - return data + return dump_json(data) async def album_pager(self, scrape_item: ScrapeItem) -> AsyncGenerator[dict]: """Generator for album pages.""" From 8141212f7b747b4e271302ea03c548ee8f4cc17d Mon Sep 17 00:00:00 2001 From: NTFSvolume <172021377+NTFSvolume@users.noreply.github.com> Date: Sat, 18 Jan 2025 18:57:17 -0500 Subject: [PATCH 12/12] refactor: use base crawler limiter --- cyberdrop_dl/scraper/crawlers/luscious_crawler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py index 58179e593..13ebb6fd8 100644 --- a/cyberdrop_dl/scraper/crawlers/luscious_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/luscious_crawler.py @@ -22,7 +22,6 @@ class LusciousCrawler(Crawler): def __init__(self, manager: Manager) -> None: super().__init__(manager, "luscious", "Luscious") - self.request_limiter = AsyncLimiter(10, 1) self.graphql_url = URL("https://members.luscious.net/graphql/nobatch/") self.graphql_queries = { "AlbumGet": "\n query AlbumGet($id: ID!) {\n album {\n get(id: $id) {\n ... on Album {\n ...AlbumStandard\n }\n ... on MutationError {\n errors {\n code\n message\n }\n }\n }\n }\n}\n \n fragment AlbumStandard on Album {\n __typename\n id\n title\n labels\n description\n created\n modified\n like_status\n number_of_favorites\n number_of_dislikes\n moderation_status\n marked_for_deletion\n marked_for_processing\n number_of_pictures\n number_of_animated_pictures\n number_of_duplicates\n slug\n is_manga\n url\n download_url\n permissions\n created_by {\n id\n url\n name\n display_name\n user_title\n avatar_url\n }\n content {\n id\n title\n url\n }\n language {\n id\n title\n url\n }\n tags {\n category\n text\n url\n count\n }\n genres {\n id\n title\n slug\n url\n }\n audiences {\n id\n title\n url\n }\n is_featured\n featured_date\n featured_by {\n id\n url\n name\n display_name\n user_title\n avatar_url\n }\n}\n ",