From db37a38d0661628d0664424e1a2bf725f051d803 Mon Sep 17 00:00:00 2001 From: Jacob Date: Sun, 5 Jan 2025 18:25:08 -0500 Subject: [PATCH 1/3] Add a xenforo crawler for forum.allporncomix.com Added a xenforo crawler for https://forum.allporncomix.com --- .../authentication_settings.py | 3 +++ cyberdrop_dl/scraper/__init__.py | 1 + .../scraper/crawlers/allporncomix_crawler.py | 25 +++++++++++++++++++ 3 files changed, 29 insertions(+) create mode 100644 cyberdrop_dl/scraper/crawlers/allporncomix_crawler.py diff --git a/cyberdrop_dl/config_definitions/authentication_settings.py b/cyberdrop_dl/config_definitions/authentication_settings.py index 3ae841b8e..d76de31d2 100644 --- a/cyberdrop_dl/config_definitions/authentication_settings.py +++ b/cyberdrop_dl/config_definitions/authentication_settings.py @@ -4,6 +4,9 @@ class ForumAuth(BaseModel): + allporncomix_xf_user_cookie: str = "" + allporncomix_username: str = "" + allporncomix_password: str = "" celebforum_xf_user_cookie: str = "" celebforum_username: str = "" celebforum_password: str = "" diff --git a/cyberdrop_dl/scraper/__init__.py b/cyberdrop_dl/scraper/__init__.py index 3f9bf262c..cd823b251 100644 --- a/cyberdrop_dl/scraper/__init__.py +++ b/cyberdrop_dl/scraper/__init__.py @@ -5,6 +5,7 @@ from typing import TYPE_CHECKING from cyberdrop_dl import __version__ as current_version +from cyberdrop_dl.scraper.crawlers.allporncomix_crawler import AllPornComixCrawler from cyberdrop_dl.scraper.crawlers.bunkrr_crawler import BunkrrCrawler from cyberdrop_dl.scraper.crawlers.celebforum_crawler import CelebForumCrawler from cyberdrop_dl.scraper.crawlers.chevereto_crawler import CheveretoCrawler diff --git a/cyberdrop_dl/scraper/crawlers/allporncomix_crawler.py b/cyberdrop_dl/scraper/crawlers/allporncomix_crawler.py new file mode 100644 index 000000000..3ee2f0501 --- /dev/null +++ b/cyberdrop_dl/scraper/crawlers/allporncomix_crawler.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +from yarl import URL + +from .xenforo_crawler import PostSelectors, Selector, XenforoCrawler, XenforoSelectors + +if TYPE_CHECKING: + from cyberdrop_dl.managers.manager import Manager + + +class AllPornComixCrawler(XenforoCrawler): + primary_base_domain = URL("https://forum.allporncomix.com") + domain = "allporncomix" + post_selectors = PostSelectors( + content=Selector("div[class=bbWrapper]", None), + images=Selector("img[class*=bbImage]", "data-src"), + date=Selector("time", "datetime"), + attachments=Selector("section[class=message-attachments] .attachmentList .file .file-preview", "href") + ) + selectors = XenforoSelectors(posts=post_selectors) + + def __init__(self, manager: Manager) -> None: + super().__init__(manager, self.domain, "AllPornComix") From 9a743b6459777cf1695f78b9b2ad5754b00eb2ab Mon Sep 17 00:00:00 2001 From: Jacob Date: Sun, 5 Jan 2025 18:28:13 -0500 Subject: [PATCH 2/3] Add cache filter for allporncomix --- cyberdrop_dl/scraper/filters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cyberdrop_dl/scraper/filters.py b/cyberdrop_dl/scraper/filters.py index 4a9b4d13b..0548a3b3d 100644 --- a/cyberdrop_dl/scraper/filters.py +++ b/cyberdrop_dl/scraper/filters.py @@ -139,6 +139,7 @@ async def check_kemono_page(response: ClientResponse): return False, "Discord channel page" filter_dict = { + "allporncomix.com": check_xenforo_last_page, "celebforum.to": check_xenforo_last_page, "f95zone.to": check_xenforo_last_page, "leakedmodels.com": check_xenforo_last_page, From 56197ee5bb71d84291efd53cff4c6c1eba428b3f Mon Sep 17 00:00:00 2001 From: Jacob Date: Sun, 5 Jan 2025 18:28:33 -0500 Subject: [PATCH 3/3] Ruff formatting --- cyberdrop_dl/scraper/crawlers/allporncomix_crawler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cyberdrop_dl/scraper/crawlers/allporncomix_crawler.py b/cyberdrop_dl/scraper/crawlers/allporncomix_crawler.py index 3ee2f0501..d79ef01c7 100644 --- a/cyberdrop_dl/scraper/crawlers/allporncomix_crawler.py +++ b/cyberdrop_dl/scraper/crawlers/allporncomix_crawler.py @@ -17,7 +17,7 @@ class AllPornComixCrawler(XenforoCrawler): content=Selector("div[class=bbWrapper]", None), images=Selector("img[class*=bbImage]", "data-src"), date=Selector("time", "datetime"), - attachments=Selector("section[class=message-attachments] .attachmentList .file .file-preview", "href") + attachments=Selector("section[class=message-attachments] .attachmentList .file .file-preview", "href"), ) selectors = XenforoSelectors(posts=post_selectors)