refactor: remove unnecessary async definitions
For `filter_link` and `pre_filter_link`

Both methods shouldn't be necessary with the new `parse_url` logic, but I left them in just in case.
NTFSvolume committed Jan 28, 2025
1 parent 9756a85 commit 5ab8bd1
Showing 3 changed files with 12 additions and 12 deletions.
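
As the commit message above notes, the refactor turns `async def` methods that never await anything into plain `def` methods, so call sites drop the `await`. A minimal sketch of that pattern, with illustrative class names that are not taken from the repository:

```python
from yarl import URL


class BeforeRefactor:
    async def filter_link(self, link: URL) -> URL:  # declared async, but the body never awaits
        return link.with_query(None)


class AfterRefactor:
    def filter_link(self, link: URL) -> URL:  # plain def: same behavior, no coroutine wrapper
        return link.with_query(None)


async def demo(link: URL) -> None:
    cleaned_old = await BeforeRefactor().filter_link(link)  # await needed only because of the async def
    cleaned_new = AfterRefactor().filter_link(link)         # direct call once the method is synchronous
    assert cleaned_old == cleaned_new
```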
2 changes: 1 addition & 1 deletion cyberdrop_dl/scraper/crawlers/f95zone_crawler.py
@@ -39,7 +39,7 @@ async def handle_confirmation_link(self, link: URL, *, origin: ScrapeItem | None
return self.parse_url(JSON_Resp["msg"])
return None

- async def filter_link(self, link: URL) -> URL:
+ def filter_link(self, link: URL) -> URL:
if "thumb" in link.parts:
parts = [x for x in link.parts if x not in ("thumb", "/")]
new_path = "/".join(parts)
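For reference, a rough sketch of what the visible part of `filter_link` above computes for a hypothetical thumbnail URL (the URL, and `with_path` as the final step, are assumptions, since the rest of the method is truncated in this view):

```python
from yarl import URL

# hypothetical thumbnail URL, not taken from the repository
link = URL("https://example.f95zone.to/data/thumb/1234/image.jpg")

parts = [x for x in link.parts if x not in ("thumb", "/")]  # drop the "thumb" path segment
new_path = "/".join(parts)

print(parts)                     # ['data', '1234', 'image.jpg']
print(new_path)                  # data/1234/image.jpg
print(link.with_path(new_path))  # assumed final step -> https://example.f95zone.to/data/1234/image.jpg
```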
4 changes: 2 additions & 2 deletions cyberdrop_dl/scraper/crawlers/titsintops_crawler.py
@@ -25,15 +25,15 @@ def __init__(self, manager: Manager) -> None:
super().__init__(manager, self.domain, "TitsInTops")
self.attachment_url_part = ["attachments", "data"]

- async def filter_link(self, link: URL):
+ def filter_link(self, link: URL):
return URL(
str(link)
.replace("index.php%3F", "index.php/")
.replace("index.php?", "index.php/")
.replace("index.php/goto", "index.php?goto")
)

- async def pre_filter_link(self, link):
+ def pre_filter_link(self, link):
return URL(str(link).replace("index.php?", "index.php/").replace("index.php%3F", "index.php/"))

def is_valid_post_link(self, link_obj: Tag) -> bool:
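Similarly, a quick sketch of the TitsInTops `filter_link` rewrite above, applied to made-up thread and goto URLs (the URLs are illustrative, not taken from the site):

```python
from yarl import URL

# made-up XenForo-style links, for illustration only
thread = "https://titsintops.com/phpBB2/index.php?threads/some-thread.123/"
goto = "https://titsintops.com/phpBB2/index.php?goto/post&id=456"


def rewrite(raw: str) -> URL:
    # same chain of replacements as filter_link above
    return URL(
        raw.replace("index.php%3F", "index.php/")
        .replace("index.php?", "index.php/")
        .replace("index.php/goto", "index.php?goto")
    )


print(rewrite(thread))  # https://titsintops.com/phpBB2/index.php/threads/some-thread.123/
print(rewrite(goto))    # https://titsintops.com/phpBB2/index.php?goto/post&id=456  (goto links keep their query)
```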
18 changes: 9 additions & 9 deletions cyberdrop_dl/scraper/crawlers/xenforo_crawler.py
@@ -115,15 +115,12 @@ async def async_startup(self) -> None:
if not self.logged_in:
await self.login_setup()

- async def pre_filter_link(self, link: URL) -> URL:
-     return link
-
@create_task_id
async def fetch(self, scrape_item: ScrapeItem) -> None:
"""Determines where to send the scrape item based on the url."""
if not self.logged_in and self.login_required:
return
- scrape_item.url = await self.pre_filter_link(scrape_item.url)
+ scrape_item.url = self.pre_filter_link(scrape_item.url)
if self.is_attachment(scrape_item.url):
await self.handle_internal_link(scrape_item.url, scrape_item)
elif self.thread_url_part in scrape_item.url.parts:
@@ -221,9 +218,6 @@ async def attachments(self, scrape_item: ScrapeItem, post: ForumPost) -> None:

"""~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"""

- async def filter_link(self, link: URL | None) -> URL | None:
-     return link
-
async def thread_pager(self, scrape_item: ScrapeItem) -> AsyncGenerator[BeautifulSoup]:
"""Generator of forum thread pages."""
page_url = scrape_item.url
@@ -236,7 +230,7 @@ async def thread_pager(self, scrape_item: ScrapeItem) -> AsyncGenerator[BeautifulSoup]:
break
page_url_str: str = next_page.get(self.selectors.next_page.attribute)
page_url = self.parse_url(page_url_str)
- page_url = await self.pre_filter_link(page_url)
+ page_url = self.pre_filter_link(page_url)

async def process_children(self, scrape_item: ScrapeItem, links: list[Tag], selector: str) -> None:
for link_obj in links:
@@ -254,7 +248,7 @@ async def process_children(self, scrape_item: ScrapeItem, links: list[Tag], selector: str) -> None:
continue

link = await self.get_absolute_link(link_str)
- link = await self.filter_link(link)
+ link = self.filter_link(link)
if not link:
continue
await self.handle_link(scrape_item, link)
@@ -343,6 +337,12 @@ def process_embed(self, data: str) -> str | None:
embed = re.search(HTTP_URL_PATTERNS[0], data) or re.search(HTTP_URL_PATTERNS[1], data)
return embed.group(0).replace("www.", "") if embed else data

+ def pre_filter_link(self, link: URL) -> URL:
+     return link
+
+ def filter_link(self, link: URL | None) -> URL | None:
+     return link
+
""" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"""

@error_handling_wrapper
