diff --git a/cyberdrop_dl/__init__.py b/cyberdrop_dl/__init__.py
index ba80b5202..f272c3a64 100644
--- a/cyberdrop_dl/__init__.py
+++ b/cyberdrop_dl/__init__.py
@@ -1 +1 @@
-__version__ = "5.0.25"
+__version__ = "5.0.26"
diff --git a/cyberdrop_dl/scraper/crawlers/coomer_crawler.py b/cyberdrop_dl/scraper/crawlers/coomer_crawler.py
index 2265b39a9..20cb6962e 100644
--- a/cyberdrop_dl/scraper/crawlers/coomer_crawler.py
+++ b/cyberdrop_dl/scraper/crawlers/coomer_crawler.py
@@ -52,6 +52,7 @@ async def profile(self, scrape_item: ScrapeItem) -> None:
         """Scrapes a profile"""
         offset = 0
         service, user = await self.get_service_and_user(scrape_item)
+        user_str = await self.get_user_str_from_profile(scrape_item)
         api_call = self.api_url / service / "user" / user
         while True:
             async with self.request_limiter:
@@ -61,19 +62,20 @@
                     break
 
             for post in JSON_Resp:
-                await self.handle_post_content(post, scrape_item, user)
+                await self.handle_post_content(post, scrape_item, user, user_str)
 
     @error_handling_wrapper
     async def post(self, scrape_item: ScrapeItem) -> None:
         """Scrapes a post"""
         service, user, post_id = await self.get_service_user_and_post(scrape_item)
+        user_str = await self.get_user_str_from_post(scrape_item)
         api_call = self.api_url / service / "user" / user / "post" / post_id
         async with self.request_limiter:
             post = await self.client.get_json(self.domain, api_call)
-        await self.handle_post_content(post, scrape_item, user)
+        await self.handle_post_content(post, scrape_item, user, user_str)
 
     @error_handling_wrapper
-    async def handle_post_content(self, post: Dict, scrape_item: ScrapeItem, user: str) -> None:
+    async def handle_post_content(self, post: Dict, scrape_item: ScrapeItem, user: str, user_str: str) -> None:
         """Handles the content of a post"""
         if "#ad" in post['content'] and self.manager.config_manager.settings_data['Ignore_Options']['ignore_coomer_ads']:
             return
@@ -81,11 +83,13 @@ async def handle_post_content(self, post: Dict, scrape_item: ScrapeItem, user: s
         date = post["published"].replace("T", " ")
         post_id = post["id"]
         post_title = post["title"]
+        if not post_title:
+            post_title = "Untitled"
 
         async def handle_file(file_obj):
             link = self.primary_base_domain / ("data" + file_obj['path'])
             link = link.with_query({"f": file_obj['name']})
-            await self.create_new_scrape_item(link, scrape_item, user, post_title, post_id, date)
+            await self.create_new_scrape_item(link, scrape_item, user_str, post_title, post_id, date)
 
         if post['file']:
             await handle_file(post['file'])
@@ -106,6 +110,22 @@ async def parse_datetime(self, date: str) -> int:
         date = datetime.datetime.strptime(date, "%Y-%m-%d %H:%M:%S")
         return calendar.timegm(date.timetuple())
 
+    @error_handling_wrapper
+    async def get_user_str_from_post(self, scrape_item: ScrapeItem) -> str:
+        """Gets the user string from a scrape item"""
+        async with self.request_limiter:
+            soup = await self.client.get_BS4(self.domain, scrape_item.url)
+        user = soup.select_one("a[class=post__user-name]").text
+        return user
+
+    @error_handling_wrapper
+    async def get_user_str_from_profile(self, scrape_item: ScrapeItem) -> str:
+        """Gets the user string from a scrape item"""
+        async with self.request_limiter:
+            soup = await self.client.get_BS4(self.domain, scrape_item.url)
+        user = soup.select_one("span[itemprop=name]").text
+        return user
+
     async def get_service_and_user(self, scrape_item: ScrapeItem) -> Tuple[str, str]:
         """Gets the service and user from a scrape item"""
         user = scrape_item.url.parts[3]
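The coomer_crawler change above stops using the raw user ID taken from the URL when naming downloads: it scrapes the human-readable display name out of the page HTML instead, and falls back to "Untitled" for posts whose title is empty. Below is a minimal standalone sketch of that name extraction, assuming aiohttp and BeautifulSoup and a hypothetical URL; the real crawler routes the request through its rate-limited self.client.get_BS4 wrapper, not a bare session.

    # Sketch only -- not the project's client wrapper. Fetches a page with
    # aiohttp and extracts the display name using the same CSS selectors the
    # diff uses: "span[itemprop=name]" on profile pages and
    # "a[class=post__user-name]" on post pages. The URL in main() is made up.
    import asyncio

    import aiohttp
    from bs4 import BeautifulSoup


    async def get_user_str(url: str, selector: str) -> str:
        async with aiohttp.ClientSession() as session:
            async with session.get(url) as resp:
                resp.raise_for_status()
                html = await resp.text()
        node = BeautifulSoup(html, "html.parser").select_one(selector)
        if node is None:
            raise ValueError(f"no element matches {selector!r} at {url}")
        return node.text


    async def main() -> None:
        # Hypothetical example URL; substitute any real profile page.
        name = await get_user_str(
            "https://coomer.example/onlyfans/user/12345", "span[itemprop=name]"
        )
        print(name)


    if __name__ == "__main__":
        asyncio.run(main())

One difference worth noting: the sketch checks select_one for None, whereas the patched methods as written would raise AttributeError on a missing element and rely on @error_handling_wrapper to absorb it.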
diff --git a/pyproject.toml b/pyproject.toml
index 1a1d64ae4..48cb7c098 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "cyberdrop-dl"
-version = "5.0.25"
+version = "5.0.26"
 description = "Bulk downloader for multiple file hosts"
 authors = ["Jules Winnfield "]
 readme = "README.md"