Skip to content

Commit

Permalink
Merge pull request #99 from jbsparrow/pixeldrain-text-scraping
Browse files: browse the repository at this point in the history
Pixeldrain text scraping
  • Loading branch information
datawhores authored Sep 19, 2024
2 parents 6675dbb + 8e9a829 commit f11af44
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 6 deletions.
16 changes: 12 additions & 4 deletions cyberdrop_dl/scraper/crawlers/pixeldrain_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ async def folder(self, scrape_item: ScrapeItem) -> None:
try:
filename, ext = await get_filename_and_ext(file['name'])
except NoExtensionFailure:
if "image" or "video" in file["mime_type"]:
if "image" in file["mime_type"] or "video" in file["mime_type"]:
filename, ext = await get_filename_and_ext(file['name'] + "." + file["mime_type"].split("/")[-1])
else:
raise NoExtensionFailure()
Expand All @@ -72,9 +72,17 @@ async def file(self, scrape_item: ScrapeItem) -> None:
try:
filename, ext = await get_filename_and_ext(JSON_Resp['name'])
except NoExtensionFailure:
if "image" or "video" in JSON_Resp["mime_type"]:
filename, ext = await get_filename_and_ext(
JSON_Resp['name'] + "." + JSON_Resp["mime_type"].split("/")[-1])
if "text/plain" in JSON_Resp["mime_type"]:
await scrape_item.add_to_parent_title(f"{JSON_Resp['name']} (Pixeldrain)")
async with self.request_limiter:
text = await self.client.get_text(self.domain, self.api_address / "file" / scrape_item.url.parts[-1])
lines = text.split("\n")
for line in lines:
link = URL(line)
new_scrape_item = await self.create_scrape_item(scrape_item, link, "", False, None, date)
await self.handle_external_links(new_scrape_item)
elif "image" in JSON_Resp["mime_type"] or "video" in JSON_Resp["mime_type"]:
filename, ext = await get_filename_and_ext(JSON_Resp['name'] + "." + JSON_Resp["mime_type"].split("/")[-1])
else:
raise NoExtensionFailure()
new_scrape_item = await self.create_scrape_item(scrape_item, link, "", False, None, date)
Expand Down
15 changes: 14 additions & 1 deletion cyberdrop_dl/utils/changelog.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
"""
------------------------------------------------------------
C\bCH\bHA\bAN\bNG\bGE\bEL\bLO\bOG\bG
\tVersion 5.6.20
D\bDE\bES\bSC\bCR\bRI\bIP\bPT\bTI\bIO\bON\bN
\tThis update introduces the following changes:
\t\t1. Ability to scrape URLs from PixelDrain text posts
\tDetails:
\t\t- Cyberdrop-DL will now scrape URLs from PixelDrain text posts and make a folder for all the URLs within the post, reducing clutter.
\tFor more details, visit the wiki: https://script-ware.gitbook.io
C\bCH\bHA\bAN\bNG\bGE\bEL\bLO\bOG\bG
\tVersion 5.6.13
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "cyberdrop-dl-patched"
version = "5.6.13"
version = "5.6.21"
description = "Bulk downloader for multiple file hosts"
authors = ["Jacob B <[email protected]>"]
readme = "README.md"
Expand Down

0 comments on commit f11af44

Please sign in to comment.