Skip to content

Commit

Permalink
Merge pull request #119 from jbsparrow/URLs-forum-categorization
Browse files Browse the repository at this point in the history
Add link categorization to the URLs file.
  • Loading branch information
jbsparrow authored Sep 22, 2024
2 parents c04cbfc + df8fce0 commit b4dc002
Show file tree
Hide file tree
Showing 4 changed files with 265 additions and 220 deletions.
35 changes: 25 additions & 10 deletions cyberdrop_dl/scraper/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,26 +310,41 @@ async def load_links(self) -> None:
"""Loads links from args / input file"""
input_file = self.manager.path_manager.input_file

links = []
links = {'': []}
if not self.manager.args_manager.other_links:
block_quote = False
thread_title = ""
async with aiofiles.open(input_file, "r", encoding="utf8") as f:
async for line in f:
assert isinstance(line, str)
block_quote = not block_quote if line == "#\n" else block_quote
if not block_quote:
links.extend(await self.regex_links(line))

if line.startswith("---") or line.startswith("==="):
thread_title = line.replace("---", "").replace("===", "").strip()
if thread_title:
if thread_title not in links.keys():
links[thread_title] = []

if thread_title:
links[thread_title].extend(await self.regex_links(line))
else:
block_quote = not block_quote if line == "#\n" else block_quote
if not block_quote:
links[''].extend(await self.regex_links(line))
else:
links.extend(self.manager.args_manager.other_links)
links = list(filter(None, links))
links[''].extend(self.manager.args_manager.other_links)

links = {k: list(filter(None, v)) for k, v in links.items()}
items = []

if not links:
await log("No valid links found.", 30)
for link in links:
item = self.get_item_from_link(link)
if await self.filter_items(item):
items.append(item)
for title in links:
for url in links[title]:
item = self.get_item_from_link(url)
await item.add_to_parent_title(title)
item.part_of_album = True
if await self.filter_items(item):
items.append(item)
for item in items:
self.manager.task_group.create_task(self.add_item_to_group(item))

Expand Down
20 changes: 20 additions & 0 deletions cyberdrop_dl/utils/changelog.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,26 @@
------------------------------------------------------------
C\bCH\bHA\bAN\bNG\bGE\bEL\bLO\bOG\bG
\tVersion 5.6.32
D\bDE\bES\bSC\bCR\bRI\bIP\bPT\bTI\bIO\bON\bN
\tThis update introduces the following changes:
\t\t1. Add a new URL categorization feature for the URLs.txt file.
\tDetails:
\t\t- You can now group links under one download folder by adding a category name above the links in the URLs.txt file.
\t\t- The category name must be on its own line and be prefixed by three dashes (---) or three equals signs (===).
\t\t- The category name will be used as the folder name for the links that follow it.
\t\t- To end a category, add three dashes (---) on a new line after the links.
\t\t- You can have multiple categories in the URLs.txt file, and the links will be grouped accordingly.
\tFor more details, visit the wiki: https://script-ware.gitbook.io
------------------------------------------------------------
C\bCH\bHA\bAN\bNG\bGE\bEL\bLO\bOG\bG
\tVersion 5.6.30
Expand Down
Loading

0 comments on commit b4dc002

Please sign in to comment.