merge master
datawhores committed Nov 19, 2024
2 parents 1c46c67 + b512732 commit 2b3b76c
Showing 26 changed files with 854 additions and 879 deletions.
4 changes: 2 additions & 2 deletions cyberdrop_dl/clients/hash_client.py
@@ -9,7 +9,7 @@

from send2trash import send2trash

from cyberdrop_dl.ui.prompts.continue_prompt import enter_to_continue
from cyberdrop_dl.ui.prompts.basic_prompts import enter_to_continue
from cyberdrop_dl.utils.logger import log

if TYPE_CHECKING:
@@ -55,7 +55,7 @@ async def startup(self) -> None:

async def hash_directory(self, path: Path) -> None:
path = Path(path)
async with self.manager.live_manager.get_hash_live(stop=True):
with self.manager.live_manager.get_hash_live(stop=True):
if not path.is_dir():
raise NotADirectoryError
for file in path.rglob("*"):
12 changes: 6 additions & 6 deletions cyberdrop_dl/main.py
@@ -17,8 +17,8 @@
from cyberdrop_dl.clients.errors import InvalidYamlError
from cyberdrop_dl.managers.manager import Manager
from cyberdrop_dl.scraper.scraper import ScrapeMapper
from cyberdrop_dl.ui.ui import program_ui
from cyberdrop_dl.utils.args.browser_cookie_extraction import get_cookies_from_browser
from cyberdrop_dl.ui.program_ui import ProgramUI
from cyberdrop_dl.ui.prompts.user_prompts import get_cookies_from_browsers
from cyberdrop_dl.utils.logger import (
log,
log_spacer,
@@ -48,7 +48,7 @@ def startup() -> Manager:
manager.startup()

if not manager.args_manager.immediate_download:
program_ui(manager)
ProgramUI(manager)

except InvalidYamlError as e:
print_to_console(e.message_rich)
@@ -77,12 +77,12 @@ async def runtime(manager: Manager) -> None:
def pre_runtime(manager: Manager) -> None:
"""Actions to complete before main runtime."""
if manager.config_manager.settings_data["Browser_Cookies"]["auto_import"]:
get_cookies_from_browser(manager)
get_cookies_from_browsers(manager)


async def post_runtime(manager: Manager) -> None:
"""Actions to complete after main runtime, and before ui shutdown."""
log_spacer(20)
log_spacer(20, log_to_console=False)
log_with_color(
f"Running Post-Download Processes For Config: {manager.config_manager.loaded_config}",
"green",
@@ -172,7 +172,7 @@ def setup_logger(manager: Manager, config_name: str) -> None:


def ui_error_handling_wrapper(func: Callable) -> None:
"""Wrapper handles errors from the main UI"""
"""Wrapper handles errors from the main UI."""

@wraps(func)
async def wrapper(*args, **kwargs):
2 changes: 1 addition & 1 deletion cyberdrop_dl/managers/args_manager.py
@@ -153,7 +153,7 @@ def startup(self) -> None:
del self.parsed_args["download_dir"]
del self.parsed_args["appdata_dir"]
del self.parsed_args["config_file"]
del self.parsed_args["log_folder"]
del self.parsed_args["log_dir"]
del self.parsed_args["proxy"]
del self.parsed_args["links"]
del self.parsed_args["sort_downloads"]
4 changes: 2 additions & 2 deletions cyberdrop_dl/managers/config_manager.py
@@ -132,10 +132,10 @@ def load_configs(self) -> None:
self.write_updated_settings_config()

def return_verified(self, value) -> any:
if isinstance(value, int):
return int(value)
if isinstance(value, bool):
return bool(value)
if isinstance(value, int):
return int(value)
if isinstance(value, str):
return str(value)
if isinstance(value, list):
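The reordering above matters because bool is a subclass of int in Python, so a boolean value tested against int first would be coerced to an integer. A minimal standalone sketch of the behaviour the reordered checks rely on:

    def return_verified(value):
        # bool must be checked before int: isinstance(True, int) is True,
        # so an int-first check would turn True into 1.
        if isinstance(value, bool):
            return bool(value)
        if isinstance(value, int):
            return int(value)
        return value

    assert return_verified(True) is True  # stays a bool
    assert return_verified(7) == 7        # stays an int
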
3 changes: 2 additions & 1 deletion cyberdrop_dl/managers/manager.py
@@ -206,7 +206,8 @@ def args_logging(self) -> None:
async def close(self) -> None:
"""Closes the manager."""
await self.db_manager.close()
await self.client_manager.close()
if not isinstance(self.client_manager, field):
await self.client_manager.close()
self.db_manager: DBManager = field(init=False)
self.cache_manager: CacheManager = field(init=False)
self.hash_manager: HashManager = field(init=False)
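
The added guard is meant to skip closing a client manager that was never created, i.e. when the attribute still holds its dataclass placeholder. A minimal sketch of that idea, assuming the placeholder is the dataclasses.Field object returned by field(init=False) (the diff itself tests against field, the function):

    from dataclasses import Field, field

    class Manager:
        client_manager = field(init=False)  # placeholder until startup assigns a real ClientManager

        async def close(self) -> None:
            # Close the client manager only if it was actually initialized;
            # otherwise the attribute is still the Field placeholder.
            if not isinstance(self.client_manager, Field):
                await self.client_manager.close()
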
151 changes: 64 additions & 87 deletions cyberdrop_dl/scraper/crawlers/cyberfile_crawler.py
@@ -22,7 +22,7 @@
class CyberfileCrawler(Crawler):
def __init__(self, manager: Manager) -> None:
super().__init__(manager, "cyberfile", "Cyberfile")
self.api_files = URL("https://cyberfile.me/account/ajax/load_files")
self.api_load_files = URL("https://cyberfile.me/account/ajax/load_files")
self.api_details = URL("https://cyberfile.me/account/ajax/file_details")
self.api_password_process = URL("https://cyberfile.me/ajax/folder_password_process")
self.request_limiter = AsyncLimiter(5, 1)
@@ -50,15 +50,14 @@ async def folder(self, scrape_item: ScrapeItem) -> None:

login = soup.select_one("form[id=form_login]")
if login:
raise ScrapeError(404, "Folder has been deleted", origin=scrape_item)
raise ScrapeError(410, "Folder has been deleted", origin=scrape_item)

script_func = soup.select('div[class*="page-container"] script')[-1].text
script_func = script_func.split("loadImages(")[-1]
script_func = script_func.split(";")[0]
nodeId = int(script_func.split(",")[1].replace("'", ""))
scrape_item.album_id = scrape_item.url.parts[2]
scrape_item.part_of_album = True
password = scrape_item.url.query.get("password", "")
# Do not reset if nested folder
if scrape_item.type != FILE_HOST_ALBUM:
scrape_item.type = FILE_HOST_ALBUM
@@ -72,33 +71,9 @@ async def folder(self, scrape_item: ScrapeItem) -> None:
page = 1
while True:
data = {"pageType": "folder", "nodeId": nodeId, "pageStart": page, "perPage": 0, "filterOrderBy": ""}
async with self.request_limiter:
ajax_dict: dict = await self.client.post_data(
self.domain,
self.api_files,
data=data,
origin=scrape_item,
)
if "Password Required" in ajax_dict["html"]:
password_data = {"folderPassword": password, "folderId": nodeId, "submitme": 1}
password_response: dict = await self.client.post_data(
self.domain,
self.api_password_process,
data=password_data,
origin=scrape_item,
)
if not password_response.get("success"):
raise PasswordProtectedError(origin=scrape_item)
ajax_dict: dict = await self.client.post_data(
self.domain,
self.api_files,
data=data,
origin=scrape_item,
)

ajax_soup = BeautifulSoup(ajax_dict["html"].replace("\\", ""), "html.parser")
ajax_soup, ajax_title = await self.get_soup_from_ajax(data, scrape_item)

title = self.create_title(ajax_dict["page_title"], scrape_item.album_id, None)
title = self.create_title(ajax_title, scrape_item.album_id, None)
num_pages = int(
ajax_soup.select("a[onclick*=loadImages]")[-1].get("onclick").split(",")[2].split(")")[0].strip(),
)
@@ -112,17 +87,7 @@ async def folder(self, scrape_item: ScrapeItem) -> None:
link = URL(tile.get("sharing-url"))
elif file_id:
link = URL(tile.get("dtfullurl"))
if link:
new_scrape_item = self.create_scrape_item(
scrape_item,
link,
title,
True,
add_parent=scrape_item.url,
)
self.manager.task_group.create_task(self.run(new_scrape_item))
else:
log(f"Couldn't find folder or file id for {scrape_item.url} element", 30)
if not link:
continue

new_scrape_item = self.create_scrape_item(
@@ -169,36 +134,22 @@ async def shared(self, scrape_item: ScrapeItem) -> None:
"perPage": 0,
"filterOrderBy": "",
}
async with self.request_limiter:
ajax_dict = await self.client.post_data("cyberfile", self.api_files, data=data, origin=scrape_item)
ajax_soup = BeautifulSoup(ajax_dict["html"].replace("\\", ""), "html.parser")
title = self.create_title(ajax_dict["page_title"], scrape_item.url.parts[2], None)

ajax_soup, ajax_title = await self.get_soup_from_ajax(data, scrape_item)
title = self.create_title(ajax_title, scrape_item.url.parts[2], None)
num_pages = int(ajax_soup.select_one("input[id=rspTotalPages]").get("value"))

tile_listings = ajax_soup.select("div[class=fileListing] div[class*=fileItem]")
for tile in tile_listings:
folder_id = tile.get("folderid")
file_id = tile.get("fileid")

link = None
if folder_id:
new_folders.append(folder_id)
continue
if file_id:
link = URL(tile.get("dtfullurl"))

if link:
new_scrape_item = self.create_scrape_item(
scrape_item,
link,
title,
True,
add_parent=scrape_item.url,
)
self.manager.task_group.create_task(self.run(new_scrape_item))

else:
log(f"Couldn't find folder or file id for {scrape_item.url} element", 30)
if not link:
continue

new_scrape_item = self.create_scrape_item(
@@ -225,24 +176,9 @@ async def shared(self, scrape_item: ScrapeItem) -> None:
@error_handling_wrapper
async def file(self, scrape_item: ScrapeItem) -> None:
"""Scrapes a file."""
password = scrape_item.url.query.get("password", "")
scrape_item.url = scrape_item.url.with_query(None)
contentId = None
async with self.request_limiter:
soup: BeautifulSoup = await self.client.get_soup(self.domain, scrape_item.url, origin=scrape_item)
if "Enter File Password" in soup.text:
password_data = {"filePassword": password, "submitted": 1}
soup = BeautifulSoup(
await self.client.post_data(
self.domain,
scrape_item.url,
data=password_data,
raw=True,
origin=scrape_item,
),
"html.parser",
)
if "File password is invalid" in soup.text:
raise PasswordProtectedError(origin=scrape_item)

script_funcs = soup.select("script")
for script in script_funcs:
@@ -252,36 +188,31 @@ async def file(self, scrape_item: ScrapeItem) -> None:
contentId_a = next(x for x in contentId_a if x[0].isdigit())
contentId_b = contentId_a.split(");")[0]
contentId = int(contentId_b)
await self.handle_content_id(scrape_item, contentId)
return
break

if not contentId:
raise ScrapeError(422, message="contentId not found", origin=ScrapeItem)
await self.handle_content_id(scrape_item, contentId)

@error_handling_wrapper
async def handle_content_id(self, scrape_item: ScrapeItem, content_id: int) -> None:
"""Scrapes a file using the content id."""
data = {"u": content_id}
async with self.request_limiter:
ajax_dict = await self.client.post_data(self.domain, self.api_details, data=data, origin=scrape_item)
ajax_soup = BeautifulSoup(ajax_dict["html"].replace("\\", ""), "html.parser")

if "albumPasswordModel" in ajax_dict["html"]:
raise PasswordProtectedError(origin=scrape_item)

ajax_soup, _ = await self.get_soup_from_ajax(data, scrape_item, file=True)
file_menu = ajax_soup.select_one('ul[class="dropdown-menu dropdown-info account-dropdown-resize-menu"] li a')
file_button = ajax_soup.select_one('div[class="btn-group responsiveMobileMargin"] button')
try:
html_download_text = file_menu.get("onclick") if file_menu else file_button.get("onclick")
except AttributeError:
log(f"Couldn't find download button for {scrape_item.url}", 30)
raise ScrapeError(422, "Couldn't find download button", origin=scrape_item) from None
link = URL(html_download_text.split("'")[1])

link = URL(html_download_text.split("'")[1])
file_detail_table = ajax_soup.select('table[class="table table-bordered table-striped"]')[-1]
uploaded_row = file_detail_table.select("tr")[-2]
uploaded_date = uploaded_row.select_one("td[class=responsiveTable]").text.strip()
uploaded_date = self.parse_datetime(uploaded_date)
scrape_item.possible_datetime = uploaded_date

filename, ext = get_filename_and_ext(link.name)
filename, ext = get_filename_and_ext(ajax_soup.title or link.name)
await self.handle_file(link, scrape_item, filename, ext)

"""~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"""
@@ -291,3 +222,49 @@ def parse_datetime(date: str) -> int:
"""Parses a datetime string into a unix timestamp."""
date = datetime.datetime.strptime(date, "%d/%m/%Y %H:%M:%S")
return calendar.timegm(date.timetuple())

async def get_soup_from_ajax(
self, data: dict, scrape_item: ScrapeItem, file: bool = False
) -> tuple[BeautifulSoup, str]:
password = scrape_item.url.query.get("password", "")
final_entrypoint = self.api_details if file else self.api_load_files
async with self.request_limiter:
ajax_dict: dict = await self.client.post_data(
self.domain,
final_entrypoint,
data=data,
origin=scrape_item,
)

ajax_soup = BeautifulSoup(ajax_dict["html"].replace("\\", ""), "html.parser")

if "Password Required" in ajax_dict["html"]:
if not password:
raise PasswordProtectedError(origin=scrape_item)

soup_nodeId = ajax_soup.select_one("#folderId")
# override if data has it
nodeId = data.get("nodeId", soup_nodeId.get("value"))
if not nodeId:
raise ScrapeError(422, message="nodeId not found", origin=scrape_item) from None

async with self.request_limiter:
password_data = {"folderPassword": password, "folderId": nodeId, "submitme": 1}
password_response: dict = await self.client.post_data(
self.domain,
self.api_password_process,
data=password_data,
origin=scrape_item,
)
if not password_response.get("success"):
raise PasswordProtectedError(message="Incorrect password", origin=scrape_item)

ajax_dict: dict = await self.client.post_data(
self.domain,
final_entrypoint,
data=data,
origin=scrape_item,
)
ajax_soup = BeautifulSoup(ajax_dict["html"].replace("\\", ""), "html.parser")

return ajax_soup, ajax_dict["page_title"]
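
The new helper collects the AJAX request and the folder/file password retry that folder, shared, and file previously each duplicated. A rough sketch of how the callers above use it, with signatures taken from this diff:

    # Folder and shared listings post to api_load_files:
    ajax_soup, ajax_title = await self.get_soup_from_ajax(data, scrape_item)
    title = self.create_title(ajax_title, scrape_item.album_id, None)

    # A single file posts to api_details instead (file=True); the returned title is unused:
    ajax_soup, _ = await self.get_soup_from_ajax({"u": content_id}, scrape_item, file=True)
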
2 changes: 1 addition & 1 deletion cyberdrop_dl/scraper/scraper.py
@@ -79,7 +79,7 @@ def __init__(self, manager: Manager) -> None:
"xxxbunker": self.xxxbunker,
}

is_testing = next((tag for tag in PRELEASE_TAGS if tag in current_version), False)
is_testing = next((tag for tag in PRERELEASE_TAGS if tag in current_version), False)
if is_testing:
self.mapping["simpcity"] = self.simpcity

(Diffs for the remaining 19 changed files are not shown.)
