From ff5ec328d1b13a3597fe7e8ccdbf72ed0b262591 Mon Sep 17 00:00:00 2001 From: speedyconzales <9094731+speedyconzales@users.noreply.github.com> Date: Wed, 15 May 2024 23:31:37 +0200 Subject: [PATCH] adds support of hoster doodstream (#24) --- README.md | 1 + main.py | 4 ++-- src/argument_parser.py | 2 +- src/downloader.py | 9 +++++---- src/html_scraper.py | 36 ++++++++++++++++++++++++++++-------- src/logger.py | 10 +++++----- 6 files changed, 42 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index f651fd7..38e5364 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ headless and completely automated scraping of the following sites: ## Supported Hosters - [VOE](https://voe.sx) - [Vidoza](https://vidoza.net) +- [Doodstream](https://doodstream.com) - [Streamtape](https://streamtape.com) ## Usage diff --git a/main.py b/main.py index b13fa23..356fe4b 100644 --- a/main.py +++ b/main.py @@ -62,7 +62,7 @@ def check_episodes( provider_episodes.append(episode) continue logger.debug(f"{provider} content URL is: {content_url}") - future_list.append(create_new_download_thread(executor, content_url, file_name, episode)) + future_list.append(create_new_download_thread(executor, content_url, file_name, episode, provider)) return provider_episodes, language_episodes, future_list @@ -83,7 +83,7 @@ def main(): os.makedirs(output_path, exist_ok=True) - provider_list = ["VOE", "VOE", "Vidoza", "Streamtape"] if not provider else provider + provider_list = ["VOE", "Vidoza", "Doodstream", "Streamtape"] if not provider else provider for season in seasons: season_path = f"{output_path}/Season {season:02}" diff --git a/src/argument_parser.py b/src/argument_parser.py index fabaedb..568909b 100644 --- a/src/argument_parser.py +++ b/src/argument_parser.py @@ -37,7 +37,7 @@ def parse_range(episodes): parser.add_argument("-s", "--season", type=int, help="specify the season") parser.add_argument("-e", "--episode", nargs='+', type=str, help="specify a list of episode numbers") 
parser.add_argument("-t", "--threads", type=int, help="specify the number of threads or concurrent downloads") - parser.add_argument("-p", "--provider", choices=["VOE", "Vidoza", "Streamtape"], help="Choose the hoster/provider you want to download from") + parser.add_argument("-p", "--provider", choices=["VOE", "Vidoza", "Streamtape", "Doodstream"], help="Choose the hoster/provider you want to download from") parser.add_argument("-a", "--anime", action='store_true', help="specify if the content is an anime") args = parser.parse_args() diff --git a/src/downloader.py b/src/downloader.py index cc9ee26..626b880 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -13,9 +13,10 @@ def already_downloaded(file_name): return False -def download_episode(url, file_name, episode): +def download_episode(url, file_name, episode, provider): try: - ffmpeg_cmd = ["ffmpeg", "-i", url, "-c", "copy", "-nostdin", file_name] + ffmpeg_cmd = ["ffmpeg", "-headers", "Referer: https://d0000d.com/\r\n", "-i", url, "-c", "copy", "-nostdin", file_name] if provider == "Doodstream" \ + else ["ffmpeg", "-i", url, "-c", "copy", "-nostdin", file_name] logger.info(f"Episode '{file_name}' added to queue.") if platform.system() == "Windows": subprocess.run(ffmpeg_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) @@ -35,5 +36,5 @@ def download_episode(url, file_name, episode): return episode -def create_new_download_thread(executor, content_url, file_name, episode): - return executor.submit(download_episode, content_url, file_name, episode) +def create_new_download_thread(executor, content_url, file_name, episode, provider): + return executor.submit(download_episode, content_url, file_name, episode, provider) diff --git a/src/html_scraper.py b/src/html_scraper.py index 810ea38..ebf90e9 100644 --- a/src/html_scraper.py +++ b/src/html_scraper.py @@ -4,6 +4,9 @@ import urllib.request import zipfile +from random import choices +from string import ascii_letters, digits +from time import time
from urllib.parse import urlsplit, urlunsplit from bs4 import BeautifulSoup @@ -17,7 +20,7 @@ from src.episode_link_grabber import get_href_by_language, get_bs_href_by_language from src.logger import logger - +DOODSTREAM_PATTERN = re.compile(r"/pass_md5/[\w-]+/(?P<token>[\w-]+)") VOE_PATTERNS = [re.compile(r"'hls': '(?P<url>.+)'"), re.compile(r'prompt\("Node",\s*"(?P<url>[^"]+)"')] STREAMTAPE_PATTERN = re.compile(r"get_video\?id=[^&\'\s]+&expires=[^&\'\s]+&ip=[^&\'\s]+&token=[^&\'\s]+\'") @@ -60,16 +63,19 @@ def get_voe_content_link_with_selenium(provider_url): def find_content_url(url, provider): - html_page = urllib.request.urlopen(url) + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3' + } + req = urllib.request.Request(url, headers=headers) + decoded_html = urllib.request.urlopen(req).read().decode("utf-8") if provider == "Vidoza": - soup = BeautifulSoup(html_page, features="html.parser") + soup = BeautifulSoup(decoded_html, features="html.parser") content_link = soup.find("source").get("src") elif provider == "VOE": def content_link_is_not_valid(content_link): return content_link is None or not content_link.startswith("https://") - html_page = html_page.read().decode("utf-8") for VOE_PATTERN in VOE_PATTERNS: - content_link = VOE_PATTERN.search(html_page).group("url") + content_link = VOE_PATTERN.search(decoded_html).group("url") if content_link_is_not_valid(content_link): continue else: @@ -79,10 +85,18 @@ def content_link_is_not_valid(content_link): logger.critical("Failed to find the video links of provider VOE.
Exiting...") sys.exit(1) elif provider == "Streamtape": - content_link = STREAMTAPE_PATTERN.search(html_page.read().decode("utf-8")) + content_link = STREAMTAPE_PATTERN.search(decoded_html) if content_link is None: return find_content_url(url, provider) content_link = "https://" + provider + ".com/" + content_link.group()[:-1] + elif provider == "Doodstream": + pattern_match = DOODSTREAM_PATTERN.search(decoded_html) + pass_md5 = pattern_match.group() + token = pattern_match.group("token") + headers['Referer'] = 'https://d0000d.com/' + req = urllib.request.Request(f"https://d0000d.com{pass_md5}", headers=headers) + response_page = urllib.request.urlopen(req) + content_link = f"{response_page.read().decode('utf-8')}{''.join(choices(ascii_letters + digits, k=10))}?token={token}&expiry={int(time() * 1000)}" logger.debug(f"Found the following video link of {provider}: {content_link}") return content_link @@ -110,9 +124,15 @@ def find_bs_link_to_episode(url, provider): sb.click('.cc-compliance a') sb.click('.hoster-player .play') if provider == "VOE": - content_link = sb.wait_for_element_visible('.hoster-player a', timeout=120).get_attribute("href") + content_link = sb.wait_for_element_visible('.hoster-player a', timeout=240).get_attribute("href") + elif provider == "Doodstream": + sb.switch_to_tab(1, timeout=240) + html = sb.get_page_source() + soup = BeautifulSoup(html, features="html.parser") + iframe_src = soup.find("iframe").get("src") + content_link = f"https://d000d.com{iframe_src}" elif provider in ["Streamtape", "Vidoza"]: - content_link = sb.wait_for_element_visible('.hoster-player iframe', timeout=120).get_attribute("src") + content_link = sb.wait_for_element_visible('.hoster-player iframe', timeout=240).get_attribute("src") else: logger.error("No supported hoster available for this episode") return content_link diff --git a/src/logger.py b/src/logger.py index 5dde8a5..3640653 100644 --- a/src/logger.py +++ b/src/logger.py @@ -3,11 +3,11 @@ class
ColoredFormatter(logging.Formatter): COLORS = { - 'DEBUG': '\033[94m', # Blue - 'WARNING': '\033[93m', # Yellow - 'ERROR': '\033[91m', # Red - 'CRITICAL': '\033[95m', # Magenta - 'SUCCESS': '\033[92m', # Green + 'DEBUG': '\033[34m', # Blue + 'WARNING': '\033[33m', # Yellow + 'ERROR': '\033[31m', # Red + 'CRITICAL': '\033[35m', # Magenta + 'SUCCESS': '\033[32m', # Green 'RESET': '\033[0m' # Reset to default color }