Skip to content

Commit

Permalink
adds support for the hoster Doodstream (#24)
Browse files Browse the repository at this point in the history
  • Loading branch information
speedyconzales authored May 15, 2024
1 parent 50122a3 commit ff5ec32
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 20 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ headless and completely automated scraping of the following sites:
## Supported Hosters
- [VOE](https://voe.sx)
- [Vidoza](https://vidoza.net)
- [Doodstream](https://doodstream.com)
- [Streamtape](https://streamtape.com)

## Usage
Expand Down
4 changes: 2 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def check_episodes(
provider_episodes.append(episode)
continue
logger.debug(f"{provider} content URL is: {content_url}")
future_list.append(create_new_download_thread(executor, content_url, file_name, episode))
future_list.append(create_new_download_thread(executor, content_url, file_name, episode, provider))
return provider_episodes, language_episodes, future_list


Expand All @@ -83,7 +83,7 @@ def main():

os.makedirs(output_path, exist_ok=True)

provider_list = ["VOE", "VOE", "Vidoza", "Streamtape"] if not provider else provider
provider_list = ["VOE", "Vidoza", "Doodstream", "Streamtape"] if not provider else provider

for season in seasons:
season_path = f"{output_path}/Season {season:02}"
Expand Down
2 changes: 1 addition & 1 deletion src/argument_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def parse_range(episodes):
parser.add_argument("-s", "--season", type=int, help="specify the season")
parser.add_argument("-e", "--episode", nargs='+', type=str, help="specify a list of episode numbers")
parser.add_argument("-t", "--threads", type=int, help="specify the number of threads or concurrent downloads")
parser.add_argument("-p", "--provider", choices=["VOE", "Vidoza", "Streamtape"], help="Choose the hoster/provider you want to download from")
parser.add_argument("-p", "--provider", choices=["VOE", "Vidoza", "Streamtape", "Doodstream"], help="Choose the hoster/provider you want to download from")
parser.add_argument("-a", "--anime", action='store_true', help="specify if the content is an anime")

args = parser.parse_args()
Expand Down
9 changes: 5 additions & 4 deletions src/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ def already_downloaded(file_name):
return False


def download_episode(url, file_name, episode):
def download_episode(url, file_name, episode, provider):
try:
ffmpeg_cmd = ["ffmpeg", "-i", url, "-c", "copy", "-nostdin", file_name]
ffmpeg_cmd = ["ffmpeg", "-headers", "Referer: https://d0000d.com/", "-i", url, "-c", "copy", "-nostdin", file_name] if provider == "Doodstream" \
else ["ffmpeg", "-i", url, "-c", "copy", "-nostdin", file_name]
logger.info(f"Episode '{file_name}' added to queue.")
if platform.system() == "Windows":
subprocess.run(ffmpeg_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
Expand All @@ -35,5 +36,5 @@ def download_episode(url, file_name, episode):
return episode


def create_new_download_thread(executor, content_url, file_name, episode):
return executor.submit(download_episode, content_url, file_name, episode)
def create_new_download_thread(executor, content_url, file_name, episode, provider):
    """Queue the download of one episode on the thread-pool executor.

    Returns the Future wrapping the download_episode call so the caller can
    collect its result later.
    """
    future = executor.submit(
        download_episode, content_url, file_name, episode, provider
    )
    return future
36 changes: 28 additions & 8 deletions src/html_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
import urllib.request
import zipfile

from random import choices
from string import ascii_letters, digits
from time import time
from urllib.parse import urlsplit, urlunsplit

from bs4 import BeautifulSoup
Expand All @@ -17,7 +20,7 @@
from src.episode_link_grabber import get_href_by_language, get_bs_href_by_language
from src.logger import logger


# Doodstream: matches the /pass_md5/<md5>/<token> path on the embed page;
# the named <token> group is reused as a query parameter in the final URL.
DOODSTREAM_PATTERN = re.compile(r"/pass_md5/[\w-]+/(?P<token>[\w-]+)")
# VOE: the stream URL appears either as an 'hls' entry or inside a
# prompt("Node", "...") call, so both patterns are tried in order.
VOE_PATTERNS = [re.compile(r"'hls': '(?P<url>.+)'"),
re.compile(r'prompt\("Node",\s*"(?P<url>[^"]+)"')]
# Streamtape: matches the get_video query string (id, expires, ip, token);
# the match includes a trailing single quote that the caller strips off.
STREAMTAPE_PATTERN = re.compile(r"get_video\?id=[^&\'\s]+&expires=[^&\'\s]+&ip=[^&\'\s]+&token=[^&\'\s]+\'")
Expand Down Expand Up @@ -60,16 +63,19 @@ def get_voe_content_link_with_selenium(provider_url):


def find_content_url(url, provider):
html_page = urllib.request.urlopen(url)
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}
req = urllib.request.Request(url, headers=headers)
decoded_html = urllib.request.urlopen(req).read().decode("utf-8")
if provider == "Vidoza":
soup = BeautifulSoup(html_page, features="html.parser")
soup = BeautifulSoup(decoded_html, features="html.parser")
content_link = soup.find("source").get("src")
elif provider == "VOE":
def content_link_is_not_valid(content_link):
return content_link is None or not content_link.startswith("https://")
html_page = html_page.read().decode("utf-8")
for VOE_PATTERN in VOE_PATTERNS:
content_link = VOE_PATTERN.search(html_page).group("url")
content_link = VOE_PATTERN.search(decoded_html).group("url")
if content_link_is_not_valid(content_link):
continue
else:
Expand All @@ -79,10 +85,18 @@ def content_link_is_not_valid(content_link):
logger.critical("Failed to find the video links of provider VOE. Exiting...")
sys.exit(1)
elif provider == "Streamtape":
content_link = STREAMTAPE_PATTERN.search(html_page.read().decode("utf-8"))
content_link = STREAMTAPE_PATTERN.search(decoded_html)
if content_link is None:
return find_content_url(url, provider)
content_link = "https://" + provider + ".com/" + content_link.group()[:-1]
elif provider == "Doodstream":
pattern_match = DOODSTREAM_PATTERN.search(decoded_html)
pass_md5 = pattern_match.group()
token = pattern_match.group("token")
headers['Referer'] = 'https://d0000d.com/'
req = urllib.request.Request(f"https://d0000d.com{pass_md5}", headers=headers)
response_page = urllib.request.urlopen(req)
content_link = f"{response_page.read().decode("utf-8")}{''.join(choices(ascii_letters + digits, k=10))}?token={token}&expiry={int(time() * 1000)}"
logger.debug(f"Found the following video link of {provider}: {content_link}")
return content_link

Expand Down Expand Up @@ -110,9 +124,15 @@ def find_bs_link_to_episode(url, provider):
sb.click('.cc-compliance a')
sb.click('.hoster-player .play')
if provider == "VOE":
content_link = sb.wait_for_element_visible('.hoster-player a', timeout=120).get_attribute("href")
content_link = sb.wait_for_element_visible('.hoster-player a', timeout=240).get_attribute("href")
elif provider == "Doodstream":
    # The hoster opens its player in a second browser tab; wait for it,
    # then pull the embedded iframe URL out of that tab's HTML.
    sb.switch_to_tab(1, timeout=240)
    html = sb.get_page_source()
    soup = BeautifulSoup(html, features="html.parser")
    iframe_src = soup.find("iframe").get("src")
    # NOTE(review): this uses d000d.com while the download path elsewhere in
    # this file uses d0000d.com as host and Referer — confirm both domains
    # are intentional and still valid mirrors.
    content_link = f"https://d000d.com{iframe_src}"
elif provider in ["Streamtape", "Vidoza"]:
content_link = sb.wait_for_element_visible('.hoster-player iframe', timeout=120).get_attribute("src")
content_link = sb.wait_for_element_visible('.hoster-player iframe', timeout=240).get_attribute("src")
else:
logger.error("No supported hoster available for this episode")
return content_link
Expand Down
10 changes: 5 additions & 5 deletions src/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@

class ColoredFormatter(logging.Formatter):
COLORS = {
'DEBUG': '\033[94m', # Blue
'WARNING': '\033[93m', # Yellow
'ERROR': '\033[91m', # Red
'CRITICAL': '\033[95m', # Magenta
'SUCCESS': '\033[92m', # Green
'DEBUG': '\033[34m', # Blue
'WARNING': '\033[33m', # Yellow
'ERROR': '\033[31m', # Red
'CRITICAL': '\033[35m', # Magenta
'SUCCESS': '\033[32m', # Green
'RESET': '\033[0m' # Reset to default color
}

Expand Down

0 comments on commit ff5ec32

Please sign in to comment.