Skip to content

Commit

Permalink
adds support for the hoster Doodstream (#24)
Browse files Browse the repository at this point in the history
  • Loading branch information
speedyconzales authored May 15, 2024
1 parent 50122a3 commit ff5ec32
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 20 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ headless and completely automated scraping of the following sites:
## Supported Hosters
- [VOE](https://voe.sx)
- [Vidoza](https://vidoza.net)
- [Doodstream](https://doodstream.com)
- [Streamtape](https://streamtape.com)

## Usage
Expand Down
4 changes: 2 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def check_episodes(
provider_episodes.append(episode)
continue
logger.debug(f"{provider} content URL is: {content_url}")
future_list.append(create_new_download_thread(executor, content_url, file_name, episode))
future_list.append(create_new_download_thread(executor, content_url, file_name, episode, provider))
return provider_episodes, language_episodes, future_list


Expand All @@ -83,7 +83,7 @@ def main():

os.makedirs(output_path, exist_ok=True)

provider_list = ["VOE", "VOE", "Vidoza", "Streamtape"] if not provider else provider
provider_list = ["VOE", "Vidoza", "Doodstream", "Streamtape"] if not provider else provider

for season in seasons:
season_path = f"{output_path}/Season {season:02}"
Expand Down
2 changes: 1 addition & 1 deletion src/argument_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def parse_range(episodes):
parser.add_argument("-s", "--season", type=int, help="specify the season")
parser.add_argument("-e", "--episode", nargs='+', type=str, help="specify a list of episode numbers")
parser.add_argument("-t", "--threads", type=int, help="specify the number of threads or concurrent downloads")
parser.add_argument("-p", "--provider", choices=["VOE", "Vidoza", "Streamtape"], help="Choose the hoster/provider you want to download from")
parser.add_argument("-p", "--provider", choices=["VOE", "Vidoza", "Streamtape", "Doodstream"], help="Choose the hoster/provider you want to download from")
parser.add_argument("-a", "--anime", action='store_true', help="specify if the content is an anime")

args = parser.parse_args()
Expand Down
9 changes: 5 additions & 4 deletions src/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ def already_downloaded(file_name):
return False


def download_episode(url, file_name, episode):
def download_episode(url, file_name, episode, provider):
try:
ffmpeg_cmd = ["ffmpeg", "-i", url, "-c", "copy", "-nostdin", file_name]
ffmpeg_cmd = ["ffmpeg", "-headers", "Referer: https://d0000d.com/", "-i", url, "-c", "copy", "-nostdin", file_name] if provider == "Doodstream" \
else ["ffmpeg", "-i", url, "-c", "copy", "-nostdin", file_name]
logger.info(f"Episode '{file_name}' added to queue.")
if platform.system() == "Windows":
subprocess.run(ffmpeg_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
Expand All @@ -35,5 +36,5 @@ def download_episode(url, file_name, episode):
return episode


def create_new_download_thread(executor, content_url, file_name, episode):
return executor.submit(download_episode, content_url, file_name, episode)
def create_new_download_thread(executor, content_url, file_name, episode, provider):
    """Queue the download of one episode on the thread-pool executor.

    Returns the Future wrapping the download_episode call so the caller can
    collect its result later.
    """
    future = executor.submit(
        download_episode, content_url, file_name, episode, provider
    )
    return future
36 changes: 28 additions & 8 deletions src/html_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
import urllib.request
import zipfile

from random import choices
from string import ascii_letters, digits
from time import time
from urllib.parse import urlsplit, urlunsplit

from bs4 import BeautifulSoup
Expand All @@ -17,7 +20,7 @@
from src.episode_link_grabber import get_href_by_language, get_bs_href_by_language
from src.logger import logger


# Doodstream: matches the /pass_md5/<md5>/<token> path on the embed page;
# the named <token> group is reused as a query parameter in the final URL.
DOODSTREAM_PATTERN = re.compile(r"/pass_md5/[\w-]+/(?P<token>[\w-]+)")
# VOE: the stream URL appears either as an 'hls' entry or inside a
# prompt("Node", "...") call, so both patterns are tried in order.
VOE_PATTERNS = [re.compile(r"'hls': '(?P<url>.+)'"),
re.compile(r'prompt\("Node",\s*"(?P<url>[^"]+)"')]
# Streamtape: matches the get_video query string (id, expires, ip, token);
# the match includes a trailing single quote that the caller strips off.
STREAMTAPE_PATTERN = re.compile(r"get_video\?id=[^&\'\s]+&expires=[^&\'\s]+&ip=[^&\'\s]+&token=[^&\'\s]+\'")
Expand Down Expand Up @@ -60,16 +63,19 @@ def get_voe_content_link_with_selenium(provider_url):


def find_content_url(url, provider):
html_page = urllib.request.urlopen(url)
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}
req = urllib.request.Request(url, headers=headers)
decoded_html = urllib.request.urlopen(req).read().decode("utf-8")
if provider == "Vidoza":
soup = BeautifulSoup(html_page, features="html.parser")
soup = BeautifulSoup(decoded_html, features="html.parser")
content_link = soup.find("source").get("src")
elif provider == "VOE":
def content_link_is_not_valid(content_link):
return content_link is None or not content_link.startswith("https://")
html_page = html_page.read().decode("utf-8")
for VOE_PATTERN in VOE_PATTERNS:
content_link = VOE_PATTERN.search(html_page).group("url")
content_link = VOE_PATTERN.search(decoded_html).group("url")
if content_link_is_not_valid(content_link):
continue
else:
Expand All @@ -79,10 +85,18 @@ def content_link_is_not_valid(content_link):
logger.critical("Failed to find the video links of provider VOE. Exiting...")
sys.exit(1)
elif provider == "Streamtape":
content_link = STREAMTAPE_PATTERN.search(html_page.read().decode("utf-8"))
content_link = STREAMTAPE_PATTERN.search(decoded_html)
if content_link is None:
return find_content_url(url, provider)
content_link = "https://" + provider + ".com/" + content_link.group()[:-1]
elif provider == "Doodstream":
pattern_match = DOODSTREAM_PATTERN.search(decoded_html)
pass_md5 = pattern_match.group()
token = pattern_match.group("token")
headers['Referer'] = 'https://d0000d.com/'
req = urllib.request.Request(f"https://d0000d.com{pass_md5}", headers=headers)
response_page = urllib.request.urlopen(req)
content_link = f"{response_page.read().decode("utf-8")}{''.join(choices(ascii_letters + digits, k=10))}?token={token}&expiry={int(time() * 1000)}"
logger.debug(f"Found the following video link of {provider}: {content_link}")
return content_link

Expand Down Expand Up @@ -110,9 +124,15 @@ def find_bs_link_to_episode(url, provider):
sb.click('.cc-compliance a')
sb.click('.hoster-player .play')
if provider == "VOE":
content_link = sb.wait_for_element_visible('.hoster-player a', timeout=120).get_attribute("href")
content_link = sb.wait_for_element_visible('.hoster-player a', timeout=240).get_attribute("href")
elif provider == "Doodstream":
    # The hoster opens its player in a second browser tab; wait for it,
    # then pull the embedded iframe URL out of that tab's HTML.
    sb.switch_to_tab(1, timeout=240)
    html = sb.get_page_source()
    soup = BeautifulSoup(html, features="html.parser")
    iframe_src = soup.find("iframe").get("src")
    # NOTE(review): this uses d000d.com while the download path elsewhere in
    # this file uses d0000d.com as host and Referer — confirm both domains
    # are intentional and still valid mirrors.
    content_link = f"https://d000d.com{iframe_src}"
elif provider in ["Streamtape", "Vidoza"]:
content_link = sb.wait_for_element_visible('.hoster-player iframe', timeout=120).get_attribute("src")
content_link = sb.wait_for_element_visible('.hoster-player iframe', timeout=240).get_attribute("src")
else:
logger.error("No supported hoster available for this episode")
return content_link
Expand Down
10 changes: 5 additions & 5 deletions src/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@

class ColoredFormatter(logging.Formatter):
COLORS = {
'DEBUG': '\033[94m', # Blue
'WARNING': '\033[93m', # Yellow
'ERROR': '\033[91m', # Red
'CRITICAL': '\033[95m', # Magenta
'SUCCESS': '\033[92m', # Green
'DEBUG': '\033[34m', # Blue
'WARNING': '\033[33m', # Yellow
'ERROR': '\033[31m', # Red
'CRITICAL': '\033[35m', # Magenta
'SUCCESS': '\033[32m', # Green
'RESET': '\033[0m' # Reset to default color
}

Expand Down

0 comments on commit ff5ec32

Please sign in to comment.