diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index 3da5f802093..26cff0db20f 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1327,7 +1327,10 @@ ) from .trunews import TruNewsIE from .trutv import TruTVIE -from .tube8 import Tube8IE +from .tube8 import ( + Tube8IE, + Tube8ListIE, +) from .tubitv import TubiTvIE from .tumblr import TumblrIE from .tunein import ( diff --git a/youtube_dl/extractor/tube8.py b/youtube_dl/extractor/tube8.py index db93b018252..3ad6d6566b1 100644 --- a/youtube_dl/extractor/tube8.py +++ b/youtube_dl/extractor/tube8.py @@ -1,86 +1,264 @@ +# coding: utf-8 from __future__ import unicode_literals +import itertools import re +from time import sleep +from .common import InfoExtractor from ..utils import ( + clean_html, + get_element_by_class, + get_element_by_id, int_or_none, - str_to_int, + parse_qs, + strip_or_none, + T, + traverse_obj, + url_or_none, + urljoin, ) -from .keezmovies import KeezMoviesIE -class Tube8IE(KeezMoviesIE): - _VALID_URL = r'https?://(?:www\.)?tube8\.com/(?:[^/]+/)+(?P[^/]+)/(?P\d+)' +class Tube8IE(InfoExtractor): + _VALID_URL = r'https?:\/\/(?:www\.)?tube8\.com\/+porn-video+\/(?P\d+)' _TESTS = [{ - 'url': 'http://www.tube8.com/teen/kasia-music-video/229795/', - 'md5': '65e20c48e6abff62ed0c3965fff13a39', + 'url': 'https://www.tube8.com/porn-video/189530841/', + 'md5': '532408f59e89a32027d873af6289c85a', 'info_dict': { - 'id': '229795', - 'display_id': 'kasia-music-video', + 'id': '189530841', 'ext': 'mp4', - 'description': 'hot teen Kasia grinding', - 'uploader': 'unknown', - 'title': 'Kasia music video', + 'title': 'Found dildo. She let it cum in her tight ass to keep the secret', + 'thumbnail': r're:^https?://.*\.jpg$', + 'uploader': 'MaryKrylova', 'age_limit': 18, - 'duration': 230, - 'categories': ['Teen'], - 'tags': ['dancing'], - }, - }, { - 'url': 'http://www.tube8.com/shemale/teen/blonde-cd-gets-kidnapped-by-two-blacks-and-punished-for-being-a-slutty-girl/19569151/', - 'only_matching': True, + } }] - @staticmethod - def _extract_urls(webpage): - return re.findall( - r']+\bsrc=["\']((?:https?:)?//(?:www\.)?tube8\.com/embed/(?:[^/]+/)+\d+)', - webpage) + _EMBED_REGEX = r'