[^/#?]+)
'''
+ _M3U8_PATH = 'api/channel/hls'
_TESTS = [{
'url': 'http://www.twitch.tv/shroomztv',
@@ -1026,23 +1029,10 @@ def _real_extract(self, url):
access_token = self._download_access_token(
channel_name, 'stream', 'channelName')
- token = access_token['value']
stream_id = stream.get('id') or channel_name
- query = {
- 'allow_source': 'true',
- 'allow_audio_only': 'true',
- 'allow_spectre': 'true',
- 'p': random.randint(1000000, 10000000),
- 'player': 'twitchweb',
- 'playlist_include_framerate': 'true',
- 'segment_preference': '4',
- 'sig': access_token['signature'].encode('utf-8'),
- 'token': token.encode('utf-8'),
- }
- formats = self._extract_m3u8_formats(
- '%s/api/channel/hls/%s.m3u8' % (self._USHER_BASE, channel_name),
- stream_id, 'mp4', query=query)
+ formats = self._extract_twitch_m3u8_formats(
+ channel_name, access_token['value'], access_token['signature'])
self._prefer_source(formats)
view_count = stream.get('viewers')
diff --git a/yt_dlp/extractor/viously.py b/yt_dlp/extractor/viously.py
new file mode 100644
index 000000000000..9ec7ed35f5da
--- /dev/null
+++ b/yt_dlp/extractor/viously.py
@@ -0,0 +1,60 @@
+import base64
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+ extract_attributes,
+ int_or_none,
+ parse_iso8601,
+)
+from ..utils.traversal import traverse_obj
+
+
+class ViouslyIE(InfoExtractor):
+ _VALID_URL = False
+ _WEBPAGE_TESTS = [{
+ 'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html',
+ 'md5': '37a6c3381599381ff53a7e1e0575c0bc',
+ 'info_dict': {
+ 'id': 'F_xQzS2jwb3',
+ 'ext': 'mp4',
+ 'title': 'Turbo du 07/09/2014\xa0: Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
+ 'description': 'Turbo du 07/09/2014\xa0: Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...',
+ 'age_limit': 0,
+ 'upload_date': '20230328',
+ 'timestamp': 1680037507,
+ 'duration': 3716,
+ 'categories': ['motors'],
+ }
+ }]
+
+ def _extract_from_webpage(self, url, webpage):
+        viously_players = re.findall(r'<div[^>]*class="(?:[^"]*\s)?v(?:iou)?sly-player(?:\s[^"]*)?"[^>]*>', webpage)
+ if not viously_players:
+ return
+
+ def custom_decode(text):
+ STANDARD_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/='
+ CUSTOM_ALPHABET = 'VIOUSLYABCDEFGHJKMNPQRTWXZviouslyabcdefghjkmnpqrtwxz9876543210+/='
+ data = base64.b64decode(text.translate(str.maketrans(CUSTOM_ALPHABET, STANDARD_ALPHABET)))
+ return data.decode('utf-8').strip('\x00')
+
+ for video_id in traverse_obj(viously_players, (..., {extract_attributes}, 'id')):
+ formats = self._extract_m3u8_formats(
+ f'https://www.viously.com/video/hls/{video_id}/index.m3u8', video_id, fatal=False)
+ if not formats:
+ continue
+ data = self._download_json(
+ f'https://www.viously.com/export/json/{video_id}', video_id,
+ transform_source=custom_decode, fatal=False)
+ yield {
+ 'id': video_id,
+ 'formats': formats,
+ **traverse_obj(data, ('video', {
+ 'title': ('title', {str}),
+ 'description': ('description', {str}),
+ 'duration': ('duration', {int_or_none}),
+ 'timestamp': ('iso_date', {parse_iso8601}),
+ 'categories': ('category', 'name', {str}, {lambda x: [x] if x else None}),
+ })),
+ }
diff --git a/yt_dlp/extractor/wordpress.py b/yt_dlp/extractor/wordpress.py
index 53820b57a907..378d99dbcca9 100644
--- a/yt_dlp/extractor/wordpress.py
+++ b/yt_dlp/extractor/wordpress.py
@@ -70,7 +70,7 @@ def _extract_from_webpage(self, url, webpage):
'height': int_or_none(traverse_obj(track, ('dimensions', 'original', 'height'))),
'width': int_or_none(traverse_obj(track, ('dimensions', 'original', 'width'))),
} for track in traverse_obj(playlist_json, ('tracks', ...), expected_type=dict)]
- yield self.playlist_result(entries, self._generic_id(url) + f'-wp-playlist-{i+1}', 'Wordpress Playlist')
+ yield self.playlist_result(entries, self._generic_id(url) + f'-wp-playlist-{i + 1}', 'Wordpress Playlist')
class WordpressMiniAudioPlayerEmbedIE(InfoExtractor):
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py
index 5b14b187a73e..88126d11f090 100644
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -5297,6 +5297,7 @@ def _extract_webpage(self, url, item_id, fatal=True):
# See: https://github.com/yt-dlp/yt-dlp/issues/116
if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'):
retry.error = ExtractorError('Incomplete yt initial data received')
+ data = None
continue
return webpage, data
diff --git a/yt_dlp/networking/__init__.py b/yt_dlp/networking/__init__.py
index 96c5a0678f85..acadc0147d59 100644
--- a/yt_dlp/networking/__init__.py
+++ b/yt_dlp/networking/__init__.py
@@ -28,4 +28,3 @@
pass
except Exception as e:
warnings.warn(f'Failed to import "websockets" request handler: {e}' + bug_reports_message())
-
diff --git a/yt_dlp/networking/_helper.py b/yt_dlp/networking/_helper.py
index a6fa3550bd1e..d79dd795305a 100644
--- a/yt_dlp/networking/_helper.py
+++ b/yt_dlp/networking/_helper.py
@@ -219,7 +219,7 @@ def _socket_connect(ip_addr, timeout, source_address):
sock.bind(source_address)
sock.connect(sa)
return sock
- except socket.error:
+ except OSError:
sock.close()
raise
@@ -237,7 +237,7 @@ def create_socks_proxy_socket(dest_addr, proxy_args, proxy_ip_addr, timeout, sou
sock.bind(source_address)
sock.connect(dest_addr)
return sock
- except socket.error:
+ except OSError:
sock.close()
raise
@@ -255,7 +255,7 @@ def create_connection(
host, port = address
ip_addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
if not ip_addrs:
- raise socket.error('getaddrinfo returns an empty list')
+ raise OSError('getaddrinfo returns an empty list')
if source_address is not None:
af = socket.AF_INET if ':' not in source_address[0] else socket.AF_INET6
ip_addrs = [addr for addr in ip_addrs if addr[0] == af]
@@ -272,7 +272,7 @@ def create_connection(
# https://bugs.python.org/issue36820
err = None
return sock
- except socket.error as e:
+ except OSError as e:
err = e
try:
diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py
index 9fb1d75f4a39..e129110ca471 100644
--- a/yt_dlp/networking/_requests.py
+++ b/yt_dlp/networking/_requests.py
@@ -188,6 +188,7 @@ class RequestsSession(requests.sessions.Session):
"""
Ensure unified redirect method handling with our urllib redirect handler.
"""
+
def rebuild_method(self, prepared_request, response):
new_method = get_redirect_method(prepared_request.method, response.status_code)
@@ -218,6 +219,7 @@ def filter(self, record):
class Urllib3LoggingHandler(logging.Handler):
"""Redirect urllib3 logs to our logger"""
+
def __init__(self, logger, *args, **kwargs):
super().__init__(*args, **kwargs)
self._logger = logger
@@ -367,7 +369,7 @@ def _new_conn(self):
self, f'Connection to {self.host} timed out. (connect timeout={self.timeout})') from e
except SocksProxyError as e:
raise urllib3.exceptions.ProxyError(str(e), e) from e
- except (OSError, socket.error) as e:
+ except OSError as e:
raise urllib3.exceptions.NewConnectionError(
self, f'Failed to establish a new connection: {e}') from e
diff --git a/yt_dlp/networking/_websockets.py b/yt_dlp/networking/_websockets.py
index ad85554e459a..ed64080d62a2 100644
--- a/yt_dlp/networking/_websockets.py
+++ b/yt_dlp/networking/_websockets.py
@@ -5,20 +5,26 @@
import ssl
import sys
-from ._helper import create_connection, select_proxy, make_socks_proxy_opts, create_socks_proxy_socket
-from .common import Response, register_rh, Features
+from ._helper import (
+ create_connection,
+ create_socks_proxy_socket,
+ make_socks_proxy_opts,
+ select_proxy,
+)
+from .common import Features, Response, register_rh
from .exceptions import (
CertificateVerifyError,
HTTPError,
+ ProxyError,
RequestError,
SSLError,
- TransportError, ProxyError,
+ TransportError,
)
from .websocket import WebSocketRequestHandler, WebSocketResponse
from ..compat import functools
from ..dependencies import websockets
-from ..utils import int_or_none
from ..socks import ProxyError as SocksProxyError
+from ..utils import int_or_none
if not websockets:
raise ImportError('websockets is not installed')
diff --git a/yt_dlp/networking/websocket.py b/yt_dlp/networking/websocket.py
index 09fcf78ac2d6..0e7e73c9e22a 100644
--- a/yt_dlp/networking/websocket.py
+++ b/yt_dlp/networking/websocket.py
@@ -2,7 +2,7 @@
import abc
-from .common import Response, RequestHandler
+from .common import RequestHandler, Response
class WebSocketResponse(Response):
diff --git a/yt_dlp/socks.py b/yt_dlp/socks.py
index e7f41d7e2a45..b4957ac2ed28 100644
--- a/yt_dlp/socks.py
+++ b/yt_dlp/socks.py
@@ -49,7 +49,7 @@ class Socks5AddressType:
ATYP_IPV6 = 0x04
-class ProxyError(socket.error):
+class ProxyError(OSError):
ERR_SUCCESS = 0x00
def __init__(self, code=None, msg=None):
diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py
index 361617c0287a..89a0d4cff1bc 100644
--- a/yt_dlp/utils/_utils.py
+++ b/yt_dlp/utils/_utils.py
@@ -558,7 +558,7 @@ def decode(self, s):
s = self._close_object(e)
if s is not None:
continue
- raise type(e)(f'{e.msg} in {s[e.pos-10:e.pos+10]!r}', s, e.pos)
+ raise type(e)(f'{e.msg} in {s[e.pos - 10:e.pos + 10]!r}', s, e.pos)
assert False, 'Too many attempts to decode JSON'
@@ -1885,6 +1885,7 @@ def setproctitle(title):
buf = ctypes.create_string_buffer(len(title_bytes))
buf.value = title_bytes
try:
+ # PR_SET_NAME = 15 Ref: /usr/include/linux/prctl.h
libc.prctl(15, buf, 0, 0, 0)
except AttributeError:
return # Strange libc, just skip this
@@ -2260,6 +2261,9 @@ def __getitem__(self, idx):
raise self.IndexError()
return entries[0]
+ def __bool__(self):
+ return bool(self.getslice(0, 1))
+
class OnDemandPagedList(PagedList):
"""Download pages until a page with less than maximum results"""
@@ -5070,7 +5074,7 @@ def truncate_string(s, left, right=0):
assert left > 3 and right >= 0
if s is None or len(s) <= left + right:
return s
- return f'{s[:left-3]}...{s[-right:] if right else ""}'
+ return f'{s[:left - 3]}...{s[-right:] if right else ""}'
def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
diff --git a/yt_dlp/utils/traversal.py b/yt_dlp/utils/traversal.py
index ff5703198aea..8938f4c78298 100644
--- a/yt_dlp/utils/traversal.py
+++ b/yt_dlp/utils/traversal.py
@@ -3,6 +3,7 @@
import inspect
import itertools
import re
+import xml.etree.ElementTree
from ._utils import (
IDENTITY,
@@ -23,7 +24,7 @@ def traverse_obj(
>>> obj = [{}, {"key": "value"}]
>>> traverse_obj(obj, (1, "key"))
- "value"
+ 'value'
Each of the provided `paths` is tested and the first producing a valid result will be returned.
The next path will also be tested if the path branched but no results could be found.
@@ -118,7 +119,7 @@ def apply_key(key, obj, is_last):
branching = True
if isinstance(obj, collections.abc.Mapping):
result = obj.values()
- elif is_iterable_like(obj):
+ elif is_iterable_like(obj) or isinstance(obj, xml.etree.ElementTree.Element):
result = obj
elif isinstance(obj, re.Match):
result = obj.groups()
@@ -132,7 +133,7 @@ def apply_key(key, obj, is_last):
branching = True
if isinstance(obj, collections.abc.Mapping):
iter_obj = obj.items()
- elif is_iterable_like(obj):
+ elif is_iterable_like(obj) or isinstance(obj, xml.etree.ElementTree.Element):
iter_obj = enumerate(obj)
elif isinstance(obj, re.Match):
iter_obj = itertools.chain(
@@ -168,7 +169,7 @@ def apply_key(key, obj, is_last):
result = next((v for k, v in obj.groupdict().items() if casefold(k) == key), None)
elif isinstance(key, (int, slice)):
- if is_iterable_like(obj, collections.abc.Sequence):
+ if is_iterable_like(obj, (collections.abc.Sequence, xml.etree.ElementTree.Element)):
branching = isinstance(key, slice)
with contextlib.suppress(IndexError):
result = obj[key]
@@ -176,6 +177,34 @@ def apply_key(key, obj, is_last):
with contextlib.suppress(IndexError):
result = str(obj)[key]
+ elif isinstance(obj, xml.etree.ElementTree.Element) and isinstance(key, str):
+ xpath, _, special = key.rpartition('/')
+ if not special.startswith('@') and special != 'text()':
+ xpath = key
+ special = None
+
+ # Allow abbreviations of relative paths, absolute paths error
+ if xpath.startswith('/'):
+ xpath = f'.{xpath}'
+ elif xpath and not xpath.startswith('./'):
+ xpath = f'./{xpath}'
+
+ def apply_specials(element):
+ if special is None:
+ return element
+ if special == '@':
+ return element.attrib
+ if special.startswith('@'):
+ return try_call(element.attrib.get, args=(special[1:],))
+ if special == 'text()':
+ return element.text
+ assert False, f'apply_specials is missing case for {special!r}'
+
+ if xpath:
+ result = list(map(apply_specials, obj.iterfind(xpath)))
+ else:
+ result = apply_specials(obj)
+
return branching, result if branching else (result,)
def lazy_last(iterable):
diff --git a/yt_dlp/version.py b/yt_dlp/version.py
index fd923fe45e77..687ef8788f78 100644
--- a/yt_dlp/version.py
+++ b/yt_dlp/version.py
@@ -1,8 +1,8 @@
# Autogenerated by devscripts/update-version.py
-__version__ = '2023.11.16'
+__version__ = '2023.12.30'
-RELEASE_GIT_HEAD = '24f827875c6ba513f12ed09a3aef2bbed223760d'
+RELEASE_GIT_HEAD = 'f10589e3453009bb523f55849bba144c9b91cf2a'
VARIANT = None
@@ -12,4 +12,4 @@
ORIGIN = 'yt-dlp/yt-dlp'
-_pkg_version = '2023.11.16'
+_pkg_version = '2023.12.30'