diff --git a/.github/workflows/core.yml b/.github/workflows/core.yml index ded7e6d612ef..eaaf03dee481 100644 --- a/.github/workflows/core.yml +++ b/.github/workflows/core.yml @@ -36,8 +36,8 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest] - # CPython 3.11 is in quick-test - python-version: ['3.8', '3.9', '3.10', '3.12', pypy-3.8, pypy-3.10] + # CPython 3.8 is in quick-test + python-version: ['3.9', '3.10', '3.11', '3.12', pypy-3.8, pypy-3.10] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest diff --git a/.github/workflows/quick-test.yml b/.github/workflows/quick-test.yml index 1ccfbe836f19..84fca62d4ddb 100644 --- a/.github/workflows/quick-test.yml +++ b/.github/workflows/quick-test.yml @@ -10,10 +10,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Set up Python 3.11 + - name: Set up Python 3.8 uses: actions/setup-python@v4 with: - python-version: '3.11' + python-version: '3.8' - name: Install test requirements run: pip install pytest -r requirements.txt - name: Run tests diff --git a/CONTRIBUTORS b/CONTRIBUTORS index 8b6b3671eb4a..adcc92144427 100644 --- a/CONTRIBUTORS +++ b/CONTRIBUTORS @@ -528,3 +528,17 @@ almx elivinsky starius TravisDupes +amir16yp +Fymyte +Ganesh910 +hashFactory +kclauhk +Kyraminol +lstrojny +middlingphys +NickCis +nicodato +prettykool +S-Aarab +sonmezberkay +TSRBerry diff --git a/Changelog.md b/Changelog.md index 6115446cb862..30de9072e032 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,93 @@ # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> +### 2023.12.30 + +#### Core changes +- [Fix format selection parse error for CPython 3.12](https://github.com/yt-dlp/yt-dlp/commit/00cdda4f6fe18712ced13dbc64b7ea10f323e268) ([#8797](https://github.com/yt-dlp/yt-dlp/issues/8797)) by [Grub4K](https://github.com/Grub4K) +- [Let `read_stdin` obey `--quiet`](https://github.com/yt-dlp/yt-dlp/commit/a174c453ee1e853c584ceadeac17eef2bd433dc5) by [pukkandan](https://github.com/pukkandan) +- [Merged with youtube-dl be008e6](https://github.com/yt-dlp/yt-dlp/commit/65de7d204ce88c0225df1321060304baab85dbd8) by [bashonly](https://github.com/bashonly), [dirkf](https://github.com/dirkf), [Grub4K](https://github.com/Grub4K) +- [Parse `release_year` from `release_date`](https://github.com/yt-dlp/yt-dlp/commit/1732eccc0a40256e076bf0435a29f0f1d8419280) ([#8524](https://github.com/yt-dlp/yt-dlp/issues/8524)) by [seproDev](https://github.com/seproDev) +- [Release workflow and Updater cleanup](https://github.com/yt-dlp/yt-dlp/commit/632b8ee54eb2df8ac6e20746a0bd95b7ebb053aa) ([#8640](https://github.com/yt-dlp/yt-dlp/issues/8640)) by [bashonly](https://github.com/bashonly) +- [Remove Python 3.7 support](https://github.com/yt-dlp/yt-dlp/commit/f4b95acafcd69a50040730dfdf732e797278fdcc) ([#8361](https://github.com/yt-dlp/yt-dlp/issues/8361)) by [bashonly](https://github.com/bashonly) +- [Support `NO_COLOR` environment variable](https://github.com/yt-dlp/yt-dlp/commit/a0b19d319a6ce8b7059318fa17a34b144fde1785) ([#8385](https://github.com/yt-dlp/yt-dlp/issues/8385)) by [Grub4K](https://github.com/Grub4K), [prettykool](https://github.com/prettykool) +- **outtmpl**: [Support multiplication](https://github.com/yt-dlp/yt-dlp/commit/993edd3f6e17e966c763bc86dc34125445cec6b6) by [pukkandan](https://github.com/pukkandan) +- **utils**: `traverse_obj`: [Move `is_user_input` into output template](https://github.com/yt-dlp/yt-dlp/commit/0b6f829b1dfda15d3c1d7d1fbe4ea6102c26dd24) ([#8673](https://github.com/yt-dlp/yt-dlp/issues/8673)) by [Grub4K](https://github.com/Grub4K) +- **webvtt**: [Allow spaces before newlines for CueBlock](https://github.com/yt-dlp/yt-dlp/commit/15f22b4880b6b3f71f350c64d70976ae65b9f1ca) ([#7681](https://github.com/yt-dlp/yt-dlp/issues/7681)) by [TSRBerry](https://github.com/TSRBerry) (With fixes in [298230e](https://github.com/yt-dlp/yt-dlp/commit/298230e550886b746c266724dd701d842ca2696e) by [pukkandan](https://github.com/pukkandan)) + +#### Extractor changes +- [Add `media_type` field](https://github.com/yt-dlp/yt-dlp/commit/e370f9ec36972d06100a3db893b397bfc1b07b4d) by [trainman261](https://github.com/trainman261) +- [Extract from `media` elements in SMIL manifests](https://github.com/yt-dlp/yt-dlp/commit/ddb2d7588bea48bae965dbfabe6df6550c9d3d43) ([#8504](https://github.com/yt-dlp/yt-dlp/issues/8504)) by [seproDev](https://github.com/seproDev) +- **abematv**: [Fix season metadata](https://github.com/yt-dlp/yt-dlp/commit/cc07f5cc85d9e2a6cd0bedb9d961665eea0d6047) ([#8607](https://github.com/yt-dlp/yt-dlp/issues/8607)) by [middlingphys](https://github.com/middlingphys) +- **allstar**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/3237f8ba29fe13bf95ff42b1e48b5b5109715feb) ([#8274](https://github.com/yt-dlp/yt-dlp/issues/8274)) by [S-Aarab](https://github.com/S-Aarab) +- **altcensored**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3f90813f0617e0d21302398010de7496c9ae36aa) ([#8291](https://github.com/yt-dlp/yt-dlp/issues/8291)) by [drzraf](https://github.com/drzraf) +- **ard**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/5f009a094f0e8450792b097c4c8273622778052d) ([#8878](https://github.com/yt-dlp/yt-dlp/issues/8878)) by [seproDev](https://github.com/seproDev) +- **ardbetamediathek**: [Fix series extraction](https://github.com/yt-dlp/yt-dlp/commit/1f8bd8eba82ba10ddb49ee7cc0be4540dab103d5) ([#8687](https://github.com/yt-dlp/yt-dlp/issues/8687)) by [lstrojny](https://github.com/lstrojny) +- **bbc** + - [Extract more formats](https://github.com/yt-dlp/yt-dlp/commit/c919b68f7e79ea5010f75f648d3c9e45405a8011) ([#8321](https://github.com/yt-dlp/yt-dlp/issues/8321)) by [barsnick](https://github.com/barsnick), [dirkf](https://github.com/dirkf) + - [Fix JSON parsing bug](https://github.com/yt-dlp/yt-dlp/commit/19741ab8a401ec64d5e84fdbfcfb141d105e7bc8) by [bashonly](https://github.com/bashonly) +- **bfmtv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/4903f452b68efb62dadf22e81be8c7934fc743e7) ([#8651](https://github.com/yt-dlp/yt-dlp/issues/8651)) by [bashonly](https://github.com/bashonly) +- **bilibili**: [Support courses and interactive videos](https://github.com/yt-dlp/yt-dlp/commit/9f09bdcfcb8e2b4b2decdc30d35d34b993bc7a94) ([#8343](https://github.com/yt-dlp/yt-dlp/issues/8343)) by [c-basalt](https://github.com/c-basalt) +- **bitchute**: [Fix and improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/b1a1ec1540605d2ea7abdb63336ffb1c56bf6316) ([#8507](https://github.com/yt-dlp/yt-dlp/issues/8507)) by [SirElderling](https://github.com/SirElderling) +- **box**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/5a230233d6fce06f4abd1fce0dc92b948e6f780b) ([#8649](https://github.com/yt-dlp/yt-dlp/issues/8649)) by [bashonly](https://github.com/bashonly) +- **bundestag**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/00a3e47bf5440c96025a76e08337ff2a475ed83e) ([#8783](https://github.com/yt-dlp/yt-dlp/issues/8783)) by [Grub4K](https://github.com/Grub4K) +- **drtv**: [Set default ext for m3u8 formats](https://github.com/yt-dlp/yt-dlp/commit/f96ab86cd837b1b5823baa87d144e15322ee9298) ([#8590](https://github.com/yt-dlp/yt-dlp/issues/8590)) by [seproDev](https://github.com/seproDev) +- **duoplay**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/66a0127d45033c698bdbedf162cddc55d9e7b906) ([#8542](https://github.com/yt-dlp/yt-dlp/issues/8542)) by [glensc](https://github.com/glensc) +- **eplus**: [Add login support and DRM detection](https://github.com/yt-dlp/yt-dlp/commit/d5d1517e7d838500800d193ac3234b06e89654cd) ([#8661](https://github.com/yt-dlp/yt-dlp/issues/8661)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **facebook** + - [Fix Memories extraction](https://github.com/yt-dlp/yt-dlp/commit/c39358a54bc6675ae0c50b81024e5a086e41656a) ([#8681](https://github.com/yt-dlp/yt-dlp/issues/8681)) by [kclauhk](https://github.com/kclauhk) + - [Improve subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/9cafb9ff17e14475a35c9a58b5bb010c86c9db4b) ([#8296](https://github.com/yt-dlp/yt-dlp/issues/8296)) by [kclauhk](https://github.com/kclauhk) +- **floatplane**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/628fa244bbce2ad39775a5959e99588f30cac152) ([#8639](https://github.com/yt-dlp/yt-dlp/issues/8639)) by [seproDev](https://github.com/seproDev) +- **francetv**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/71f28097fec1c9e029f74b68a4eadc8915399840) ([#8409](https://github.com/yt-dlp/yt-dlp/issues/8409)) by [Fymyte](https://github.com/Fymyte) +- **instagram**: [Fix stories extraction](https://github.com/yt-dlp/yt-dlp/commit/50eaea9fd7787546b53660e736325fa31c77765d) ([#8843](https://github.com/yt-dlp/yt-dlp/issues/8843)) by [bashonly](https://github.com/bashonly) +- **joqrag**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/db8b4edc7d0bd27da462f6fe82ff6e13e3d68a04) ([#8384](https://github.com/yt-dlp/yt-dlp/issues/8384)) by [pzhlkj6612](https://github.com/pzhlkj6612) +- **litv**: [Fix premium content extraction](https://github.com/yt-dlp/yt-dlp/commit/f45c4efcd928a173e1300a8f1ce4258e70c969b1) ([#8842](https://github.com/yt-dlp/yt-dlp/issues/8842)) by [bashonly](https://github.com/bashonly) +- **maariv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c5f01bf7d4b9426c87c3f8248de23934a56579e0) ([#8331](https://github.com/yt-dlp/yt-dlp/issues/8331)) by [amir16yp](https://github.com/amir16yp) +- **mediastream**: [Fix authenticated format extraction](https://github.com/yt-dlp/yt-dlp/commit/b03c89309eb141be1a1eceeeb7475dd3b7529ad9) ([#8657](https://github.com/yt-dlp/yt-dlp/issues/8657)) by [NickCis](https://github.com/NickCis) +- **nebula**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/45d82be65f71bb05506bd55376c6fdb36bc54142) ([#8566](https://github.com/yt-dlp/yt-dlp/issues/8566)) by [elyse0](https://github.com/elyse0), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev) +- **nintendo**: [Fix Nintendo Direct extraction](https://github.com/yt-dlp/yt-dlp/commit/1d24da6c899ef280d8b0a48a5e280ecd5d39cdf4) ([#8609](https://github.com/yt-dlp/yt-dlp/issues/8609)) by [Grub4K](https://github.com/Grub4K) +- **ondemandkorea**: [Fix upgraded format extraction](https://github.com/yt-dlp/yt-dlp/commit/04a5e06350e3ef7c03f94f2f3f90dd96c6411152) ([#8677](https://github.com/yt-dlp/yt-dlp/issues/8677)) by [seproDev](https://github.com/seproDev) +- **pr0gramm**: [Support variant formats and subtitles](https://github.com/yt-dlp/yt-dlp/commit/f98a3305eb124a0c375d03209d5c5a64fe1766c8) ([#8674](https://github.com/yt-dlp/yt-dlp/issues/8674)) by [Grub4K](https://github.com/Grub4K) +- **rinsefm**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c91af948e43570025e4aa887e248fd025abae394) ([#8778](https://github.com/yt-dlp/yt-dlp/issues/8778)) by [hashFactory](https://github.com/hashFactory) +- **rudovideo**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/0d531c35eca4c2eb36e160530a7a333edbc727cc) ([#8664](https://github.com/yt-dlp/yt-dlp/issues/8664)) by [nicodato](https://github.com/nicodato) +- **theguardian**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/1fa3f24d4b5d22176b11d78420f1f4b64a5af0a8) ([#8535](https://github.com/yt-dlp/yt-dlp/issues/8535)) by [SirElderling](https://github.com/SirElderling) +- **theplatform**: [Extract more metadata](https://github.com/yt-dlp/yt-dlp/commit/7e09c147fdccb44806bbf601573adc4b77210a89) ([#8635](https://github.com/yt-dlp/yt-dlp/issues/8635)) by [trainman261](https://github.com/trainman261) +- **twitcasting**: [Detect livestreams via API and `show` page](https://github.com/yt-dlp/yt-dlp/commit/585d0ed9abcfcb957f2b2684b8ad43c3af160383) ([#8601](https://github.com/yt-dlp/yt-dlp/issues/8601)) by [bashonly](https://github.com/bashonly), [JC-Chung](https://github.com/JC-Chung) +- **twitcastinguser**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/ff2fde1b8f922fd34bae6172602008cd67c07c93) ([#8650](https://github.com/yt-dlp/yt-dlp/issues/8650)) by [bashonly](https://github.com/bashonly) +- **twitter** + - [Extract stale tweets](https://github.com/yt-dlp/yt-dlp/commit/1c54a98e19d047e7c15184237b6ef8ad50af489c) ([#8724](https://github.com/yt-dlp/yt-dlp/issues/8724)) by [bashonly](https://github.com/bashonly) + - [Prioritize m3u8 formats](https://github.com/yt-dlp/yt-dlp/commit/e7d22348e77367740da78a3db27167ecf894b7c9) ([#8826](https://github.com/yt-dlp/yt-dlp/issues/8826)) by [bashonly](https://github.com/bashonly) + - [Work around API rate-limit](https://github.com/yt-dlp/yt-dlp/commit/116c268438ea4d3738f6fa502c169081ca8f0ee7) ([#8825](https://github.com/yt-dlp/yt-dlp/issues/8825)) by [bashonly](https://github.com/bashonly) + - broadcast: [Extract `concurrent_view_count`](https://github.com/yt-dlp/yt-dlp/commit/6fe82491ed622b948c512cf4aab46ac3a234ae0a) ([#8600](https://github.com/yt-dlp/yt-dlp/issues/8600)) by [sonmezberkay](https://github.com/sonmezberkay) +- **vidly**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/34df1c1f60fa652c0a6a5c712b06c10e45daf6b7) ([#8612](https://github.com/yt-dlp/yt-dlp/issues/8612)) by [seproDev](https://github.com/seproDev) +- **vocaroo**: [Do not use deprecated `getheader`](https://github.com/yt-dlp/yt-dlp/commit/f223b1b0789f65e06619dcc9fc9e74f50d259379) ([#8606](https://github.com/yt-dlp/yt-dlp/issues/8606)) by [qbnu](https://github.com/qbnu) +- **vvvvid**: [Set user-agent to fix extraction](https://github.com/yt-dlp/yt-dlp/commit/1725e943b0e8a8b585305660d4611e684374409c) ([#8615](https://github.com/yt-dlp/yt-dlp/issues/8615)) by [Kyraminol](https://github.com/Kyraminol) +- **youtube** + - [Fix `like_count` extraction](https://github.com/yt-dlp/yt-dlp/commit/6b5d93b0b0240e287389d1d43b2d5293e18aa4cc) ([#8763](https://github.com/yt-dlp/yt-dlp/issues/8763)) by [Ganesh910](https://github.com/Ganesh910) + - [Improve detection of faulty HLS formats](https://github.com/yt-dlp/yt-dlp/commit/bb5a54e6db2422bbd155d93a0e105b6616c09467) ([#8646](https://github.com/yt-dlp/yt-dlp/issues/8646)) by [bashonly](https://github.com/bashonly) + - [Return empty playlist when channel/tab has no videos](https://github.com/yt-dlp/yt-dlp/commit/044886c220620a7679109e92352890e18b6079e3) by [pukkandan](https://github.com/pukkandan) + - [Support cf.piped.video](https://github.com/yt-dlp/yt-dlp/commit/6a9c7a2b52655bacfa7ab2da24fd0d14a6fff495) ([#8514](https://github.com/yt-dlp/yt-dlp/issues/8514)) by [OIRNOIR](https://github.com/OIRNOIR) +- **zingmp3**: [Add support for radio and podcasts](https://github.com/yt-dlp/yt-dlp/commit/64de1a4c25bada90374b88d7353754fe8fbfcc51) ([#7189](https://github.com/yt-dlp/yt-dlp/issues/7189)) by [hatienl0i261299](https://github.com/hatienl0i261299) + +#### Postprocessor changes +- **ffmpegmetadata**: [Embed stream metadata in single format downloads](https://github.com/yt-dlp/yt-dlp/commit/deeb13eae82e60f82a2c0c5861f460399a997528) ([#8647](https://github.com/yt-dlp/yt-dlp/issues/8647)) by [bashonly](https://github.com/bashonly) + +#### Networking changes +- [Strip whitespace around header values](https://github.com/yt-dlp/yt-dlp/commit/196eb0fe77b78e2e5ca02c506c3837c2b1a7964c) ([#8802](https://github.com/yt-dlp/yt-dlp/issues/8802)) by [coletdjnz](https://github.com/coletdjnz) +- **Request Handler**: websockets: [Migrate websockets to networking framework](https://github.com/yt-dlp/yt-dlp/commit/ccfd70f4c24b579c72123ca76ab50164f8f122b7) ([#7720](https://github.com/yt-dlp/yt-dlp/issues/7720)) by [coletdjnz](https://github.com/coletdjnz) + +#### Misc. changes +- **ci** + - [Concurrency optimizations](https://github.com/yt-dlp/yt-dlp/commit/f124fa458826308afc86cf364c509f857686ecfd) ([#8614](https://github.com/yt-dlp/yt-dlp/issues/8614)) by [Grub4K](https://github.com/Grub4K) + - [Run core tests only for core changes](https://github.com/yt-dlp/yt-dlp/commit/13b3cb3c2b7169a1e17d6fc62593bf744170521c) ([#8841](https://github.com/yt-dlp/yt-dlp/issues/8841)) by [Grub4K](https://github.com/Grub4K) +- **cleanup** + - [Fix spelling of `IE_NAME`](https://github.com/yt-dlp/yt-dlp/commit/bc4ab17b38f01000d99c5c2bedec89721fee65ec) ([#8810](https://github.com/yt-dlp/yt-dlp/issues/8810)) by [barsnick](https://github.com/barsnick) + - [Remove dead extractors](https://github.com/yt-dlp/yt-dlp/commit/9751a457cfdb18bf99d9ee0d10e4e6a594502bbf) ([#8604](https://github.com/yt-dlp/yt-dlp/issues/8604)) by [seproDev](https://github.com/seproDev) + - Miscellaneous: [f9fb3ce](https://github.com/yt-dlp/yt-dlp/commit/f9fb3ce86e3c6a0c3c33b45392b8d7288bceba76) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev) +- **devscripts**: `run_tests`: [Create Python script](https://github.com/yt-dlp/yt-dlp/commit/2d1d683a541d71f3d3bb999dfe8eeb1976fb91ce) ([#8720](https://github.com/yt-dlp/yt-dlp/issues/8720)) by [Grub4K](https://github.com/Grub4K) (With fixes in [225cf2b](https://github.com/yt-dlp/yt-dlp/commit/225cf2b830a1de2c5eacd257edd2a01aed1e1114)) +- **docs**: [Update youtube-dl merge commit in `README.md`](https://github.com/yt-dlp/yt-dlp/commit/f10589e3453009bb523f55849bba144c9b91cf2a) by [bashonly](https://github.com/bashonly) +- **test**: networking: [Update tests for OpenSSL 3.2](https://github.com/yt-dlp/yt-dlp/commit/37755a037e612bfc608c3d4722e8ef2ce6a022ee) ([#8814](https://github.com/yt-dlp/yt-dlp/issues/8814)) by [bashonly](https://github.com/bashonly) + ### 2023.11.16 #### Extractor changes diff --git a/Collaborators.md b/Collaborators.md index 70ab616f1132..894a853c9b8d 100644 --- a/Collaborators.md +++ b/Collaborators.md @@ -29,6 +29,7 @@ You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [autho [![gh-sponsor](https://img.shields.io/badge/_-Github-white.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/coletdjnz) * Improved plugin architecture +* Rewrote the networking infrastructure, implemented support for `requests` * YouTube improvements including: age-gate bypass, private playlists, multiple-clients (to avoid throttling) and a lot of under-the-hood improvements * Added support for new websites YoutubeWebArchive, MainStreaming, PRX, nzherald, Mediaklikk, StarTV etc * Improved/fixed support for Patreon, panopto, gfycat, itv, pbs, SouthParkDE etc @@ -46,16 +47,17 @@ You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [autho ## [bashonly](https://github.com/bashonly) -* `--update-to`, automated release, nightly builds -* `--cookies-from-browser` support for Firefox containers -* Added support for new websites Genius, Kick, NBCStations, Triller, VideoKen etc -* Improved/fixed support for Anvato, Brightcove, Instagram, ParamountPlus, Reddit, SlidesLive, TikTok, Twitter, Vimeo etc +* `--update-to`, self-updater rewrite, automated/nightly/master releases +* `--cookies-from-browser` support for Firefox containers, external downloader cookie handling overhaul +* Added support for new websites like Dacast, Kick, NBCStations, Triller, VideoKen, Weverse, WrestleUniverse etc +* Improved/fixed support for Anvato, Brightcove, Reddit, SlidesLive, TikTok, Twitter, Vimeo etc ## [Grub4K](https://github.com/Grub4K) -[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/Grub4K) [![gh-sponsor](https://img.shields.io/badge/_-Github-white.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/Grub4K) +[![gh-sponsor](https://img.shields.io/badge/_-Github-white.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/Grub4K) [![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/Grub4K) -* `--update-to`, automated release, nightly builds -* Rework internals like `traverse_obj`, various core refactors and bugs fixes -* Helped fix crunchyroll, Twitter, wrestleuniverse, wistia, slideslive etc +* `--update-to`, self-updater rewrite, automated/nightly/master releases +* Reworked internals like `traverse_obj`, various core refactors and bugs fixes +* Implemented proper progress reporting for parallel downloads +* Improved/fixed/added Bundestag, crunchyroll, pr0gramm, Twitter, WrestleUniverse etc diff --git a/README.md b/README.md index 06aceec02718..16947ce30be0 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ yt-dlp is a [youtube-dl](https://github.com/ytdl-org/youtube-dl) fork based on t # NEW FEATURES -* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@66ab08**](https://github.com/ytdl-org/youtube-dl/commit/66ab0814c4baa2dc79c2dd5287bc0ad61a37c5b9) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) +* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@be008e6**](https://github.com/ytdl-org/youtube-dl/commit/be008e657d79832642e2158557c899249c9e31cd) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21)) * **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API @@ -159,6 +159,7 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * yt-dlp versions between 2021.09.01 and 2023.01.02 applies `--match-filter` to nested playlists. This was an unintentional side-effect of [8f18ac](https://github.com/yt-dlp/yt-dlp/commit/8f18aca8717bb0dd49054555af8d386e5eda3a88) and is fixed in [d7b460](https://github.com/yt-dlp/yt-dlp/commit/d7b460d0e5fc710950582baed2e3fc616ed98a80). Use `--compat-options playlist-match-filter` to revert this * yt-dlp versions between 2021.11.10 and 2023.06.21 estimated `filesize_approx` values for fragmented/manifest formats. This was added for convenience in [f2fe69](https://github.com/yt-dlp/yt-dlp/commit/f2fe69c7b0d208bdb1f6292b4ae92bc1e1a7444a), but was reverted in [0dff8e](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) due to the potentially extreme inaccuracy of the estimated values. Use `--compat-options manifest-filesize-approx` to keep extracting the estimated values * yt-dlp uses modern http client backends such as `requests`. Use `--compat-options prefer-legacy-http-handler` to prefer the legacy http handler (`urllib`) to be used for standard http requests. +* The sub-module `swfinterp` is removed. For ease of use, a few more compat options are available: @@ -279,7 +280,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly * [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection) as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html) - There are bugs in ffmpeg that causes various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for some of these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds + There are bugs in ffmpeg that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for some of these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds **Important**: What you need is ffmpeg *binary*, **NOT** [the python package of the same name](https://pypi.org/project/ffmpeg) @@ -299,7 +300,7 @@ While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly * [**pycryptodomex**](https://github.com/Legrandin/pycryptodome)\* - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD-2-Clause](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst) * [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where javascript needs to be run. Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD) -* [**secretstorage**](https://github.com/mitya57/secretstorage) - For `--cookies-from-browser` to access the **Gnome** keyring while decrypting cookies of **Chromium**-based browsers on **Linux**. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE) +* [**secretstorage**](https://github.com/mitya57/secretstorage)\* - For `--cookies-from-browser` to access the **Gnome** keyring while decrypting cookies of **Chromium**-based browsers on **Linux**. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE) * Any external downloader that you want to use with `--downloader` ### Deprecated diff --git a/devscripts/changelog_override.json b/devscripts/changelog_override.json index ca811cb650be..8c5286432219 100644 --- a/devscripts/changelog_override.json +++ b/devscripts/changelog_override.json @@ -114,5 +114,11 @@ "action": "add", "when": "f04b5bedad7b281bee9814686bba1762bae092eb", "short": "[priority] Security: [[CVE-2023-46121](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-46121)] Patch [Generic Extractor MITM Vulnerability via Arbitrary Proxy Injection](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3ch3-jhc6-5r8x)\n\t- Disallow smuggling of arbitrary `http_headers`; extractors now only use specific headers" + }, + { + "action": "change", + "when": "15f22b4880b6b3f71f350c64d70976ae65b9f1ca", + "short": "[webvtt] Allow spaces before newlines for CueBlock (#7681)", + "authors": ["TSRBerry"] } ] diff --git a/devscripts/make_changelog.py b/devscripts/make_changelog.py index d0e893e58112..123eebc2a96f 100644 --- a/devscripts/make_changelog.py +++ b/devscripts/make_changelog.py @@ -40,20 +40,6 @@ def subgroup_lookup(cls): return { name: group for group, names in { - cls.CORE: { - 'aes', - 'cache', - 'compat_utils', - 'compat', - 'cookies', - 'dependencies', - 'formats', - 'jsinterp', - 'outtmpl', - 'plugins', - 'update', - 'utils', - }, cls.MISC: { 'build', 'ci', @@ -404,9 +390,9 @@ def groups(self): if not group: if self.EXTRACTOR_INDICATOR_RE.search(commit.short): group = CommitGroup.EXTRACTOR + logger.error(f'Assuming [ie] group for {commit.short!r}') else: - group = CommitGroup.POSTPROCESSOR - logger.warning(f'Failed to map {commit.short!r}, selected {group.name.lower()}') + group = CommitGroup.CORE commit_info = CommitInfo( details, sub_details, message.strip(), diff --git a/devscripts/make_issue_template.py b/devscripts/make_issue_template.py index 6c85e200fe89..a5d59f3c03d8 100644 --- a/devscripts/make_issue_template.py +++ b/devscripts/make_issue_template.py @@ -9,11 +9,7 @@ import re -from devscripts.utils import ( - get_filename_args, - read_file, - write_file, -) +from devscripts.utils import get_filename_args, read_file, write_file VERBOSE_TMPL = ''' - type: checkboxes diff --git a/requirements.txt b/requirements.txt index d983fa03ffcc..06ff82a80095 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ mutagen pycryptodomex -websockets brotli; implementation_name=='cpython' brotlicffi; implementation_name!='cpython' certifi diff --git a/supportedsites.md b/supportedsites.md index 0e971c135e2d..96681c16b99c 100644 --- a/supportedsites.md +++ b/supportedsites.md @@ -1,6 +1,4 @@ # Supported sites - - **0000studio:archive** - - **0000studio:clip** - **17live** - **17live:clip** - **1News**: 1news.co.nz article videos @@ -9,7 +7,6 @@ - **23video** - **247sports** - **24tv.ua** - - **24video** - **3qsdn**: 3Q SDN - **3sat** - **4tube** @@ -50,15 +47,18 @@ - **afreecatv**: [*afreecatv*](## "netrc machine") afreecatv.com - **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com - **afreecatv:user** - - **AirMozilla** - **AirTV** - **AitubeKZVideo** - **AliExpressLive** - **AlJazeera** - **Allocine** + - **Allstar** + - **AllstarProfile** - **AlphaPorno** - **Alsace20TV** - **Alsace20TVEmbed** + - **altcensored** + - **altcensored:channel** - **Alura**: [*alura*](## "netrc machine") - **AluraCourse**: [*aluracourse*](## "netrc machine") - **Amara** @@ -79,7 +79,7 @@ - **ant1newsgr:embed**: ant1news.gr embedded videos - **antenna:watch**: antenna.gr and ant1news.gr videos - **Anvato** - - **aol.com**: Yahoo screen and movies + - **aol.com**: Yahoo screen and movies (**Currently broken**) - **APA** - **Aparat** - **AppleConnect** @@ -90,8 +90,8 @@ - **archive.org**: archive.org video and audio - **ArcPublishing** - **ARD** - - **ARD:mediathek** - - **ARDBetaMediathek** + - **ARDMediathek** + - **ARDMediathekCollection** - **Arkena** - **arte.sky.it** - **ArteTV** @@ -100,7 +100,6 @@ - **ArteTVPlaylist** - **AtresPlayer**: [*atresplayer*](## "netrc machine") - **AtScaleConfEvent** - - **ATTTechChannel** - **ATVAt** - **AudiMedia** - **AudioBoom** @@ -140,12 +139,12 @@ - **BeatBumpVideo** - **Beatport** - **Beeg** - - **BehindKink** + - **BehindKink**: (**Currently broken**) - **Bellator** - **BellMedia** - **BerufeTV** - - **Bet** - - **bfi:player** + - **Bet**: (**Currently broken**) + - **bfi:player**: (**Currently broken**) - **bfmtv** - **bfmtv:article** - **bfmtv:live** @@ -162,6 +161,8 @@ - **BiliBiliBangumi** - **BiliBiliBangumiMedia** - **BiliBiliBangumiSeason** + - **BilibiliCheese** + - **BilibiliCheeseSeason** - **BilibiliCollectionList** - **BilibiliFavoritesList** - **BiliBiliPlayer** @@ -176,11 +177,8 @@ - **BiliLive** - **BioBioChileTV** - **Biography** - - **BIQLE** - **BitChute** - **BitChuteChannel** - - **bitwave:replay** - - **bitwave:stream** - **BlackboardCollaborate** - **BleacherReport** - **BleacherReportCMS** @@ -193,7 +191,7 @@ - **Box** - **BoxCastVideo** - **Bpb**: Bundeszentrale für politische Bildung - - **BR**: Bayerischer Rundfunk + - **BR**: Bayerischer Rundfunk (**Currently broken**) - **BrainPOP**: [*brainpop*](## "netrc machine") - **BrainPOPELL**: [*brainpop*](## "netrc machine") - **BrainPOPEsp**: [*brainpop*](## "netrc machine") BrainPOP Español @@ -201,19 +199,18 @@ - **BrainPOPIl**: [*brainpop*](## "netrc machine") BrainPOP Hebrew - **BrainPOPJr**: [*brainpop*](## "netrc machine") - **BravoTV** - - **Break** - **BreitBart** - **brightcove:legacy** - **brightcove:new** - **Brilliantpala:Classes**: [*brilliantpala*](## "netrc machine") VoD on classes.brilliantpala.org - **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org - - **BRMediathek**: Bayerischer Rundfunk Mediathek - **bt:article**: Bergens Tidende Articles - **bt:vestlendingen**: Bergens Tidende - Vestlendingen - **Bundesliga** + - **Bundestag** - **BusinessInsider** - **BuzzFeed** - - **BYUtv** + - **BYUtv**: (**Currently broken**) - **CableAV** - **Callin** - **Caltrans** @@ -225,14 +222,11 @@ - **CamModels** - **Camsoda** - **CamtasiaEmbed** - - **CamWithHer** - **Canal1** - **CanalAlpha** - **canalc2.tv** - **Canalplus**: mycanal.fr and piwiplus.fr - **CaracolTvPlay**: [*caracoltv-play*](## "netrc machine") - - **CarambaTV** - - **CarambaTVPage** - **CartoonNetwork** - **cbc.ca** - **cbc.ca:player** @@ -254,16 +248,12 @@ - **Cellebrite** - **CeskaTelevize** - **CGTN** - - **channel9**: Channel 9 - **CharlieRose** - **Chaturbate** - **Chilloutzone** - **Chingari** - **ChingariUser** - - **chirbit** - - **chirbit:profile** - **cielotv.it** - - **Cinchcast** - **Cinemax** - **CinetecaMilano** - **Cineverse** @@ -276,14 +266,12 @@ - **cliphunter** - **Clippit** - **ClipRs** - - **Clipsyndicate** - **ClipYouEmbed** - **CloserToTruth** - **CloudflareStream** - - **Cloudy** - - **Clubic** + - **Clubic**: (**Currently broken**) - **Clyp** - - **cmt.com** + - **cmt.com**: (**Currently broken**) - **CNBC** - **CNBCVideo** - **CNN** @@ -328,7 +316,6 @@ - **CybraryCourse**: [*cybrary*](## "netrc machine") - **DacastPlaylist** - **DacastVOD** - - **Daftsex** - **DagelijkseKost**: dagelijksekost.een.be - **DailyMail** - **dailymotion**: [*dailymotion*](## "netrc machine") @@ -347,13 +334,12 @@ - **DctpTv** - **DeezerAlbum** - **DeezerPlaylist** - - **defense.gouv.fr** - **democracynow** - **DestinationAmerica** - **DetikEmbed** - **DeuxM** - **DeuxMNews** - - **DHM**: Filmarchiv - Deutsches Historisches Museum + - **DHM**: Filmarchiv - Deutsches Historisches Museum (**Currently broken**) - **Digg** - **DigitalConcertHall**: [*digitalconcerthall*](## "netrc machine") DigitalConcertHall extractor - **DigitallySpeaking** @@ -373,7 +359,6 @@ - **dlf:corpus**: DLF Multi-feed Archives - **dlive:stream** - **dlive:vod** - - **Dotsub** - **Douyin** - **DouyuShow** - **DouyuTV**: 斗鱼直播 @@ -392,35 +377,29 @@ - **duboku**: www.duboku.io - **duboku:list**: www.duboku.io entire series - **Dumpert** + - **Duoplay** - **dvtv**: http://video.aktualne.cz/ - **dw** - **dw:article** - **EaglePlatform** - **EbaumsWorld** - **Ebay** - - **EchoMsk** - **egghead:course**: egghead.io course - **egghead:lesson**: egghead.io lesson - - **ehftv** - - **eHow** - **EinsUndEinsTV**: [*1und1tv*](## "netrc machine") - **EinsUndEinsTVLive**: [*1und1tv*](## "netrc machine") - **EinsUndEinsTVRecordings**: [*1und1tv*](## "netrc machine") - **Einthusan** - **eitb.tv** - - **ElevenSports** - - **EllenTube** - - **EllenTubePlaylist** - - **EllenTubeVideo** - **Elonet** - **ElPais**: El País - **ElTreceTV**: El Trece TV (Argentina) - **Embedly** - **EMPFlix** - - **Engadget** - **Epicon** - **EpiconSeries** - - **eplus:inbound**: e+ (イープラス) overseas + - **EpidemicSound** + - **eplus**: [*eplus*](## "netrc machine") e+ (イープラス) - **Epoch** - **Eporner** - **Erocast** @@ -429,11 +408,9 @@ - **ertflix**: ERTFLIX videos - **ertflix:codename**: ERTFLIX videos by codename - **ertwebtv:embed**: ert.gr webtv embedded videos - - **Escapist** - **ESPN** - **ESPNArticle** - **ESPNCricInfo** - - **EsriVideo** - **EttuTv** - **Europa** - **EuroParlWebstream** @@ -443,9 +420,7 @@ - **EWETV**: [*ewetv*](## "netrc machine") - **EWETVLive**: [*ewetv*](## "netrc machine") - **EWETVRecordings**: [*ewetv*](## "netrc machine") - - **ExpoTV** - **Expressen** - - **ExtremeTube** - **EyedoTV** - **facebook**: [*facebook*](## "netrc machine") - **facebook:reel** @@ -465,6 +440,8 @@ - **FiveThirtyEight** - **FiveTV** - **Flickr** + - **Floatplane** + - **FloatplaneChannel** - **Folketinget**: Folketinget (ft.dk; Danish parliament) - **FoodNetwork** - **FootyRoom** @@ -472,7 +449,6 @@ - **FOX** - **FOX9** - **FOX9News** - - **Foxgay** - **foxnews**: Fox News and Fox Business Video - **foxnews:article** - **FoxNewsVideo** @@ -496,7 +472,6 @@ - **funimation:show**: [*funimation*](## "netrc machine") - **Funk** - **Funker530** - - **Fusion** - **Fux** - **FuyinTV** - **Gab** @@ -522,7 +497,6 @@ - **GeniusLyrics** - **Gettr** - **GettrStreaming** - - **Gfycat** - **GiantBomb** - **Giga** - **GlattvisionTV**: [*glattvisiontv*](## "netrc machine") @@ -564,7 +538,6 @@ - **HearThisAt** - **Heise** - **HellPorno** - - **Helsinki**: helsinki.fi - **hetklokhuis** - **hgtv.com:show** - **HGTVDe** @@ -573,8 +546,6 @@ - **HistoricFilms** - **history:player** - **history:topic**: History.com Topic - - **hitbox** - - **hitbox:live** - **HitRecord** - **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau - **HollywoodReporter** @@ -585,8 +556,6 @@ - **hotstar:playlist** - **hotstar:season** - **hotstar:series** - - **Howcast** - - **HowStuffWorks** - **hrfernsehen** - **HRTi**: [*hrti*](## "netrc machine") - **HRTiPlaylist**: [*hrti*](## "netrc machine") @@ -608,7 +577,7 @@ - **ign.com** - **IGNArticle** - **IGNVideo** - - **IHeartRadio** + - **iheartradio** - **iheartradio:podcast** - **Iltalehti** - **imdb**: Internet Movie Database trailers @@ -638,7 +607,6 @@ - **IsraelNationalNews** - **ITProTV** - **ITProTVCourse** - - **ITTF** - **ITV** - **ITVBTCC** - **ivi**: ivi.ru @@ -658,6 +626,7 @@ - **JioSaavnAlbum** - **JioSaavnSong** - **Joj** + - **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR) - **Jove** - **JStream** - **JTBC**: jtbc.co.kr @@ -670,7 +639,6 @@ - **Karaoketv** - **KarriereVideos** - **Katsomo** - - **KeezMovies** - **KelbyOne** - **Ketnet** - **khanacademy** @@ -679,7 +647,7 @@ - **Kicker** - **KickStarter** - **KickVOD** - - **KinjaEmbed** + - **kinja:embed** - **KinoPoisk** - **Kommunetv** - **KompasVideo** @@ -698,8 +666,6 @@ - **la7.it** - **la7.it:​pod:episode** - **la7.it:podcast** - - **laola1tv** - - **laola1tv:embed** - **LastFM** - **LastFMPlaylist** - **LastFMUser** @@ -733,7 +699,6 @@ - **LinkedIn**: [*linkedin*](## "netrc machine") - **linkedin:learning**: [*linkedin*](## "netrc machine") - **linkedin:​learning:course**: [*linkedin*](## "netrc machine") - - **LinuxAcademy**: [*linuxacademy*](## "netrc machine") - **Liputan6** - **ListenNotes** - **LiTV** @@ -751,7 +716,7 @@ - **Lumni** - **lynda**: [*lynda*](## "netrc machine") lynda.com videos - **lynda:course**: [*lynda*](## "netrc machine") lynda.com online courses - - **m6** + - **maariv.co.il** - **MagellanTV** - **MagentaMusik360** - **mailru**: Видео@Mail.Ru @@ -793,11 +758,8 @@ - **megatvcom:embed**: megatv.com embedded videos - **Meipai**: 美拍 - **MelonVOD** - - **META** - - **metacafe** - **Metacritic** - **mewatch** - - **Mgoon** - **MiaoPai** - **MicrosoftEmbed** - **microsoftstream**: Microsoft Stream @@ -810,7 +772,6 @@ - **minds:group** - **MinistryGrid** - **Minoto** - - **miomio.tv** - **mirrativ** - **mirrativ:user** - **MirrorCoUK** @@ -825,14 +786,10 @@ - **MLBTV**: [*mlb*](## "netrc machine") - **MLBVideo** - **MLSSoccer** - - **Mnet** - **MNetTV**: [*mnettv*](## "netrc machine") - **MNetTVLive**: [*mnettv*](## "netrc machine") - **MNetTVRecordings**: [*mnettv*](## "netrc machine") - **MochaVideo** - - **MoeVideo**: LetitBit video services: moevideo.net, playreplay.net and videochart.net - - **Mofosex** - - **MofosexEmbed** - **Mojvideo** - **Monstercat** - **MonsterSirenHypergryphMusic** @@ -843,13 +800,12 @@ - **Motorsport**: motorsport.com - **MotorTrend** - **MotorTrendOnDemand** - - **MovieClips** - **MovieFap** - **Moviepilot** - **MoviewPlay** - **Moviezine** - **MovingImage** - - **MSN** + - **MSN**: (**Currently broken**) - **mtg**: MTG services - **mtv** - **mtv.de** @@ -871,18 +827,13 @@ - **MusicdexSong** - **mva**: Microsoft Virtual Academy videos - **mva:course**: Microsoft Virtual Academy courses - - **Mwave** - - **MwaveMeetGreet** - **Mxplayer** - **MxplayerShow** - - **MyChannels** - **MySpace** - **MySpace:album** - **MySpass** - - **Myvi** - **MyVideoGe** - **MyVidster** - - **MyviEmbed** - **Mzaalo** - **n-tv.de** - **N1Info:article** @@ -894,12 +845,12 @@ - **Naver** - **Naver:live** - **navernow** - - **NBA** + - **nba** + - **nba:channel** + - **nba:embed** - **nba:watch** - **nba:​watch:collection** - - **NBAChannel** - - **NBAEmbed** - - **NBAWatchEmbed** + - **nba:​watch:embed** - **NBC** - **NBCNews** - **nbcolympics** @@ -914,6 +865,7 @@ - **NDTV** - **Nebula**: [*watchnebula*](## "netrc machine") - **nebula:channel**: [*watchnebula*](## "netrc machine") + - **nebula:class**: [*watchnebula*](## "netrc machine") - **nebula:subscriptions**: [*watchnebula*](## "netrc machine") - **NekoHacker** - **NerdCubedFeed** @@ -935,7 +887,6 @@ - **Newgrounds:playlist** - **Newgrounds:user** - **NewsPicks** - - **Newstube** - **Newsy** - **NextMedia**: 蘋果日報 - **NextMediaActionNews**: 蘋果日報 - 動新聞 @@ -961,7 +912,6 @@ - **nick.de** - **nickelodeon:br** - **nickelodeonru** - - **nicknight** - **niconico**: [*niconico*](## "netrc machine") ニコニコ動画 - **niconico:history**: NicoNico user history or likes. Requires cookies. - **niconico:live**: ニコニコ生放送 @@ -984,9 +934,7 @@ - **NonkTube** - **NoodleMagazine** - **Noovo** - - **Normalboots** - **NOSNLArticle** - - **NosVideo** - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz - **NovaEmbed** - **NovaPlay** @@ -1009,7 +957,7 @@ - **NRKTVEpisodes** - **NRKTVSeason** - **NRKTVSeries** - - **NRLTV** + - **NRLTV**: (**Currently broken**) - **ntv.ru** - **NubilesPorn**: [*nubiles-porn*](## "netrc machine") - **Nuvid** @@ -1037,8 +985,6 @@ - **onet.tv:channel** - **OnetMVP** - **OnionStudios** - - **Ooyala** - - **OoyalaExternal** - **Opencast** - **OpencastPlaylist** - **openrec** @@ -1060,7 +1006,6 @@ - **PalcoMP3:artist** - **PalcoMP3:song** - **PalcoMP3:video** - - **pandora.tv**: 판도라TV - **Panopto** - **PanoptoList** - **PanoptoPlaylist** @@ -1082,7 +1027,6 @@ - **PeerTube:Playlist** - **peloton**: [*peloton*](## "netrc machine") - **peloton:live**: Peloton Live - - **People** - **PerformGroup** - **periscope**: Periscope - **periscope:user**: Periscope user videos @@ -1104,14 +1048,11 @@ - **PlanetMarathi** - **Platzi**: [*platzi*](## "netrc machine") - **PlatziCourse**: [*platzi*](## "netrc machine") - - **play.fm** - **player.sky.it** - **PlayPlusTV**: [*playplustv*](## "netrc machine") - **PlayStuff** - - **PlaysTV** - **PlaySuisse** - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz - - **Playvid** - **PlayVids** - **Playwire** - **pluralsight**: [*pluralsight*](## "netrc machine") @@ -1136,11 +1077,8 @@ - **Popcorntimes** - **PopcornTV** - **Pornbox** - - **PornCom** - **PornerBros** - - **Pornez** - **PornFlip** - - **PornHd** - **PornHub**: [*pornhub*](## "netrc machine") PornHub and Thumbzilla - **PornHubPagedVideoList**: [*pornhub*](## "netrc machine") - **PornHubPlaylist**: [*pornhub*](## "netrc machine") @@ -1182,7 +1120,6 @@ - **Radiko** - **RadikoRadio** - **radio.de** - - **radiobremen** - **radiocanada** - **radiocanada:audiovideo** - **RadioComercial** @@ -1222,7 +1159,6 @@ - **RCTIPlusSeries** - **RCTIPlusTV** - **RDS**: RDS.ca - - **Recurbate** - **RedBull** - **RedBullEmbed** - **RedBullTV** @@ -1239,7 +1175,7 @@ - **Reuters** - **ReverbNation** - **RheinMainTV** - - **RICE** + - **RinseFM** - **RMCDecouverte** - **RockstarGames** - **Rokfin**: [*rokfin*](## "netrc machine") @@ -1260,8 +1196,6 @@ - **rtl.lu:tele-vod** - **rtl.nl**: rtl.nl and rtlxl.nl - **rtl2** - - **rtl2:you** - - **rtl2:​you:series** - **RTLLuLive** - **RTLLuRadio** - **RTNews** @@ -1276,10 +1210,9 @@ - **rtve.es:infantil**: RTVE infantil - **rtve.es:live**: RTVE.es live streams - **rtve.es:television** - - **RTVNH** - **RTVS** - **rtvslo.si** - - **RUHD** + - **RudoVideo** - **Rule34Video** - **Rumble** - **RumbleChannel** @@ -1326,8 +1259,8 @@ - **ScrippsNetworks** - **scrippsnetworks:watch** - **Scrolller** - - **SCTE**: [*scte*](## "netrc machine") - - **SCTECourse**: [*scte*](## "netrc machine") + - **SCTE**: [*scte*](## "netrc machine") (**Currently broken**) + - **SCTECourse**: [*scte*](## "netrc machine") (**Currently broken**) - **Seeker** - **SenalColombiaLive** - **SenateGov** @@ -1339,7 +1272,6 @@ - **SeznamZpravyArticle** - **Shahid**: [*shahid*](## "netrc machine") - **ShahidShow** - - **Shared**: shared.sx - **ShareVideosEmbed** - **ShemarooMe** - **ShowRoomLive** @@ -1391,7 +1323,6 @@ - **SovietsClosetPlaylist** - **SpankBang** - **SpankBangPlaylist** - - **Spankwire** - **Spiegel** - **Sport5** - **SportBox** @@ -1404,7 +1335,7 @@ - **SpreakerShowPage** - **SpringboardPlatform** - **Sprout** - - **sr:mediathek**: Saarländischer Rundfunk + - **sr:mediathek**: Saarländischer Rundfunk (**Currently broken**) - **SRGSSR** - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites - **StacommuLive**: [*stacommu*](## "netrc machine") @@ -1421,7 +1352,6 @@ - **StoryFireSeries** - **StoryFireUser** - **Streamable** - - **streamcloud.eu** - **StreamCZ** - **StreamFF** - **StreetVoice** @@ -1437,7 +1367,6 @@ - **SVTPlay**: SVT Play and Öppet arkiv - **SVTSeries** - **SwearnetEpisode** - - **SWRMediathek** - **Syfy** - **SYVDK** - **SztvHu** @@ -1456,7 +1385,6 @@ - **TeachingChannel** - **Teamcoco** - **TeamTreeHouse**: [*teamtreehouse*](## "netrc machine") - - **TechTalks** - **techtv.mit.edu** - **TedEmbed** - **TedPlaylist** @@ -1486,6 +1414,8 @@ - **TFO** - **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine") - **theatercomplextown:vod**: [*theatercomplextown*](## "netrc machine") + - **TheGuardianPodcast** + - **TheGuardianPodcastPlaylist** - **TheHoleTv** - **TheIntercept** - **ThePlatform** @@ -1506,27 +1436,23 @@ - **tiktok:sound**: (**Currently broken**) - **tiktok:tag**: (**Currently broken**) - **tiktok:user**: (**Currently broken**) - - **tinypic**: tinypic.com videos - **TLC** - **TMZ** - **TNAFlix** - **TNAFlixNetworkEmbed** - **toggle** - **toggo** - - **Tokentube** - - **Tokentube:channel** - **tokfm:audition** - **tokfm:podcast** - **ToonGoggles** - **tou.tv**: [*toutv*](## "netrc machine") - - **Toypics**: Toypics video - - **ToypicsUser**: Toypics user profile + - **Toypics**: Toypics video (**Currently broken**) + - **ToypicsUser**: Toypics user profile (**Currently broken**) - **TrailerAddict**: (**Currently broken**) - **TravelChannel** - **Triller**: [*triller*](## "netrc machine") - **TrillerShort** - **TrillerUser**: [*triller*](## "netrc machine") - - **Trilulilu** - **Trovo** - **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix - **TrovoChannelVod**: All VODs of a trovo.live channel; "trovovod:" prefix @@ -1536,7 +1462,7 @@ - **TruNews** - **Truth** - **TruTV** - - **Tube8** + - **Tube8**: (**Currently broken**) - **TubeTuGraz**: [*tubetugraz*](## "netrc machine") tube.tugraz.at - **TubeTuGrazSeries**: [*tubetugraz*](## "netrc machine") - **TubiTv**: [*tubitv*](## "netrc machine") @@ -1545,7 +1471,6 @@ - **TuneInPodcast** - **TuneInPodcastEpisode** - **TuneInStation** - - **TunePk** - **Turbo** - **tv.dfb.de** - **TV2** @@ -1569,14 +1494,7 @@ - **TVIPlayer** - **tvland.com** - **TVN24** - - **TVNet** - **TVNoe** - - **TVNow** - - **TVNowAnnual** - - **TVNowFilm** - - **TVNowNew** - - **TVNowSeason** - - **TVNowShow** - **tvopengr:embed**: tvopen.gr embedded videos - **tvopengr:watch**: tvopen.gr (and ethnos.gr) videos - **tvp**: Telewizja Polska @@ -1614,7 +1532,6 @@ - **umg:de**: Universal Music Deutschland - **Unistra** - **Unity** - - **UnscriptedNewsVideo** - **uol.com.br** - **uplynk** - **uplynk:preplay** @@ -1629,7 +1546,6 @@ - **Utreon** - **Varzesh3** - **Vbox7** - - **VeeHD** - **Veo** - **Veoh** - **veoh:user** @@ -1642,7 +1558,6 @@ - **vice** - **vice:article** - **vice:show** - - **Vidbit** - **Viddler** - **Videa** - **video.arnes.si**: Arnes Video @@ -1664,6 +1579,7 @@ - **VidioLive**: [*vidio*](## "netrc machine") - **VidioPremier**: [*vidio*](## "netrc machine") - **VidLii** + - **Vidly** - **viewlift** - **viewlift:embed** - **Viidea** @@ -1683,7 +1599,6 @@ - **Vimm:stream** - **ViMP** - **ViMP:Playlist** - - **Vimple**: Vimple - one-click video hosting - **Vine** - **vine:user** - **Viqeo** @@ -1691,7 +1606,6 @@ - **viu:ott**: [*viu*](## "netrc machine") - **viu:playlist** - **ViuOTTIndonesia** - - **Vivo**: vivo.sx - **vk**: [*vk*](## "netrc machine") VK - **vk:uservideos**: [*vk*](## "netrc machine") VK - User's Videos - **vk:wallpost**: [*vk*](## "netrc machine") @@ -1699,37 +1613,27 @@ - **VKPlayLive** - **vm.tiktok** - **Vocaroo** - - **Vodlocker** - **VODPl** - **VODPlatform** - - **VoiceRepublic** - **voicy** - **voicy:channel** - **VolejTV** - - **Voot**: [*voot*](## "netrc machine") - - **VootSeries**: [*voot*](## "netrc machine") + - **Voot**: [*voot*](## "netrc machine") (**Currently broken**) + - **VootSeries**: [*voot*](## "netrc machine") (**Currently broken**) - **VoxMedia** - **VoxMediaVolume** - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **vqq:series** - **vqq:video** - - **Vrak** - **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza - **VrtNU**: [*vrtnu*](## "netrc machine") VRT MAX - - **vrv**: [*vrv*](## "netrc machine") - - **vrv:series** - - **VShare** - **VTM** - **VTXTV**: [*vtxtv*](## "netrc machine") - **VTXTVLive**: [*vtxtv*](## "netrc machine") - **VTXTVRecordings**: [*vtxtv*](## "netrc machine") - **VuClip** - - **Vupload** - **VVVVID** - **VVVVIDShow** - - **VyboryMos** - - **Vzaar** - - **Wakanim** - **Walla** - **WalyTV**: [*walytv*](## "netrc machine") - **WalyTVLive**: [*walytv*](## "netrc machine") @@ -1740,9 +1644,7 @@ - **washingtonpost** - **washingtonpost:article** - **wat.tv** - - **WatchBox** - **WatchESPN** - - **WatchIndianPorn**: Watch Indian Porn - **WDR** - **wdr:mobile**: (**Currently broken**) - **WDRElefant** @@ -1770,7 +1672,6 @@ - **whowatch** - **Whyp** - **wikimedia.org** - - **Willow** - **Wimbledon** - **WimTV** - **WinSportsVideo** @@ -1795,7 +1696,6 @@ - **wykop:post** - **wykop:​post:comment** - **Xanimu** - - **XBef** - **XboxClips** - **XFileShare**: XFileShare based sites: Aparat, ClipWatching, GoUnlimited, GoVid, HolaVid, Streamty, TheVideoBee, Uqload, VidBom, vidlo, VidLocker, VidShare, VUp, WolfStream, XVideoSharing - **XHamster** @@ -1807,9 +1707,6 @@ - **XMinus** - **XNXX** - **Xstream** - - **XTube** - - **XTubeUser**: XTube user profile - - **Xuite**: 隨意窩Xuite影音 - **XVideos** - **xvideos:quickies** - **XXXYMovies** @@ -1826,10 +1723,7 @@ - **YapFiles** - **Yappy** - **YappyProfile** - - **YesJapan** - - **yinyuetai:video**: 音悦Tai - **YleAreena** - - **Ynet** - **YouJizz** - **youku**: 优酷 - **youku:show** @@ -1877,6 +1771,9 @@ - **zingmp3:chart-home** - **zingmp3:chart-music-video** - **zingmp3:hub** + - **zingmp3:liveradio** + - **zingmp3:podcast** + - **zingmp3:podcast-episode** - **zingmp3:user** - **zingmp3:week-chart** - **zoom** diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py index 8bff08314596..0087cbc941fb 100644 --- a/test/test_YoutubeDL.py +++ b/test/test_YoutubeDL.py @@ -730,7 +730,7 @@ def expect_same_infodict(out): self.assertEqual(got_dict.get(info_field), expected, info_field) return True - test('%()j', (expect_same_infodict, str)) + test('%()j', (expect_same_infodict, None)) # NA placeholder NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s' diff --git a/test/test_update.py b/test/test_update.py index a5a388c10698..bc139562f4a4 100644 --- a/test/test_update.py +++ b/test/test_update.py @@ -9,7 +9,7 @@ from test.helper import FakeYDL, report_warning -from yt_dlp.update import Updater, UpdateInfo +from yt_dlp.update import UpdateInfo, Updater # XXX: Keep in sync with yt_dlp.update.UPDATE_SOURCES diff --git a/test/test_utils.py b/test/test_utils.py index 6c8571f980f2..09c648cf8939 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -2110,6 +2110,8 @@ def test_traverse_obj(self): self.assertEqual(traverse_obj(_TEST_DATA, (..., {str_or_none})), [item for item in map(str_or_none, _TEST_DATA.values()) if item is not None], msg='Function in set should be a transformation') + self.assertEqual(traverse_obj(_TEST_DATA, ('fail', {lambda _: 'const'})), 'const', + msg='Function in set should always be called') if __debug__: with self.assertRaises(Exception, msg='Sets with length != 1 should raise in debug'): traverse_obj(_TEST_DATA, set()) @@ -2338,6 +2340,58 @@ def test_traverse_obj(self): self.assertEqual(traverse_obj(mobj, lambda k, _: k in (0, 'group')), ['0123', '3'], msg='function on a `re.Match` should give group name as well') + # Test xml.etree.ElementTree.Element as input obj + etree = xml.etree.ElementTree.fromstring(''' + + + 1 + 2008 + 141100 + + + + + 4 + 2011 + 59900 + + + + 68 + 2011 + 13600 + + + + ''') + self.assertEqual(traverse_obj(etree, ''), etree, + msg='empty str key should return the element itself') + self.assertEqual(traverse_obj(etree, 'country'), list(etree), + msg='str key should lead all children with that tag name') + self.assertEqual(traverse_obj(etree, ...), list(etree), + msg='`...` as key should return all children') + self.assertEqual(traverse_obj(etree, lambda _, x: x[0].text == '4'), [etree[1]], + msg='function as key should get element as value') + self.assertEqual(traverse_obj(etree, lambda i, _: i == 1), [etree[1]], + msg='function as key should get index as key') + self.assertEqual(traverse_obj(etree, 0), etree[0], + msg='int key should return the nth child') + self.assertEqual(traverse_obj(etree, './/neighbor/@name'), + ['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia'], + msg='`@` at end of path should give that attribute') + self.assertEqual(traverse_obj(etree, '//neighbor/@fail'), [None, None, None, None, None], + msg='`@` at end of path should give `None`') + self.assertEqual(traverse_obj(etree, ('//neighbor/@', 2)), {'name': 'Malaysia', 'direction': 'N'}, + msg='`@` should give the full attribute dict') + self.assertEqual(traverse_obj(etree, '//year/text()'), ['2008', '2011', '2011'], + msg='`text()` at end of path should give the inner text') + self.assertEqual(traverse_obj(etree, '//*[@direction]/@direction'), ['E', 'W', 'N', 'W', 'E'], + msg='full python xpath features should be supported') + self.assertEqual(traverse_obj(etree, (0, '@name')), 'Liechtenstein', + msg='special transformations should act on current element') + self.assertEqual(traverse_obj(etree, ('country', 0, ..., 'text()', {int_or_none})), [1, 2008, 141100], + msg='special transformations should act on current element') + def test_http_header_dict(self): headers = HTTPHeaderDict() headers['ytdl-test'] = b'0' diff --git a/yt-dlp.cmd b/yt-dlp.cmd index aa4500f9f11c..5537e0ea9c3e 100644 --- a/yt-dlp.cmd +++ b/yt-dlp.cmd @@ -1 +1 @@ -@py -bb -Werror -Xdev "%~dp0yt_dlp\__main__.py" %* +@py -Werror -Xdev "%~dp0yt_dlp\__main__.py" %* diff --git a/yt-dlp.sh b/yt-dlp.sh index 22a69250c049..ce74df801fa8 100755 --- a/yt-dlp.sh +++ b/yt-dlp.sh @@ -1,2 +1,2 @@ #!/usr/bin/env sh -exec "${PYTHON:-python3}" -bb -Werror -Xdev "$(dirname "$(realpath "$0")")/yt_dlp/__main__.py" "$@" +exec "${PYTHON:-python3}" -Werror -Xdev "$(dirname "$(realpath "$0")")/yt_dlp/__main__.py" "$@" diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index 5e28fd0e21c0..8d96498a6780 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -60,7 +60,13 @@ get_postprocessor, ) from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping -from .update import REPOSITORY, _get_system_deprecation, _make_label, current_git_head, detect_variant +from .update import ( + REPOSITORY, + _get_system_deprecation, + _make_label, + current_git_head, + detect_variant, +) from .utils import ( DEFAULT_OUTTMPL, IDENTITY, diff --git a/yt_dlp/cookies.py b/yt_dlp/cookies.py index a71fbc28baa4..eac033e391d4 100644 --- a/yt_dlp/cookies.py +++ b/yt_dlp/cookies.py @@ -186,7 +186,7 @@ def _firefox_browser_dir(): if sys.platform in ('cygwin', 'win32'): return os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles') elif sys.platform == 'darwin': - return os.path.expanduser('~/Library/Application Support/Firefox') + return os.path.expanduser('~/Library/Application Support/Firefox/Profiles') return os.path.expanduser('~/.mozilla/firefox') diff --git a/yt_dlp/extractor/_extractors.py b/yt_dlp/extractor/_extractors.py index 62103f13c143..557ff9447042 100644 --- a/yt_dlp/extractor/_extractors.py +++ b/yt_dlp/extractor/_extractors.py @@ -134,8 +134,8 @@ from .arkena import ArkenaIE from .ard import ( ARDBetaMediathekIE, + ARDMediathekCollectionIE, ARDIE, - ARDMediathekIE, ) from .arte import ( ArteTVIE, @@ -2019,7 +2019,6 @@ TuneInPodcastEpisodeIE, TuneInShortenerIE, ) -from .turbo import TurboIE from .tv2 import ( TV2IE, TV2ArticleIE, @@ -2223,6 +2222,7 @@ VikiIE, VikiChannelIE, ) +from .viously import ViouslyIE from .viqeo import ViqeoIE from .viu import ( ViuIE, diff --git a/yt_dlp/extractor/ard.py b/yt_dlp/extractor/ard.py index 8ac926c917fb..f4b1cd0756a4 100644 --- a/yt_dlp/extractor/ard.py +++ b/yt_dlp/extractor/ard.py @@ -1,24 +1,24 @@ -import json import re +from functools import partial from .common import InfoExtractor -from .generic import GenericIE from ..utils import ( + OnDemandPagedList, + bug_reports_message, determine_ext, - ExtractorError, int_or_none, + join_nonempty, + make_archive_id, parse_duration, - qualities, + parse_iso8601, + remove_start, str_or_none, - try_get, unified_strdate, - unified_timestamp, - update_url, update_url_query, url_or_none, xpath_text, ) -from ..compat import compat_etree_fromstring +from ..utils.traversal import traverse_obj class ARDMediathekBaseIE(InfoExtractor): @@ -61,45 +61,6 @@ def _parse_media_info(self, media_info, video_id, fsk): 'subtitles': subtitles, } - def _ARD_extract_episode_info(self, title): - """Try to extract season/episode data from the title.""" - res = {} - if not title: - return res - - for pattern in [ - # Pattern for title like "Homo sapiens (S06/E07) - Originalversion" - # from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw - r'.*(?P \(S(?P\d+)/E(?P\d+)\)).*', - # E.g.: title="Fritjof aus Norwegen (2) (AD)" - # from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/ - r'.*(?P \((?:Folge |Teil )?(?P\d+)(?:/\d+)?\)).*', - r'.*(?PFolge (?P\d+)(?:\:| -|) )\"(?P.+)\".*', - # E.g.: title="Folge 25/42: Symmetrie" - # from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/ - # E.g.: title="Folge 1063 - Vertrauen" - # from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/ - r'.*(?PFolge (?P\d+)(?:/\d+)?(?:\:| -|) ).*', - ]: - m = re.match(pattern, title) - if m: - groupdict = m.groupdict() - res['season_number'] = int_or_none(groupdict.get('season_number')) - res['episode_number'] = int_or_none(groupdict.get('episode_number')) - res['episode'] = str_or_none(groupdict.get('episode')) - # Build the episode title by removing numeric episode information: - if groupdict.get('ep_info') and not res['episode']: - res['episode'] = str_or_none( - title.replace(groupdict.get('ep_info'), '')) - if res['episode']: - res['episode'] = res['episode'].strip() - break - - # As a fallback use the whole title as the episode name: - if not res.get('episode'): - res['episode'] = title.strip() - return res - def _extract_formats(self, media_info, video_id): type_ = media_info.get('_type') media_array = media_info.get('_mediaArray', []) @@ -155,138 +116,6 @@ def _extract_formats(self, media_info, video_id): return formats -class ARDMediathekIE(ARDMediathekBaseIE): - IE_NAME = 'ARD:mediathek' - _VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?' - - _TESTS = [{ - # available till 26.07.2022 - 'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822', - 'info_dict': { - 'id': '44726822', - 'ext': 'mp4', - 'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?', - 'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5', - 'duration': 1740, - }, - 'params': { - # m3u8 download - 'skip_download': True, - } - }, { - 'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872', - 'only_matching': True, - }, { - # audio - 'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086', - 'only_matching': True, - }, { - 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht', - 'only_matching': True, - }, { - # audio - 'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158', - 'only_matching': True, - }, { - 'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698', - 'only_matching': True, - }] - - @classmethod - def suitable(cls, url): - return False if ARDBetaMediathekIE.suitable(url) else super(ARDMediathekIE, cls).suitable(url) - - def _real_extract(self, url): - # determine video id from url - m = self._match_valid_url(url) - - document_id = None - - numid = re.search(r'documentId=([0-9]+)', url) - if numid: - document_id = video_id = numid.group(1) - else: - video_id = m.group('video_id') - - webpage = self._download_webpage(url, video_id) - - ERRORS = ( - ('>Leider liegt eine Störung vor.', 'Video %s is unavailable'), - ('>Der gewünschte Beitrag ist nicht mehr verfügbar.<', - 'Video %s is no longer available'), - ) - - for pattern, message in ERRORS: - if pattern in webpage: - raise ExtractorError(message % video_id, expected=True) - - if re.search(r'[\?&]rss($|[=&])', url): - doc = compat_etree_fromstring(webpage.encode('utf-8')) - if doc.tag == 'rss': - return GenericIE()._extract_rss(url, video_id, doc) - - title = self._og_search_title(webpage, default=None) or self._html_search_regex( - [r'(.*?)', - r'', - r'

(.*?)

', - r']*>(.*?)'], - webpage, 'title') - description = self._og_search_description(webpage, default=None) or self._html_search_meta( - 'dcterms.abstract', webpage, 'description', default=None) - if description is None: - description = self._html_search_meta( - 'description', webpage, 'meta description', default=None) - if description is None: - description = self._html_search_regex( - r'(.+?)

', - webpage, 'teaser text', default=None) - - # Thumbnail is sometimes not present. - # It is in the mobile version, but that seems to use a different URL - # structure altogether. - thumbnail = self._og_search_thumbnail(webpage, default=None) - - media_streams = re.findall(r'''(?x) - mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s* - "([^"]+)"''', webpage) - - if media_streams: - QUALITIES = qualities(['lo', 'hi', 'hq']) - formats = [] - for furl in set(media_streams): - if furl.endswith('.f4m'): - fid = 'f4m' - else: - fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl) - fid = fid_m.group(1) if fid_m else None - formats.append({ - 'quality': QUALITIES(fid), - 'format_id': fid, - 'url': furl, - }) - info = { - 'formats': formats, - } - else: # request JSON file - if not document_id: - video_id = self._search_regex( - (r'/play/(?:config|media|sola)/(\d+)', r'contentId["\']\s*:\s*(\d+)'), - webpage, 'media id', default=None) - info = self._extract_media_info( - 'http://www.ardmediathek.de/play/media/%s' % video_id, - webpage, video_id) - - info.update({ - 'id': video_id, - 'title': title, - 'description': description, - 'thumbnail': thumbnail, - }) - info.update(self._ARD_extract_episode_info(info['title'])) - - return info - - class ARDIE(InfoExtractor): _VALID_URL = r'(?Phttps?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P[^/?#&]+))\.html' _TESTS = [{ @@ -399,20 +228,22 @@ def _real_extract(self, url): } -class ARDBetaMediathekIE(ARDMediathekBaseIE): +class ARDBetaMediathekIE(InfoExtractor): + IE_NAME = 'ARDMediathek' _VALID_URL = r'''(?x)https:// (?:(?:beta|www)\.)?ardmediathek\.de/ - (?:(?P[^/]+)/)? - (?:player|live|video|(?Psendung|serie|sammlung))/ - (?:(?P(?(playlist)[^?#]+?|[^?#]+))/)? - (?P(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+) - (?(playlist)/(?P\d+)?/?(?:[?#]|$))''' + (?:[^/]+/)? + (?:player|live|video)/ + (?:[^?#]+/)? + (?P[a-zA-Z0-9]+) + /?(?:[?#]|$)''' + _GEO_COUNTRIES = ['DE'] _TESTS = [{ 'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0', 'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4', 'info_dict': { - 'display_id': 'filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen', + 'display_id': 'Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0', 'id': '12939099', 'title': 'Liebe auf vier Pfoten', 'description': r're:^Claudia Schmitt, Anwältin in Salzburg', @@ -422,7 +253,10 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): 'upload_date': '20231130', 'ext': 'mp4', 'episode': 'Liebe auf vier Pfoten', - 'series': 'Filme im MDR' + 'series': 'Filme im MDR', + 'age_limit': 0, + 'channel': 'MDR', + '_old_archive_ids': ['ardbetamediathek Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0'], }, }, { 'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/', @@ -449,11 +283,31 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): 'timestamp': 1636398000, 'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b', 'upload_date': '20211108', - 'display_id': 'tagesschau-oder-tagesschau-20-00-uhr/das-erste', + 'display_id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll', 'duration': 915, 'episode': 'tagesschau, 20:00 Uhr', 'series': 'tagesschau', 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678', + 'channel': 'ARD-Aktuell', + '_old_archive_ids': ['ardbetamediathek Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll'], + }, + }, { + 'url': 'https://www.ardmediathek.de/video/7-tage/7-tage-unter-harten-jungs/hr-fernsehen/N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3', + 'md5': 'c428b9effff18ff624d4f903bda26315', + 'info_dict': { + 'id': '94834686', + 'ext': 'mp4', + 'duration': 2700, + 'episode': '7 Tage ... unter harten Jungs', + 'description': 'md5:0f215470dcd2b02f59f4bd10c963f072', + 'upload_date': '20231005', + 'timestamp': 1696491171, + 'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3', + 'series': '7 Tage ...', + 'channel': 'HR', + 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f6e6d5ffac41925c?w=960&ch=fa32ba69bc87989a', + 'title': '7 Tage ... unter harten Jungs', + '_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'], }, }, { 'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', @@ -470,6 +324,185 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): }, { 'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg', 'only_matching': True, + }, { + 'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/', + 'only_matching': True, + }] + + def _extract_episode_info(self, title): + patterns = [ + # Pattern for title like "Homo sapiens (S06/E07) - Originalversion" + # from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw + r'.*(?P \(S(?P\d+)/E(?P\d+)\)).*', + # E.g.: title="Fritjof aus Norwegen (2) (AD)" + # from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/ + r'.*(?P \((?:Folge |Teil )?(?P\d+)(?:/\d+)?\)).*', + r'.*(?PFolge (?P\d+)(?:\:| -|) )\"(?P.+)\".*', + # E.g.: title="Folge 25/42: Symmetrie" + # from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/ + # E.g.: title="Folge 1063 - Vertrauen" + # from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/ + r'.*(?PFolge (?P\d+)(?:/\d+)?(?:\:| -|) ).*', + # As a fallback use the full title + r'(?P.*)', + ] + + return traverse_obj(patterns, (..., {partial(re.match, string=title)}, { + 'season_number': ('season_number', {int_or_none}), + 'episode_number': ('episode_number', {int_or_none}), + 'episode': (( + ('episode', {str_or_none}), + ('ep_info', {lambda x: title.replace(x, '')}), + ('title', {str}), + ), {str.strip}), + }), get_all=False) + + def _real_extract(self, url): + display_id = self._match_id(url) + + page_data = self._download_json( + f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}', display_id, query={ + 'embedded': 'false', + 'mcV6': 'true', + }) + + # For user convenience we use the old contentId instead of the longer crid + # Ref: https://github.com/yt-dlp/yt-dlp/issues/8731#issuecomment-1874398283 + old_id = traverse_obj(page_data, ('tracking', 'atiCustomVars', 'contentId', {int})) + if old_id is not None: + video_id = str(old_id) + archive_ids = [make_archive_id(ARDBetaMediathekIE, display_id)] + else: + self.report_warning(f'Could not extract contentId{bug_reports_message()}') + video_id = display_id + archive_ids = None + + player_data = traverse_obj( + page_data, ('widgets', lambda _, v: v['type'] in ('player_ondemand', 'player_live'), {dict}), get_all=False) + is_live = player_data.get('type') == 'player_live' + media_data = traverse_obj(player_data, ('mediaCollection', 'embedded', {dict})) + + if player_data.get('blockedByFsk'): + self.raise_no_formats('This video is only available after 22:00', expected=True) + + formats = [] + subtitles = {} + for stream in traverse_obj(media_data, ('streams', ..., {dict})): + kind = stream.get('kind') + # Prioritize main stream over sign language and others + preference = 1 if kind == 'main' else None + for media in traverse_obj(stream, ('media', lambda _, v: url_or_none(v['url']))): + media_url = media['url'] + + audio_kind = traverse_obj(media, ( + 'audios', 0, 'kind', {str}), default='').replace('standard', '') + lang_code = traverse_obj(media, ('audios', 0, 'languageCode', {str})) or 'deu' + lang = join_nonempty(lang_code, audio_kind) + language_preference = 10 if lang == 'deu' else -10 + + if determine_ext(media_url) == 'm3u8': + fmts, subs = self._extract_m3u8_formats_and_subtitles( + media_url, video_id, m3u8_id=f'hls-{kind}', preference=preference, fatal=False, live=is_live) + for f in fmts: + f['language'] = lang + f['language_preference'] = language_preference + formats.extend(fmts) + self._merge_subtitles(subs, target=subtitles) + else: + formats.append({ + 'url': media_url, + 'format_id': f'http-{kind}', + 'preference': preference, + 'language': lang, + 'language_preference': language_preference, + **traverse_obj(media, { + 'format_note': ('forcedLabel', {str}), + 'width': ('maxHResolutionPx', {int_or_none}), + 'height': ('maxVResolutionPx', {int_or_none}), + 'vcodec': ('videoCodec', {str}), + }), + }) + + for sub in traverse_obj(media_data, ('subtitles', ..., {dict})): + for sources in traverse_obj(sub, ('sources', lambda _, v: url_or_none(v['url']))): + subtitles.setdefault(sub.get('languageCode') or 'deu', []).append({ + 'url': sources['url'], + 'ext': {'webvtt': 'vtt', 'ebutt': 'ttml'}.get(sources.get('kind')), + }) + + age_limit = traverse_obj(page_data, ('fskRating', {lambda x: remove_start(x, 'FSK')}, {int_or_none})) + return { + 'id': video_id, + 'display_id': display_id, + 'formats': formats, + 'subtitles': subtitles, + 'is_live': is_live, + 'age_limit': age_limit, + **traverse_obj(media_data, ('meta', { + 'title': 'title', + 'description': 'synopsis', + 'timestamp': ('broadcastedOnDateTime', {parse_iso8601}), + 'series': 'seriesTitle', + 'thumbnail': ('images', 0, 'url', {url_or_none}), + 'duration': ('durationSeconds', {int_or_none}), + 'channel': 'clipSourceName', + })), + **self._extract_episode_info(page_data.get('title')), + '_old_archive_ids': archive_ids, + } + + +class ARDMediathekCollectionIE(InfoExtractor): + _VALID_URL = r'''(?x)https:// + (?:(?:beta|www)\.)?ardmediathek\.de/ + (?:[^/?#]+/)? + (?P<playlist>sendung|serie|sammlung)/ + (?:(?P<display_id>[^?#]+?)/)? + (?P<id>[a-zA-Z0-9]+) + (?:/(?P<season>\d+)(?:/(?P<version>OV|AD))?)?/?(?:[?#]|$)''' + _GEO_COUNTRIES = ['DE'] + + _TESTS = [{ + 'url': 'https://www.ardmediathek.de/serie/quiz/staffel-1-originalversion/Y3JpZDovL3dkci5kZS9vbmUvcXVpeg/1/OV', + 'info_dict': { + 'id': 'Y3JpZDovL3dkci5kZS9vbmUvcXVpeg_1_OV', + 'display_id': 'quiz/staffel-1-originalversion', + 'title': 'Staffel 1 Originalversion', + }, + 'playlist_count': 3, + }, { + 'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-4-mit-audiodeskription/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/4/AD', + 'info_dict': { + 'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_4_AD', + 'display_id': 'babylon-berlin/staffel-4-mit-audiodeskription', + 'title': 'Staffel 4 mit Audiodeskription', + }, + 'playlist_count': 12, + }, { + 'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/1/', + 'info_dict': { + 'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_1', + 'display_id': 'babylon-berlin/staffel-1', + 'title': 'Staffel 1', + }, + 'playlist_count': 8, + }, { + 'url': 'https://www.ardmediathek.de/sendung/tatort/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA', + 'info_dict': { + 'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA', + 'display_id': 'tatort', + 'title': 'Tatort', + }, + 'playlist_mincount': 500, + }, { + 'url': 'https://www.ardmediathek.de/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2', + 'info_dict': { + 'id': '5eOHzt8XB2sqeFXbIoJlg2', + 'display_id': 'die-kirche-bleibt-im-dorf', + 'title': 'Die Kirche bleibt im Dorf', + 'description': 'Die Kirche bleibt im Dorf', + }, + 'playlist_count': 4, }, { # playlist of type 'sendung' 'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/', @@ -482,196 +515,48 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE): # playlist of type 'sammlung' 'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/', 'only_matching': True, - }, { - 'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/', - 'only_matching': True, - }, { - 'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet', - 'only_matching': True, }] - def _ARD_load_playlist_snippet(self, playlist_id, display_id, client, mode, page_number): - """ Query the ARD server for playlist information - and returns the data in "raw" format """ - assert mode in ('sendung', 'serie', 'sammlung') - if mode in ('sendung', 'serie'): - graphQL = json.dumps({ - 'query': '''{ - showPage( - client: "%s" - showId: "%s" - pageNumber: %d - ) { - pagination { - pageSize - totalElements - } - teasers { # Array - mediumTitle - links { target { id href title } } - type - } - }}''' % (client, playlist_id, page_number), - }).encode() - else: # mode == 'sammlung' - graphQL = json.dumps({ - 'query': '''{ - morePage( - client: "%s" - compilationId: "%s" - pageNumber: %d - ) { - widget { - pagination { - pageSize - totalElements - } - teasers { # Array - mediumTitle - links { target { id href title } } - type - } - } - }}''' % (client, playlist_id, page_number), - }).encode() - # Ressources for ARD graphQL debugging: - # https://api-test.ardmediathek.de/public-gateway - show_page = self._download_json( - 'https://api.ardmediathek.de/public-gateway', - '[Playlist] %s' % display_id, - data=graphQL, - headers={'Content-Type': 'application/json'})['data'] - # align the structure of the returned data: - if mode in ('sendung', 'serie'): - show_page = show_page['showPage'] - else: # mode == 'sammlung' - show_page = show_page['morePage']['widget'] - return show_page - - def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode): - """ Collects all playlist entries and returns them as info dict. - Supports playlists of mode 'sendung', 'serie', and 'sammlung', - as well as nested playlists. """ - entries = [] - pageNumber = 0 - while True: # iterate by pageNumber - show_page = self._ARD_load_playlist_snippet( - playlist_id, display_id, client, mode, pageNumber) - for teaser in show_page['teasers']: # process playlist items - if '/compilation/' in teaser['links']['target']['href']: - # alternativ cond.: teaser['type'] == "compilation" - # => This is an nested compilation, e.g. like: - # https://www.ardmediathek.de/ard/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2/ - link_mode = 'sammlung' - else: - link_mode = 'video' - - item_url = 'https://www.ardmediathek.de/%s/%s/%s/%s/%s' % ( - client, link_mode, display_id, - # perform HTLM quoting of episode title similar to ARD: - re.sub('^-|-$', '', # remove '-' from begin/end - re.sub('[^a-zA-Z0-9]+', '-', # replace special chars by - - teaser['links']['target']['title'].lower() - .replace('ä', 'ae').replace('ö', 'oe') - .replace('ü', 'ue').replace('ß', 'ss'))), - teaser['links']['target']['id']) - entries.append(self.url_result( - item_url, - ie=ARDBetaMediathekIE.ie_key())) - - if (show_page['pagination']['pageSize'] * (pageNumber + 1) - >= show_page['pagination']['totalElements']): - # we've processed enough pages to get all playlist entries - break - pageNumber = pageNumber + 1 - - return self.playlist_result(entries, playlist_id, playlist_title=display_id) + _PAGE_SIZE = 100 def _real_extract(self, url): - video_id, display_id, playlist_type, client, season_number = self._match_valid_url(url).group( - 'id', 'display_id', 'playlist', 'client', 'season') - display_id, client = display_id or video_id, client or 'ard' - - if playlist_type: - # TODO: Extract only specified season - return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type) - - player_page = self._download_json( - 'https://api.ardmediathek.de/public-gateway', - display_id, data=json.dumps({ - 'query': '''{ - playerPage(client:"%s", clipId: "%s") { - blockedByFsk - broadcastedOn - maturityContentRating - mediaCollection { - _duration - _geoblocked - _isLive - _mediaArray { - _mediaStreamArray { - _quality - _server - _stream - } - } - _previewImage - _subtitleUrl - _type - } - show { - title - } - image { - src - } - synopsis - title - tracking { - atiCustomVars { - contentId - } - } - } -}''' % (client, video_id), - }).encode(), headers={ - 'Content-Type': 'application/json' - })['data']['playerPage'] - title = player_page['title'] - content_id = str_or_none(try_get( - player_page, lambda x: x['tracking']['atiCustomVars']['contentId'])) - media_collection = player_page.get('mediaCollection') or {} - if not media_collection and content_id: - media_collection = self._download_json( - 'https://www.ardmediathek.de/play/media/' + content_id, - content_id, fatal=False) or {} - info = self._parse_media_info( - media_collection, content_id or video_id, - player_page.get('blockedByFsk')) - age_limit = None - description = player_page.get('synopsis') - maturity_content_rating = player_page.get('maturityContentRating') - if maturity_content_rating: - age_limit = int_or_none(maturity_content_rating.lstrip('FSK')) - if not age_limit and description: - age_limit = int_or_none(self._search_regex( - r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None)) - info.update({ - 'age_limit': age_limit, - 'display_id': display_id, - 'title': title, - 'description': description, - 'timestamp': unified_timestamp(player_page.get('broadcastedOn')), - 'series': try_get(player_page, lambda x: x['show']['title']), - 'thumbnail': (media_collection.get('_previewImage') - or try_get(player_page, lambda x: update_url(x['image']['src'], query=None, fragment=None)) - or self.get_thumbnail_from_html(display_id, url)), - }) - info.update(self._ARD_extract_episode_info(info['title'])) - return info - - def get_thumbnail_from_html(self, display_id, url): - webpage = self._download_webpage(url, display_id, fatal=False) or '' - return ( - self._og_search_thumbnail(webpage, default=None) - or self._html_search_meta('thumbnailUrl', webpage, default=None)) + playlist_id, display_id, playlist_type, season_number, version = self._match_valid_url(url).group( + 'id', 'display_id', 'playlist', 'season', 'version') + + def call_api(page_num): + api_path = 'compilations/ard' if playlist_type == 'sammlung' else 'widgets/ard/asset' + return self._download_json( + f'https://api.ardmediathek.de/page-gateway/{api_path}/{playlist_id}', playlist_id, + f'Downloading playlist page {page_num}', query={ + 'pageNumber': page_num, + 'pageSize': self._PAGE_SIZE, + **({ + 'seasoned': 'true', + 'seasonNumber': season_number, + 'withOriginalversion': 'true' if version == 'OV' else 'false', + 'withAudiodescription': 'true' if version == 'AD' else 'false', + } if season_number else {}), + }) + + def fetch_page(page_num): + for item in traverse_obj(call_api(page_num), ('teasers', ..., {dict})): + item_id = traverse_obj(item, ('links', 'target', ('urlId', 'id')), 'id', get_all=False) + if not item_id or item_id == playlist_id: + continue + item_mode = 'sammlung' if item.get('type') == 'compilation' else 'video' + yield self.url_result( + f'https://www.ardmediathek.de/{item_mode}/{item_id}', + ie=(ARDMediathekCollectionIE if item_mode == 'sammlung' else ARDBetaMediathekIE), + **traverse_obj(item, { + 'id': ('id', {str}), + 'title': ('longTitle', {str}), + 'duration': ('duration', {int_or_none}), + 'timestamp': ('broadcastedOn', {parse_iso8601}), + })) + + page_data = call_api(0) + full_id = join_nonempty(playlist_id, season_number, version, delim='_') + + return self.playlist_result( + OnDemandPagedList(fetch_page, self._PAGE_SIZE), full_id, display_id=display_id, + title=page_data.get('title'), description=page_data.get('synopsis')) diff --git a/yt_dlp/extractor/banbye.py b/yt_dlp/extractor/banbye.py index dfcc82f02165..67af29a962e4 100644 --- a/yt_dlp/extractor/banbye.py +++ b/yt_dlp/extractor/banbye.py @@ -152,7 +152,7 @@ def page_func(page_num): 'sort': 'new', 'limit': self._PAGE_SIZE, 'offset': page_num * self._PAGE_SIZE, - }, note=f'Downloading page {page_num+1}') + }, note=f'Downloading page {page_num + 1}') return [ self.url_result(f"{self._VIDEO_BASE}/{video['_id']}", BanByeIE) for video in data['items'] diff --git a/yt_dlp/extractor/bigo.py b/yt_dlp/extractor/bigo.py index 1cb6e58be68c..acf78e49a735 100644 --- a/yt_dlp/extractor/bigo.py +++ b/yt_dlp/extractor/bigo.py @@ -29,7 +29,8 @@ def _real_extract(self, url): info_raw = self._download_json( 'https://ta.bigo.tv/official_website/studio/getInternalStudioInfo', - user_id, data=urlencode_postdata({'siteId': user_id})) + user_id, data=urlencode_postdata({'siteId': user_id}), + headers={'Accept': 'application/json'}) if not isinstance(info_raw, dict): raise ExtractorError('Received invalid JSON data') diff --git a/yt_dlp/extractor/duoplay.py b/yt_dlp/extractor/duoplay.py index e57fa7924fd9..7d3f39942d1d 100644 --- a/yt_dlp/extractor/duoplay.py +++ b/yt_dlp/extractor/duoplay.py @@ -53,21 +53,6 @@ class DuoplayIE(InfoExtractor): 'episode_id': 14, 'release_year': 2010, }, - }, { - 'note': 'Movie', - 'url': 'https://duoplay.ee/4325/naljamangud', - 'md5': '2b0bcac4159a08b1844c2bfde06b1199', - 'info_dict': { - 'id': '4325', - 'ext': 'mp4', - 'title': 'Näljamängud', - 'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$', - 'description': 'md5:fb35f5eb2ff46cdb82e4d5fbe7b49a13', - 'cast': ['Jennifer Lawrence', 'Josh Hutcherson', 'Liam Hemsworth'], - 'upload_date': '20231109', - 'timestamp': 1699552800, - 'release_year': 2012, - }, }, { 'note': 'Movie without expiry', 'url': 'https://duoplay.ee/5501/pilvede-all.-neljas-ode', diff --git a/yt_dlp/extractor/floatplane.py b/yt_dlp/extractor/floatplane.py index 09abb40bf618..2cf4d4e648af 100644 --- a/yt_dlp/extractor/floatplane.py +++ b/yt_dlp/extractor/floatplane.py @@ -173,8 +173,8 @@ def format_path(params): 'formats': formats, }) - uploader_url = format_field(traverse_obj( - post_data, 'creator'), 'urlname', 'https://www.floatplane.com/channel/%s/home', default=None) + uploader_url = format_field( + post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None channel_url = urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))) post_info = { @@ -248,7 +248,7 @@ def _fetch_page(self, display_id, creator_id, channel_id, page): for post in page_data or []: yield self.url_result( f'https://www.floatplane.com/post/{post["id"]}', - ie=FloatplaneIE, video_id=post['id'], video_title=post.get('title'), + FloatplaneIE, id=post['id'], title=post.get('title'), release_timestamp=parse_iso8601(post.get('releaseDate'))) def _real_extract(self, url): @@ -264,5 +264,5 @@ def _real_extract(self, url): return self.playlist_result(OnDemandPagedList(functools.partial( self._fetch_page, display_id, creator_data['id'], channel_data.get('id')), self._PAGE_SIZE), - display_id, playlist_title=channel_data.get('title') or creator_data.get('title'), - playlist_description=channel_data.get('about') or creator_data.get('about')) + display_id, title=channel_data.get('title') or creator_data.get('title'), + description=channel_data.get('about') or creator_data.get('about')) diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index 606b4f5d1e3b..1f0011c09fb4 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -35,8 +35,8 @@ unified_timestamp, unsmuggle_url, update_url_query, - urlhandle_detect_ext, url_or_none, + urlhandle_detect_ext, urljoin, variadic, xpath_attr, diff --git a/yt_dlp/extractor/nhk.py b/yt_dlp/extractor/nhk.py index cc3c791741a3..4b3d185a325f 100644 --- a/yt_dlp/extractor/nhk.py +++ b/yt_dlp/extractor/nhk.py @@ -665,7 +665,7 @@ def _real_extract(self, url): noa_info = self._download_json( f'https:{config.find(".//url_program_noa").text}'.format(area=data.find('areakey').text), - station, note=f'Downloading {area} station metadata') + station, note=f'Downloading {area} station metadata', fatal=False) present_info = traverse_obj(noa_info, ('nowonair_list', self._NOA_STATION_IDS.get(station), 'present')) return { diff --git a/yt_dlp/extractor/panopto.py b/yt_dlp/extractor/panopto.py index 5ab2b2bcec40..ddea32d70732 100644 --- a/yt_dlp/extractor/panopto.py +++ b/yt_dlp/extractor/panopto.py @@ -536,7 +536,7 @@ def _fetch_page(self, base_url, query_params, display_id, page): } response = self._call_api( - base_url, '/Services/Data.svc/GetSessions', f'{display_id} page {page+1}', + base_url, '/Services/Data.svc/GetSessions', f'{display_id} page {page + 1}', data={'queryParameters': params}, fatal=False) for result in get_first(response, 'Results', default=[]): diff --git a/yt_dlp/extractor/radiofrance.py b/yt_dlp/extractor/radiofrance.py index ec1b97631e5f..6bd6fe9b68b9 100644 --- a/yt_dlp/extractor/radiofrance.py +++ b/yt_dlp/extractor/radiofrance.py @@ -264,7 +264,7 @@ def _real_extract(self, url): } -class RadioFrancePlaylistBase(RadioFranceBaseIE): +class RadioFrancePlaylistBaseIE(RadioFranceBaseIE): """Subclasses must set _METADATA_KEY""" def _call_api(self, content_id, cursor, page_num): @@ -308,7 +308,7 @@ def _real_extract(self, url): })}) -class RadioFrancePodcastIE(RadioFrancePlaylistBase): +class RadioFrancePodcastIE(RadioFrancePlaylistBaseIE): _VALID_URL = rf'''(?x) {RadioFranceBaseIE._VALID_URL_BASE} /(?:{RadioFranceBaseIE._STATIONS_RE}) @@ -369,7 +369,7 @@ def _call_api(self, podcast_id, cursor, page_num): note=f'Downloading page {page_num}', query={'pageCursor': cursor}) -class RadioFranceProfileIE(RadioFrancePlaylistBase): +class RadioFranceProfileIE(RadioFrancePlaylistBaseIE): _VALID_URL = rf'{RadioFranceBaseIE._VALID_URL_BASE}/personnes/(?P<id>[\w-]+)' _TESTS = [{ diff --git a/yt_dlp/extractor/turbo.py b/yt_dlp/extractor/turbo.py deleted file mode 100644 index cdb7dcff85af..000000000000 --- a/yt_dlp/extractor/turbo.py +++ /dev/null @@ -1,64 +0,0 @@ -import re - -from .common import InfoExtractor -from ..compat import compat_str -from ..utils import ( - ExtractorError, - int_or_none, - qualities, - xpath_text, -) - - -class TurboIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?turbo\.fr/videos-voiture/(?P<id>[0-9]+)-' - _API_URL = 'http://www.turbo.fr/api/tv/xml.php?player_generique=player_generique&id={0:}' - _TEST = { - 'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html', - 'md5': '33f4b91099b36b5d5a91f84b5bcba600', - 'info_dict': { - 'id': '454443', - 'ext': 'mp4', - 'duration': 3715, - 'title': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia... ', - 'description': 'Turbo du 07/09/2014 : Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...', - 'thumbnail': r're:^https?://.*\.jpg$', - } - } - - def _real_extract(self, url): - mobj = self._match_valid_url(url) - video_id = mobj.group('id') - - webpage = self._download_webpage(url, video_id) - - playlist = self._download_xml(self._API_URL.format(video_id), video_id) - item = playlist.find('./channel/item') - if item is None: - raise ExtractorError('Playlist item was not found', expected=True) - - title = xpath_text(item, './title', 'title') - duration = int_or_none(xpath_text(item, './durate', 'duration')) - thumbnail = xpath_text(item, './visuel_clip', 'thumbnail') - description = self._html_search_meta('description', webpage) - - formats = [] - get_quality = qualities(['3g', 'sd', 'hq']) - for child in item: - m = re.search(r'url_video_(?P<quality>.+)', child.tag) - if m: - quality = compat_str(m.group('quality')) - formats.append({ - 'format_id': quality, - 'url': child.text, - 'quality': get_quality(quality), - }) - - return { - 'id': video_id, - 'title': title, - 'duration': duration, - 'thumbnail': thumbnail, - 'description': description, - 'formats': formats, - } diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index 3297ef091799..6dc0993afc7f 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -8,7 +8,6 @@ from ..compat import ( compat_parse_qs, compat_str, - compat_urllib_parse_urlencode, compat_urllib_parse_urlparse, ) from ..utils import ( @@ -191,6 +190,20 @@ def _get_thumbnails(self, thumbnail): 'url': thumbnail, }] if thumbnail else None + def _extract_twitch_m3u8_formats(self, video_id, token, signature): + """Subclasses must define _M3U8_PATH""" + return self._extract_m3u8_formats( + f'{self._USHER_BASE}/{self._M3U8_PATH}/{video_id}.m3u8', video_id, 'mp4', query={ + 'allow_source': 'true', + 'allow_audio_only': 'true', + 'allow_spectre': 'true', + 'p': random.randint(1000000, 10000000), + 'player': 'twitchweb', + 'playlist_include_framerate': 'true', + 'sig': signature, + 'token': token, + }) + class TwitchVodIE(TwitchBaseIE): IE_NAME = 'twitch:vod' @@ -203,6 +216,7 @@ class TwitchVodIE(TwitchBaseIE): ) (?P<id>\d+) ''' + _M3U8_PATH = 'vod' _TESTS = [{ 'url': 'http://www.twitch.tv/riotgames/v/6528877?t=5m10s', @@ -532,20 +546,8 @@ def _real_extract(self, url): info = self._extract_info_gql(video, vod_id) access_token = self._download_access_token(vod_id, 'video', 'id') - formats = self._extract_m3u8_formats( - '%s/vod/%s.m3u8?%s' % ( - self._USHER_BASE, vod_id, - compat_urllib_parse_urlencode({ - 'allow_source': 'true', - 'allow_audio_only': 'true', - 'allow_spectre': 'true', - 'player': 'twitchweb', - 'playlist_include_framerate': 'true', - 'nauth': access_token['value'], - 'nauthsig': access_token['signature'], - })), - vod_id, 'mp4', entry_protocol='m3u8_native') - + formats = self._extract_twitch_m3u8_formats( + vod_id, access_token['value'], access_token['signature']) formats.extend(self._extract_storyboard(vod_id, video.get('storyboard'), info.get('duration'))) self._prefer_source(formats) @@ -924,6 +926,7 @@ class TwitchStreamIE(TwitchBaseIE): ) (?P<id>[^/#?]+) ''' + _M3U8_PATH = 'api/channel/hls' _TESTS = [{ 'url': 'http://www.twitch.tv/shroomztv', @@ -1026,23 +1029,10 @@ def _real_extract(self, url): access_token = self._download_access_token( channel_name, 'stream', 'channelName') - token = access_token['value'] stream_id = stream.get('id') or channel_name - query = { - 'allow_source': 'true', - 'allow_audio_only': 'true', - 'allow_spectre': 'true', - 'p': random.randint(1000000, 10000000), - 'player': 'twitchweb', - 'playlist_include_framerate': 'true', - 'segment_preference': '4', - 'sig': access_token['signature'].encode('utf-8'), - 'token': token.encode('utf-8'), - } - formats = self._extract_m3u8_formats( - '%s/api/channel/hls/%s.m3u8' % (self._USHER_BASE, channel_name), - stream_id, 'mp4', query=query) + formats = self._extract_twitch_m3u8_formats( + channel_name, access_token['value'], access_token['signature']) self._prefer_source(formats) view_count = stream.get('viewers') diff --git a/yt_dlp/extractor/viously.py b/yt_dlp/extractor/viously.py new file mode 100644 index 000000000000..9ec7ed35f5da --- /dev/null +++ b/yt_dlp/extractor/viously.py @@ -0,0 +1,60 @@ +import base64 +import re + +from .common import InfoExtractor +from ..utils import ( + extract_attributes, + int_or_none, + parse_iso8601, +) +from ..utils.traversal import traverse_obj + + +class ViouslyIE(InfoExtractor): + _VALID_URL = False + _WEBPAGE_TESTS = [{ + 'url': 'http://www.turbo.fr/videos-voiture/454443-turbo-du-07-09-2014-renault-twingo-3-bentley-continental-gt-speed-ces-guide-achat-dacia.html', + 'md5': '37a6c3381599381ff53a7e1e0575c0bc', + 'info_dict': { + 'id': 'F_xQzS2jwb3', + 'ext': 'mp4', + 'title': 'Turbo du 07/09/2014\xa0: Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...', + 'description': 'Turbo du 07/09/2014\xa0: Renault Twingo 3, Bentley Continental GT Speed, CES, Guide Achat Dacia...', + 'age_limit': 0, + 'upload_date': '20230328', + 'timestamp': 1680037507, + 'duration': 3716, + 'categories': ['motors'], + } + }] + + def _extract_from_webpage(self, url, webpage): + viously_players = re.findall(r'<div[^>]*class="(?:[^"]*\s)?v(?:iou)?sly-player(?:\s[^"]*)?"[^>]*>', webpage) + if not viously_players: + return + + def custom_decode(text): + STANDARD_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=' + CUSTOM_ALPHABET = 'VIOUSLYABCDEFGHJKMNPQRTWXZviouslyabcdefghjkmnpqrtwxz9876543210+/=' + data = base64.b64decode(text.translate(str.maketrans(CUSTOM_ALPHABET, STANDARD_ALPHABET))) + return data.decode('utf-8').strip('\x00') + + for video_id in traverse_obj(viously_players, (..., {extract_attributes}, 'id')): + formats = self._extract_m3u8_formats( + f'https://www.viously.com/video/hls/{video_id}/index.m3u8', video_id, fatal=False) + if not formats: + continue + data = self._download_json( + f'https://www.viously.com/export/json/{video_id}', video_id, + transform_source=custom_decode, fatal=False) + yield { + 'id': video_id, + 'formats': formats, + **traverse_obj(data, ('video', { + 'title': ('title', {str}), + 'description': ('description', {str}), + 'duration': ('duration', {int_or_none}), + 'timestamp': ('iso_date', {parse_iso8601}), + 'categories': ('category', 'name', {str}, {lambda x: [x] if x else None}), + })), + } diff --git a/yt_dlp/extractor/wordpress.py b/yt_dlp/extractor/wordpress.py index 53820b57a907..378d99dbcca9 100644 --- a/yt_dlp/extractor/wordpress.py +++ b/yt_dlp/extractor/wordpress.py @@ -70,7 +70,7 @@ def _extract_from_webpage(self, url, webpage): 'height': int_or_none(traverse_obj(track, ('dimensions', 'original', 'height'))), 'width': int_or_none(traverse_obj(track, ('dimensions', 'original', 'width'))), } for track in traverse_obj(playlist_json, ('tracks', ...), expected_type=dict)] - yield self.playlist_result(entries, self._generic_id(url) + f'-wp-playlist-{i+1}', 'Wordpress Playlist') + yield self.playlist_result(entries, self._generic_id(url) + f'-wp-playlist-{i + 1}', 'Wordpress Playlist') class WordpressMiniAudioPlayerEmbedIE(InfoExtractor): diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 5b14b187a73e..88126d11f090 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -5297,6 +5297,7 @@ def _extract_webpage(self, url, item_id, fatal=True): # See: https://github.com/yt-dlp/yt-dlp/issues/116 if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'): retry.error = ExtractorError('Incomplete yt initial data received') + data = None continue return webpage, data diff --git a/yt_dlp/networking/__init__.py b/yt_dlp/networking/__init__.py index 96c5a0678f85..acadc0147d59 100644 --- a/yt_dlp/networking/__init__.py +++ b/yt_dlp/networking/__init__.py @@ -28,4 +28,3 @@ pass except Exception as e: warnings.warn(f'Failed to import "websockets" request handler: {e}' + bug_reports_message()) - diff --git a/yt_dlp/networking/_helper.py b/yt_dlp/networking/_helper.py index a6fa3550bd1e..d79dd795305a 100644 --- a/yt_dlp/networking/_helper.py +++ b/yt_dlp/networking/_helper.py @@ -219,7 +219,7 @@ def _socket_connect(ip_addr, timeout, source_address): sock.bind(source_address) sock.connect(sa) return sock - except socket.error: + except OSError: sock.close() raise @@ -237,7 +237,7 @@ def create_socks_proxy_socket(dest_addr, proxy_args, proxy_ip_addr, timeout, sou sock.bind(source_address) sock.connect(dest_addr) return sock - except socket.error: + except OSError: sock.close() raise @@ -255,7 +255,7 @@ def create_connection( host, port = address ip_addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM) if not ip_addrs: - raise socket.error('getaddrinfo returns an empty list') + raise OSError('getaddrinfo returns an empty list') if source_address is not None: af = socket.AF_INET if ':' not in source_address[0] else socket.AF_INET6 ip_addrs = [addr for addr in ip_addrs if addr[0] == af] @@ -272,7 +272,7 @@ def create_connection( # https://bugs.python.org/issue36820 err = None return sock - except socket.error as e: + except OSError as e: err = e try: diff --git a/yt_dlp/networking/_requests.py b/yt_dlp/networking/_requests.py index 9fb1d75f4a39..e129110ca471 100644 --- a/yt_dlp/networking/_requests.py +++ b/yt_dlp/networking/_requests.py @@ -188,6 +188,7 @@ class RequestsSession(requests.sessions.Session): """ Ensure unified redirect method handling with our urllib redirect handler. """ + def rebuild_method(self, prepared_request, response): new_method = get_redirect_method(prepared_request.method, response.status_code) @@ -218,6 +219,7 @@ def filter(self, record): class Urllib3LoggingHandler(logging.Handler): """Redirect urllib3 logs to our logger""" + def __init__(self, logger, *args, **kwargs): super().__init__(*args, **kwargs) self._logger = logger @@ -367,7 +369,7 @@ def _new_conn(self): self, f'Connection to {self.host} timed out. (connect timeout={self.timeout})') from e except SocksProxyError as e: raise urllib3.exceptions.ProxyError(str(e), e) from e - except (OSError, socket.error) as e: + except OSError as e: raise urllib3.exceptions.NewConnectionError( self, f'Failed to establish a new connection: {e}') from e diff --git a/yt_dlp/networking/_websockets.py b/yt_dlp/networking/_websockets.py index ad85554e459a..ed64080d62a2 100644 --- a/yt_dlp/networking/_websockets.py +++ b/yt_dlp/networking/_websockets.py @@ -5,20 +5,26 @@ import ssl import sys -from ._helper import create_connection, select_proxy, make_socks_proxy_opts, create_socks_proxy_socket -from .common import Response, register_rh, Features +from ._helper import ( + create_connection, + create_socks_proxy_socket, + make_socks_proxy_opts, + select_proxy, +) +from .common import Features, Response, register_rh from .exceptions import ( CertificateVerifyError, HTTPError, + ProxyError, RequestError, SSLError, - TransportError, ProxyError, + TransportError, ) from .websocket import WebSocketRequestHandler, WebSocketResponse from ..compat import functools from ..dependencies import websockets -from ..utils import int_or_none from ..socks import ProxyError as SocksProxyError +from ..utils import int_or_none if not websockets: raise ImportError('websockets is not installed') diff --git a/yt_dlp/networking/websocket.py b/yt_dlp/networking/websocket.py index 09fcf78ac2d6..0e7e73c9e22a 100644 --- a/yt_dlp/networking/websocket.py +++ b/yt_dlp/networking/websocket.py @@ -2,7 +2,7 @@ import abc -from .common import Response, RequestHandler +from .common import RequestHandler, Response class WebSocketResponse(Response): diff --git a/yt_dlp/socks.py b/yt_dlp/socks.py index e7f41d7e2a45..b4957ac2ed28 100644 --- a/yt_dlp/socks.py +++ b/yt_dlp/socks.py @@ -49,7 +49,7 @@ class Socks5AddressType: ATYP_IPV6 = 0x04 -class ProxyError(socket.error): +class ProxyError(OSError): ERR_SUCCESS = 0x00 def __init__(self, code=None, msg=None): diff --git a/yt_dlp/utils/_utils.py b/yt_dlp/utils/_utils.py index 361617c0287a..89a0d4cff1bc 100644 --- a/yt_dlp/utils/_utils.py +++ b/yt_dlp/utils/_utils.py @@ -558,7 +558,7 @@ def decode(self, s): s = self._close_object(e) if s is not None: continue - raise type(e)(f'{e.msg} in {s[e.pos-10:e.pos+10]!r}', s, e.pos) + raise type(e)(f'{e.msg} in {s[e.pos - 10:e.pos + 10]!r}', s, e.pos) assert False, 'Too many attempts to decode JSON' @@ -1885,6 +1885,7 @@ def setproctitle(title): buf = ctypes.create_string_buffer(len(title_bytes)) buf.value = title_bytes try: + # PR_SET_NAME = 15 Ref: /usr/include/linux/prctl.h libc.prctl(15, buf, 0, 0, 0) except AttributeError: return # Strange libc, just skip this @@ -2260,6 +2261,9 @@ def __getitem__(self, idx): raise self.IndexError() return entries[0] + def __bool__(self): + return bool(self.getslice(0, 1)) + class OnDemandPagedList(PagedList): """Download pages until a page with less than maximum results""" @@ -5070,7 +5074,7 @@ def truncate_string(s, left, right=0): assert left > 3 and right >= 0 if s is None or len(s) <= left + right: return s - return f'{s[:left-3]}...{s[-right:] if right else ""}' + return f'{s[:left - 3]}...{s[-right:] if right else ""}' def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None): diff --git a/yt_dlp/utils/traversal.py b/yt_dlp/utils/traversal.py index ff5703198aea..8938f4c78298 100644 --- a/yt_dlp/utils/traversal.py +++ b/yt_dlp/utils/traversal.py @@ -3,6 +3,7 @@ import inspect import itertools import re +import xml.etree.ElementTree from ._utils import ( IDENTITY, @@ -23,7 +24,7 @@ def traverse_obj( >>> obj = [{}, {"key": "value"}] >>> traverse_obj(obj, (1, "key")) - "value" + 'value' Each of the provided `paths` is tested and the first producing a valid result will be returned. The next path will also be tested if the path branched but no results could be found. @@ -118,7 +119,7 @@ def apply_key(key, obj, is_last): branching = True if isinstance(obj, collections.abc.Mapping): result = obj.values() - elif is_iterable_like(obj): + elif is_iterable_like(obj) or isinstance(obj, xml.etree.ElementTree.Element): result = obj elif isinstance(obj, re.Match): result = obj.groups() @@ -132,7 +133,7 @@ def apply_key(key, obj, is_last): branching = True if isinstance(obj, collections.abc.Mapping): iter_obj = obj.items() - elif is_iterable_like(obj): + elif is_iterable_like(obj) or isinstance(obj, xml.etree.ElementTree.Element): iter_obj = enumerate(obj) elif isinstance(obj, re.Match): iter_obj = itertools.chain( @@ -168,7 +169,7 @@ def apply_key(key, obj, is_last): result = next((v for k, v in obj.groupdict().items() if casefold(k) == key), None) elif isinstance(key, (int, slice)): - if is_iterable_like(obj, collections.abc.Sequence): + if is_iterable_like(obj, (collections.abc.Sequence, xml.etree.ElementTree.Element)): branching = isinstance(key, slice) with contextlib.suppress(IndexError): result = obj[key] @@ -176,6 +177,34 @@ def apply_key(key, obj, is_last): with contextlib.suppress(IndexError): result = str(obj)[key] + elif isinstance(obj, xml.etree.ElementTree.Element) and isinstance(key, str): + xpath, _, special = key.rpartition('/') + if not special.startswith('@') and special != 'text()': + xpath = key + special = None + + # Allow abbreviations of relative paths, absolute paths error + if xpath.startswith('/'): + xpath = f'.{xpath}' + elif xpath and not xpath.startswith('./'): + xpath = f'./{xpath}' + + def apply_specials(element): + if special is None: + return element + if special == '@': + return element.attrib + if special.startswith('@'): + return try_call(element.attrib.get, args=(special[1:],)) + if special == 'text()': + return element.text + assert False, f'apply_specials is missing case for {special!r}' + + if xpath: + result = list(map(apply_specials, obj.iterfind(xpath))) + else: + result = apply_specials(obj) + return branching, result if branching else (result,) def lazy_last(iterable): diff --git a/yt_dlp/version.py b/yt_dlp/version.py index fd923fe45e77..687ef8788f78 100644 --- a/yt_dlp/version.py +++ b/yt_dlp/version.py @@ -1,8 +1,8 @@ # Autogenerated by devscripts/update-version.py -__version__ = '2023.11.16' +__version__ = '2023.12.30' -RELEASE_GIT_HEAD = '24f827875c6ba513f12ed09a3aef2bbed223760d' +RELEASE_GIT_HEAD = 'f10589e3453009bb523f55849bba144c9b91cf2a' VARIANT = None @@ -12,4 +12,4 @@ ORIGIN = 'yt-dlp/yt-dlp' -_pkg_version = '2023.11.16' +_pkg_version = '2023.12.30'