diff --git a/README.md b/README.md index 2e1576d..ba5e6a6 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,9 @@ docker run --name bgutil-provider -d -p 4416:4416 brainicism/bgutil-ytdlp-pot-pr **Native:** ```shell -cd server/ +# Replace 0.4.0 with the latest version, or with the version matching your installed plugin +git clone --single-branch --branch 0.4.0 https://github.com/Brainicism/bgutil-ytdlp-pot-provider.git +cd bgutil-ytdlp-pot-provider/server/ yarn install --frozen-lockfile npx tsc node build/main.js @@ -60,20 +62,24 @@ node build/main.js **Endpoints** - **POST /get_pot**: Accepts a `visitor_data` (unauthenticated), `data_sync_id` (authenticated) or an empty body in the request body. If no identifier is passed, a new unauthenticated `visitor_data` will be generated. Returns `po_token` and the associated identifier `visit_identifier`. -- **POST /invalidate_caches**: Resets the PO token cache, forcing new tokens to be generated on next fetch +- **POST /invalidate_caches**: Resets the PO token cache, forcing new tokens to be generated on next fetch. #### (b) Generation Script Option -The generation script needs to be transpiled to Javascript before it can be used by the plugin. +1. Transpile the generation script to JavaScript: ```shell -cd server/ +# Clone/extract the contents into your home directory (`~/` on Unix-based systems, `%USERPROFILE%` for Windows) if you want to use this method without needing to specify the `getpot_bgutil_script` extractor argument on each yt-dlp invocation. +cd ~ +# Replace 0.4.0 with the latest version, or with the version matching your installed plugin +git clone --single-branch --branch 0.4.0 https://github.com/Brainicism/bgutil-ytdlp-pot-provider.git +cd bgutil-ytdlp-pot-provider/server/ yarn install --frozen-lockfile npx tsc ``` -Make sure `node` is available in your `PATH`. +2. Make sure `node` is available in your `PATH`. ### 2. 
Install the plugin @@ -92,7 +98,7 @@ This will automatically install [coletdjnz's POT plugin framework](https://githu ## Usage -If using option (a) HTTP Server for the provider, use yt-dlp like normal 🙂. +If using option (a) HTTP Server for the provider with the default IP/port number, you can use yt-dlp like normal 🙂. If you want to change the port number used by the provider server, use the `--port` option. @@ -108,8 +114,14 @@ If changing the port or IP used for the provider server, pass it to yt-dlp via ` --- -If using option (b) script for the provider, you need to pass the extractor argument `getpot_bgutil_script` to `youtube` for each yt-dlp call. The argument should include the path to the transpiled generation script (`server/build/generate_once.js`). +If using option (b) script for the provider, with the default script location in your home directory (i.e. `~/bgutil-ytdlp-pot-provider`), you can also use yt-dlp like normal. + +If you installed the script in a different location, pass the path to the transpiled generation script (`server/build/generate_once.js`) as the extractor argument `getpot_bgutil_script` to `youtube` for each yt-dlp call. ```shell --extractor-args "youtube:getpot_bgutil_script=$WORKSPACE/bgutil-ytdlp-pot-provider/server/build/generate_once.js" ``` + +--- + +If both methods are available for use, the option (b) script will be prioritized. 
diff --git a/plugin/pyproject.toml b/plugin/pyproject.toml index fcf1834..ead9bc9 100644 --- a/plugin/pyproject.toml +++ b/plugin/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "hatchling.build" [project] name = "bgutil-ytdlp-pot-provider" -version = "0.4.0" +dynamic = ["version"] readme = {file = "README.md", content-type = "text/markdown"} classifiers = [ "License :: OSI Approved :: GNU General Public License v3 (GPLv3)" @@ -27,3 +27,6 @@ packages = ["yt_dlp_plugins"] [tool.hatch.metadata] allow-direct-references = true + +[tool.hatch.version] +path = "yt_dlp_plugins/extractor/getpot_bgutil.py" diff --git a/plugin/yt_dlp_plugins/extractor/getpot_bgutil.py b/plugin/yt_dlp_plugins/extractor/getpot_bgutil.py index 2ede751..abeeedb 100644 --- a/plugin/yt_dlp_plugins/extractor/getpot_bgutil.py +++ b/plugin/yt_dlp_plugins/extractor/getpot_bgutil.py @@ -1,105 +1 @@ -import json -import subprocess -import os.path -import shutil -from yt_dlp import YoutubeDL - -from yt_dlp.networking.common import Request -from yt_dlp.networking.exceptions import RequestError, UnsupportedRequest -from yt_dlp.utils import Popen -from yt_dlp_plugins.extractor.getpot import GetPOTProvider, register_provider - - -@register_provider -class BgUtilPotProviderRH(GetPOTProvider): - _PROVIDER_NAME = 'BgUtilPot' - _SUPPORTED_CLIENTS = ('web_creator', 'web', 'web_embedded', 'web_music') - - def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs): - if not data_sync_id and not visitor_data: - raise UnsupportedRequest( - 'One of [data_sync_id, visitor_data] must be passed') - - def _get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs) -> str: - generate_pot_script_path = ydl.get_info_extractor('Youtube')._configuration_arg( - 'getpot_bgutil_script', [None], casesense=True)[0] - http_base_url = ydl.get_info_extractor('Youtube')._configuration_arg( - 'getpot_bgutil_baseurl', [None], 
casesense=True)[0] - if generate_pot_script_path: - self._logger.info( - f'Generating POT via script: {generate_pot_script_path}') - po_token = self._get_pot_via_script( - generate_pot_script_path, visitor_data, data_sync_id) - else: - self._logger.info('Generating POT via HTTP server') - po_token = self._get_pot_via_http( - ydl, client, visitor_data, data_sync_id, http_base_url) - - return po_token - - def _get_pot_via_http(self, ydl, client, visitor_data, data_sync_id, base_url): - if base_url is None: - base_url = 'http://127.0.0.1:4416' - try: - response = ydl.urlopen(Request(f'{base_url}/get_pot', data=json.dumps({ - 'client': client, - 'visitor_data': visitor_data, - 'data_sync_id': data_sync_id, - }).encode(), headers={'Content-Type': 'application/json'})) - except Exception as e: - raise RequestError(f'Error reaching POST /get_pot: {str(e)}') - - try: - response_json = json.load(response) - except Exception as e: - raise RequestError( - f'Error parsing response JSON(caused by {str(e)}). response = {response.read().decode()}') - - if error_msg := response_json.get('error'): - raise RequestError(error_msg) - if 'po_token' not in response_json: - raise RequestError('Server did not respond with a po_token') - - return response_json['po_token'] - - def _get_pot_via_script(self, script_path, visitor_data, data_sync_id): - if not os.path.isfile(script_path): - raise RequestError(f"Script path doesn't exist: {script_path}") - if os.path.basename(script_path) != 'generate_once.js': - raise RequestError( - 'Incorrect script passed to extractor args. 
Path to generate_once.js required') - if shutil.which('node') is None: - raise RequestError('node is not in PATH') - - command_args = ['node', script_path] - if data_sync_id: - command_args.extend(['-d', data_sync_id]) - elif visitor_data: - command_args.extend(['-v', visitor_data]) - else: - raise RequestError( - 'Unexpected missing visitorData and dataSyncId in _get_pot_via_script') - self._logger.debug( - f'Executing command to get POT via script: {" ".join(command_args)}') - - try: - stdout, stderr, returncode = Popen.run( - command_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) - except Exception as e: - raise RequestError( - f'_get_pot_via_script failed: Unable to run script(caused by {str(e)})') - - self._logger.debug(f'stdout = {stdout}') - if returncode: - raise RequestError( - f'_get_pot_via_script failed with returncode {returncode}:\n{stderr.strip()}') - - # The JSON response is always the last line - script_data_resp = stdout.splitlines()[-1] - self._logger.debug( - f'_get_pot_via_script response = {script_data_resp}') - try: - return json.loads(script_data_resp)['poToken'] - except (json.JSONDecodeError, TypeError, KeyError) as e: - raise RequestError( - f'Error parsing JSON response from _get_pot_via_script(caused by {str(e)})') +__version__ = '0.4.0' diff --git a/plugin/yt_dlp_plugins/extractor/getpot_bgutil_http.py b/plugin/yt_dlp_plugins/extractor/getpot_bgutil_http.py new file mode 100644 index 0000000..c1a82ac --- /dev/null +++ b/plugin/yt_dlp_plugins/extractor/getpot_bgutil_http.py @@ -0,0 +1,54 @@ +import json +from yt_dlp import YoutubeDL + +from yt_dlp.networking.common import Request +from yt_dlp.networking.exceptions import RequestError, UnsupportedRequest +from yt_dlp_plugins.extractor.getpot import GetPOTProvider, register_provider, register_preference +from yt_dlp_plugins.extractor.getpot_bgutil import __version__ + + +@register_provider +class BgUtilHTTPPotProviderRH(GetPOTProvider): + _PROVIDER_NAME = 'BgUtilHTTPPot' 
+ _SUPPORTED_CLIENTS = ('web', 'web_safari', 'web_embedded', 'web_music', 'web_creator') + VERSION = __version__ + + def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs): + base_url = ydl.get_info_extractor('Youtube')._configuration_arg( + 'getpot_bgutil_baseurl', ['http://127.0.0.1:4416'], casesense=True)[0] + if not data_sync_id and not visitor_data: + raise UnsupportedRequest( + 'One of [data_sync_id, visitor_data] must be passed') + # TODO: Ping the server + self.base_url = base_url + + def _get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs) -> str: + self._logger.info('Generating POT via HTTP server') + + try: + response = ydl.urlopen(Request(f'{self.base_url}/get_pot', data=json.dumps({ + 'client': client, + 'visitor_data': visitor_data, + 'data_sync_id': data_sync_id, + }).encode(), headers={'Content-Type': 'application/json'})) + except Exception as e: + raise RequestError( + f'Error reaching POST /get_pot (caused by {str(e)})') from e + + try: + response_json = json.load(response) + except Exception as e: + raise RequestError( + f'Error parsing response JSON (caused by {str(e)}). 
response = {response.read().decode()}') from e + + if error_msg := response_json.get('error'): + raise RequestError(error_msg) + if 'po_token' not in response_json: + raise RequestError('Server did not respond with a po_token') + + return response_json['po_token'] + + +@register_preference(BgUtilHTTPPotProviderRH) +def bgutil_HTTP_getpot_preference(rh, request): + return 0 diff --git a/plugin/yt_dlp_plugins/extractor/getpot_bgutil_script.py b/plugin/yt_dlp_plugins/extractor/getpot_bgutil_script.py new file mode 100644 index 0000000..383b23d --- /dev/null +++ b/plugin/yt_dlp_plugins/extractor/getpot_bgutil_script.py @@ -0,0 +1,81 @@ +import json +import subprocess +import os.path +import shutil +from yt_dlp import YoutubeDL + +from yt_dlp.networking.exceptions import RequestError, UnsupportedRequest +from yt_dlp.utils import Popen, classproperty +from yt_dlp_plugins.extractor.getpot import GetPOTProvider, register_provider, register_preference +from yt_dlp_plugins.extractor.getpot_bgutil import __version__ + + +@register_provider +class BgUtilScriptPotProviderRH(GetPOTProvider): + _PROVIDER_NAME = 'BgUtilScriptPot' + _SUPPORTED_CLIENTS = ('web', 'web_safari', 'web_embedded', 'web_music', 'web_creator') + VERSION = __version__ + + @classproperty(cache=True) + def _default_script_path(self): + home = os.path.expanduser('~') + return os.path.join( + home, 'bgutil-ytdlp-pot-provider', 'server', 'build', 'generate_once.js') + + def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs): + script_path = ydl.get_info_extractor('Youtube')._configuration_arg( + 'getpot_bgutil_script', [self._default_script_path], casesense=True)[0] + if not data_sync_id and not visitor_data: + raise UnsupportedRequest( + 'One of [data_sync_id, visitor_data] must be passed') + if not os.path.isfile(script_path): + raise UnsupportedRequest( + f"Script path doesn't exist: {script_path}") + if os.path.basename(script_path) != 
'generate_once.js': + raise UnsupportedRequest( + 'Incorrect script passed to extractor args. Path to generate_once.js required') + if shutil.which('node') is None: + raise UnsupportedRequest('node is not in PATH') + self.script_path = script_path + + def _get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs) -> str: + self._logger.info( + f'Generating POT via script: {self.script_path}') + + command_args = ['node', self.script_path] + if data_sync_id: + command_args.extend(['-d', data_sync_id]) + elif visitor_data: + command_args.extend(['-v', visitor_data]) + else: + raise RequestError( + 'Unexpected missing visitorData and dataSyncId in _get_pot_via_script') + self._logger.debug( + f'Executing command to get POT via script: {" ".join(command_args)}') + + try: + stdout, stderr, returncode = Popen.run( + command_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + except Exception as e: + raise RequestError( + f'_get_pot_via_script failed: Unable to run script (caused by {str(e)})') from e + + self._logger.debug(f'stdout = {stdout}') + if returncode: + raise RequestError( + f'_get_pot_via_script failed with returncode {returncode}:\n{stderr.strip()}') + + # The JSON response is always the last line + script_data_resp = stdout.splitlines()[-1] + self._logger.debug( + f'_get_pot_via_script response = {script_data_resp}') + try: + return json.loads(script_data_resp)['poToken'] + except (json.JSONDecodeError, TypeError, KeyError) as e: + raise RequestError( + f'Error parsing JSON response from _get_pot_via_script (caused by {str(e)})') from e + + +@register_preference(BgUtilScriptPotProviderRH) +def bgutil_script_getpot_preference(rh, request): + return 100