Skip to content

Commit

Permalink
[plugin/misc] cleanup
Browse files Browse the repository at this point in the history
* allow custom http server base url by passing argument `getpot_bgutil_baseurl` (defaults to `http://127.0.0.1:4416`)
* Code formatting (use single quotes): be consistent, remove trailing whitespace
* Use `yt_dlp.utils.Popen` instead of `subprocess.Popen`
* Add cause to several `RequestError`s
* Do not use bare `except`
[server] add options to main.ts, misc
* generate_once.ts: Remove repeated logging
* main.ts: allow custom http server port by passing `-p, --port <PORT>` (defaults to 4416)
* main.ts: allow verbose logging for `SessionManager` by passing argument `--verbose`
* main.ts: return a JSON error object when it fails to generate visitor data
  • Loading branch information
grqz committed Sep 9, 2024
1 parent 78a39f0 commit 8eb6db2
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 35 deletions.
65 changes: 36 additions & 29 deletions plugin/yt_dlp_plugins/extractor/getpot_bgutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@

from yt_dlp.networking.common import Request
from yt_dlp.networking.exceptions import RequestError, UnsupportedRequest
from yt_dlp_plugins.extractor.getpot import GetPOTProvider, register_provider, register_preference
from yt_dlp.utils import Popen
from yt_dlp_plugins.extractor.getpot import GetPOTProvider, register_provider


@register_provider
Expand All @@ -20,65 +21,71 @@ def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data

def _get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs) -> str:
generate_pot_script_path = ydl.get_info_extractor('Youtube')._configuration_arg('getpot_bgutil_script', [None], casesense=True)[0]
http_base_url = ydl.get_info_extractor('Youtube')._configuration_arg('getpot_bgutil_baseurl', ['http://127.0.0.1:4416'], casesense=True)[0]
if generate_pot_script_path:
self._logger.info(f"Generating POT via script: {generate_pot_script_path}")
self._logger.info(f'Generating POT via script: {generate_pot_script_path}')
po_token = self._get_pot_via_script(generate_pot_script_path, visitor_data, data_sync_id)
return po_token
else:
self._logger.info(f"Generating POT via HTTP server")
po_token = self._get_pot_via_http(ydl, client, visitor_data, data_sync_id)
self._logger.info('Generating POT via HTTP server')
po_token = self._get_pot_via_http(ydl, client, visitor_data, data_sync_id, http_base_url)

return po_token

def _get_pot_via_http(self, ydl, client, visitor_data, data_sync_id):
def _get_pot_via_http(self, ydl, client, visitor_data, data_sync_id, base_url):
try:
response = ydl.urlopen(Request('http://127.0.0.1:4416/get_pot', data=json.dumps({
'client': client,
'visitor_data': visitor_data,
'data_sync_id': data_sync_id
}).encode(), headers = {'Content-Type': 'application/json'}))
}).encode(), headers={'Content-Type': 'application/json'}))
except Exception as e:
raise RequestError(f"Error reaching POST /get_pot: {str(e)}")
raise RequestError(f'Error reaching POST /get_pot: {str(e)}')

try:
response_json = json.loads(response.read().decode('utf-8'))
except Exception as e:
raise RequestError(f"Error parsing response JSON: {str(e)}. response = {response.read().decode('utf-8')}")

raise RequestError(f'Error parsing response JSON. response = {response.read().decode("utf-8")}', cause=e)

if error_msg := response_json.get('error'):
raise RequestError(error_msg)
if 'po_token' not in response_json:
raise RequestError('Server did not respond with a po_token')

return response_json["po_token"]
return response_json['po_token']

def _get_pot_via_script(self, script_path, visitor_data, data_sync_id):
if not os.path.isfile(script_path):
raise RequestError(f"Script path doesn't exist: {script_path}")
if os.path.basename(script_path) != 'generate_once.js':
raise RequestError(f"Incorrect script passed to extractor args. Path to generate_once.js required")
raise RequestError('Incorrect script passed to extractor args. Path to generate_once.js required')
if shutil.which('node') is None:
raise RequestError(f"node is not in PATH")
raise RequestError('node is not in PATH')

# possibly vulnerable to shell injection here? but risk is low
command_args = ['node', script_path]
if data_sync_id:
command_args.extend(["-d", data_sync_id])
command_args.extend(['-d', data_sync_id])
elif visitor_data:
command_args.extend(["-v", visitor_data])
command_args.extend(['-v', visitor_data])
else:
raise RequestError("Unexpected missing visitorData/dataSyncId in _get_pot_via_script")
self._logger.debug(f"Executing command to get POT via script: {' '.join(command_args)}")
raise RequestError('Unexpected missing visitorData/dataSyncId in _get_pot_via_script')
self._logger.debug(f'Executing command to get POT via script: {" ".join(command_args)}')

try:
stdout, stderr, returncode = Popen.run(
command_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
except Exception as e:
raise RequestError('_get_pot_via_script failed: Unable to run script', cause=e)

self._logger.debug(f'stdout = {stdout}')
if returncode:
raise RequestError(
f'_get_pot_via_script failed with returncode {returncode}:\n{stderr.strip()}')

result = subprocess.run(command_args,stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

self._logger.debug(f"stdout = {result.stdout}")
if result.stderr or result.returncode != 0:
raise RequestError(f"_get_pot_via_script failed with return code {result.returncode}. stderr = {result.stderr}")

# the JSON response is always the last line
script_data_resp = result.stdout.splitlines()[-1]
self._logger.debug(f"_get_pot_via_script response = {script_data_resp}")
script_data_resp = stdout.splitlines()[-1]
self._logger.debug(f'_get_pot_via_script response = {script_data_resp}')
try:
return json.loads(script_data_resp)['poToken']
except:
raise RequestError("Error parsing JSON response from _get_pot_via_script")

except (json.JSONDecodeError, TypeError, KeyError) as e:
raise RequestError('Error parsing JSON response from _get_pot_via_script', cause=e)
3 changes: 0 additions & 3 deletions server/src/generate_once.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,6 @@ const options = program.opts();
}

if (verbose) {
console.log(
`Received request for visitor data, grabbing from Innertube`,
);
console.log(`Generated visitor data: ${generatedVisitorData}`);
}

Expand Down
14 changes: 11 additions & 3 deletions server/src/main.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
import { SessionManager } from "./session_manager";
import { Command } from "@commander-js/extra-typings";
import express from "express";
import bodyParser from "body-parser";

const PORT_NUMBER = 4416;
const program = new Command()
.option("-p, --port <PORT>")
.option("--verbose");

program.parse();
const options = program.opts();

const PORT_NUMBER = options.port || 4416;

const httpServer = express();
httpServer.use(bodyParser.json());
Expand All @@ -14,7 +22,7 @@ httpServer.listen({

console.log(`Started POT server on port ${PORT_NUMBER}`);

const sessionManager = new SessionManager();
const sessionManager = new SessionManager(options.verbose || false);
httpServer.post("/get_pot", async (request, response) => {
const visitorData = request.body.visitor_data as string;
const dataSyncId = request.body.data_sync_id as string;
Expand All @@ -36,7 +44,7 @@ httpServer.post("/get_pot", async (request, response) => {
const generatedVisitorData = await sessionManager.generateVisitorData();
if (!generatedVisitorData) {
response.status(500);
response.send("Error generating visitor data");
response.send({error: "Error generating visitor data"});
return;
}

Expand Down

0 comments on commit 8eb6db2

Please sign in to comment.