Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[server] Add support for proxies #33

Merged
merged 25 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
c22d116
[plugin] pass proxy to the server
grqz Sep 17, 2024
cb7067e
sort import block
grqz Sep 17, 2024
40e4a2e
[debug] print proxies
grqz Sep 17, 2024
6142dea
stringify proxies
grqz Sep 17, 2024
1813218
Server side implementation WIP
Brainicism Sep 17, 2024
b5665f9
use ydl.urlopen again
Brainicism Sep 18, 2024
472e7e1
remove redundant object.values
Brainicism Sep 18, 2024
5928cad
remove ellipsis from the features tuple
grqz Sep 18, 2024
2e5bfc5
[plugin] remove redundant assignment to rh.proxies
grqx Sep 18, 2024
4ead3d2
Add error handling for bgutils functions
Brainicism Sep 18, 2024
32dd899
add support for proxies in process.env
grqx Sep 18, 2024
70f344e
variable naming
grqx Sep 18, 2024
a80e3cf
code formatting
grqx Sep 18, 2024
51a217a
process undefined proxy
grqx Sep 18, 2024
756c316
Error handling for bgConfig fetch
Brainicism Sep 18, 2024
9645c41
Merge branch 'server/proxy' of github.com:Brainicism/bgutil-ytdlp-pot…
Brainicism Sep 18, 2024
8385c3c
Add support for env ALL_PROXY
grqx Sep 18, 2024
6f7ecd2
code formatting
grqx Sep 18, 2024
7a0a9b0
Simplify retrieving proxy from env variables
Brainicism Sep 18, 2024
06728e7
Add trailing comma for _SUPPORTED_FEATURES
Brainicism Sep 18, 2024
74d2ba0
prioritise env HTTPS_PROXY over ALL_PROXY
grqx Sep 18, 2024
9d02474
Select proxy for youtube
Brainicism Sep 19, 2024
d97049e
Merge branch 'server/proxy' of github.com:Brainicism/bgutil-ytdlp-pot…
Brainicism Sep 19, 2024
231045e
switch to select_proxy with yt api hostname
grqx Sep 19, 2024
e1f7ea2
fix proxy type: bool->str
grqx Sep 19, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions plugin/yt_dlp_plugins/extractor/getpot_bgutil_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
if typing.TYPE_CHECKING:
from yt_dlp import YoutubeDL

from yt_dlp.networking.common import Request
from yt_dlp.networking._helper import select_proxy
from yt_dlp.networking.common import Features, Request
from yt_dlp.networking.exceptions import RequestError, UnsupportedRequest

try:
Expand All @@ -21,8 +22,12 @@
@register_provider
class BgUtilHTTPPotProviderRH(GetPOTProvider):
_PROVIDER_NAME = 'BgUtilHTTPPot'
_SUPPORTED_CLIENTS = ('web', 'web_safari', 'web_embedded', 'web_music', 'web_creator', 'mweb', 'tv_embedded', 'tv')
_SUPPORTED_CLIENTS = ('web', 'web_safari', 'web_embedded',
'web_music', 'web_creator', 'mweb', 'tv_embedded', 'tv')
VERSION = __version__
_SUPPORTED_PROXY_SCHEMES = (
'http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
_SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)

def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs):
base_url = ydl.get_info_extractor('Youtube')._configuration_arg(
Expand All @@ -31,9 +36,11 @@ def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data
raise UnsupportedRequest(
'One of [data_sync_id, visitor_data] must be passed')
try:
response = ydl.urlopen(Request(f'{base_url}/ping', extensions={'timeout': 5.0}))
response = ydl.urlopen(Request(
f'{base_url}/ping', extensions={'timeout': 5.0}, proxies={'all': None}))
except Exception as e:
raise UnsupportedRequest(f'Error reaching GET /ping (caused by {e!s})') from e
raise UnsupportedRequest(
f'Error reaching GET /ping (caused by {e!s})') from e
try:
response = json.load(response)
except json.JSONDecodeError as e:
Expand All @@ -51,15 +58,21 @@ def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data

def _get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs) -> str:
self._logger.info('Generating POT via HTTP server')
if ((proxy := select_proxy('https://jnn-pa.googleapis.com', self.proxies))
!= select_proxy('https://youtube.com', self.proxies)):
self._logger.warning(
'Proxies for https://youtube.com and https://jnn-pa.googleapis.com are different. '
'This is likely to cause subsequent errors.')

try:
response = ydl.urlopen(Request(
f'{self.base_url}/get_pot', data=json.dumps({
'client': client,
'visitor_data': visitor_data,
'data_sync_id': data_sync_id,
'proxy': proxy,
}).encode(), headers={'Content-Type': 'application/json'},
extensions={'timeout': 12.5}))
extensions={'timeout': 12.5}, proxies={'all': None}))
except Exception as e:
raise RequestError(
f'Error reaching POST /get_pot (caused by {e!s})') from e
Expand Down
18 changes: 15 additions & 3 deletions plugin/yt_dlp_plugins/extractor/getpot_bgutil_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

if typing.TYPE_CHECKING:
from yt_dlp import YoutubeDL
from yt_dlp.networking._helper import select_proxy
from yt_dlp.networking.common import Features
from yt_dlp.networking.exceptions import RequestError, UnsupportedRequest
from yt_dlp.utils import Popen, classproperty

Expand All @@ -23,8 +25,12 @@
@register_provider
class BgUtilScriptPotProviderRH(GetPOTProvider):
_PROVIDER_NAME = 'BgUtilScriptPot'
_SUPPORTED_CLIENTS = ('web', 'web_safari', 'web_embedded', 'web_music', 'web_creator', 'mweb', 'tv_embedded', 'tv')
_SUPPORTED_CLIENTS = ('web', 'web_safari', 'web_embedded',
'web_music', 'web_creator', 'mweb', 'tv_embedded', 'tv')
VERSION = __version__
_SUPPORTED_PROXY_SCHEMES = (
'http', 'https', 'socks4', 'socks4a', 'socks5', 'socks5h')
_SUPPORTED_FEATURES = (Features.NO_PROXY, Features.ALL_PROXY)

@classproperty(cache=True)
def _default_script_path(self):
Expand All @@ -51,8 +57,13 @@ def _validate_get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data
def _get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=None, player_url=None, **kwargs) -> str:
self._logger.info(
f'Generating POT via script: {self.script_path}')

command_args = ['node', self.script_path]
if proxy := select_proxy('https://jnn-pa.googleapis.com', self.proxies):
if proxy != select_proxy('https://youtube.com', self.proxies):
self._logger.warning(
'Proxies for https://youtube.com and https://jnn-pa.googleapis.com are different. '
'This is likely to cause subsequent errors.')
command_args.extend(['-p', proxy])
if data_sync_id:
command_args.extend(['-d', data_sync_id])
elif visitor_data:
Expand All @@ -75,7 +86,8 @@ def _get_pot(self, client: str, ydl: YoutubeDL, visitor_data=None, data_sync_id=
msg += f'\nstderr:\n{stderr.strip()}'
self._logger.debug(msg)
if returncode:
raise RequestError(f'_get_pot_via_script failed with returncode {returncode}')
raise RequestError(
f'_get_pot_via_script failed with returncode {returncode}')

try:
# The JSON response is always the last line
Expand Down
3 changes: 3 additions & 0 deletions server/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,14 @@
},
"dependencies": {
"@commander-js/extra-typings": "commander-js/extra-typings",
"axios": "^1.7.7",
"bgutils-js": "^1.1.0",
"body-parser": "^1.20.2",
"commander": "^12.1.0",
"express": "^4.19.2",
"https-proxy-agent": "^7.0.5",
"jsdom": "^25.0.0",
"socks-proxy-agent": "^8.0.4",
"youtubei.js": "^10.4.0"
},
"devDependencies": {
Expand Down
10 changes: 8 additions & 2 deletions server/src/generate_once.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@ const CACHE_PATH = path.resolve(__dirname, "..", "cache.json");
const program = new Command()
.option("-v, --visitor-data <visitordata>")
.option("-d, --data-sync-id <data-sync-id>")
.option("-p, --proxy <proxy-all>")
.option("--verbose");

program.parse();
const options = program.opts();

(async () => {
const dataSyncId = options.dataSyncId;
const visitorData = options.visitorData;
const dataSyncId = options.dataSyncId;
const proxy = options.proxy || "";
const verbose = options.verbose || false;
let visitIdentifier: string;
const cache: YoutubeSessionDataCaches = {};
Expand Down Expand Up @@ -57,7 +59,11 @@ const options = program.opts();
visitIdentifier = generatedVisitorData;
}

const sessionData = await sessionManager.generatePoToken(visitIdentifier);
const sessionData = await sessionManager.generatePoToken(
visitIdentifier,
proxy,
);

try {
fs.writeFileSync(
CACHE_PATH,
Expand Down
7 changes: 5 additions & 2 deletions server/src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ const sessionManager = new SessionManager(options.verbose || false);
httpServer.post("/get_pot", async (request, response) => {
const visitorData = request.body.visitor_data as string;
const dataSyncId = request.body.data_sync_id as string;

const proxy: string = request.body.proxy;
let visitIdentifier: string;

// prioritize data sync id for authenticated requests, if passed
Expand All @@ -51,7 +51,10 @@ httpServer.post("/get_pot", async (request, response) => {
visitIdentifier = generatedVisitorData;
}

const sessionData = await sessionManager.generatePoToken(visitIdentifier);
const sessionData = await sessionManager.generatePoToken(
visitIdentifier,
proxy,
);
response.send({
po_token: sessionData.poToken,
visit_identifier: sessionData.visitIdentifier,
Expand Down
147 changes: 123 additions & 24 deletions server/src/session_manager.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import { BG } from "bgutils-js";
import { BG, BgConfig, DescrambledChallenge } from "bgutils-js";
import { JSDOM } from "jsdom";
import { Innertube } from "youtubei.js";
import { HttpsProxyAgent } from "https-proxy-agent";
import axios from "axios";
import { Agent } from "https";
import { SocksProxyAgent } from "socks-proxy-agent";

interface YoutubeSessionData {
poToken: string;
Expand All @@ -12,17 +16,40 @@ export interface YoutubeSessionDataCaches {
[visitIdentifier: string]: YoutubeSessionData;
}

export class SessionManager {
shouldLog: boolean;
/**
 * Small console wrapper whose output can be switched off as a whole.
 *
 * When constructed with `shouldLog = false`, every level is silenced,
 * including `warn` and `error`.
 */
class Logger {
    private shouldLog: boolean;

    /**
     * @param shouldLog whether messages are forwarded to the console
     *                  (defaults to `true`).
     */
    constructor(shouldLog = true) {
        this.shouldLog = shouldLog;
    }

    /** Forward `msg` to `console.debug` unless logging is disabled. */
    debug(msg: string) {
        if (!this.shouldLog) return;
        console.debug(msg);
    }

    /** Forward `msg` to `console.log` unless logging is disabled. */
    log(msg: string) {
        if (!this.shouldLog) return;
        console.log(msg);
    }

    /** Forward `msg` to `console.warn` unless logging is disabled. */
    warn(msg: string) {
        if (!this.shouldLog) return;
        console.warn(msg);
    }

    /** Forward `msg` to `console.error` unless logging is disabled. */
    error(msg: string) {
        if (!this.shouldLog) return;
        console.error(msg);
    }
}

export class SessionManager {
private youtubeSessionDataCaches: YoutubeSessionDataCaches = {};
private TOKEN_TTL_HOURS: number;
private logger: Logger;

constructor(
shouldLog = true,
youtubeSessionDataCaches: YoutubeSessionDataCaches = {},
) {
this.shouldLog = shouldLog;
this.logger = new Logger(shouldLog);
this.setYoutubeSessionDataCaches(youtubeSessionDataCaches);
this.TOKEN_TTL_HOURS = process.env.TOKEN_TTL
? parseInt(process.env.TOKEN_TTL)
Expand Down Expand Up @@ -59,35 +86,62 @@ export class SessionManager {
this.youtubeSessionDataCaches = youtubeSessionData || {};
}

log(msg: string) {
if (this.shouldLog) console.log(msg);
}

async generateVisitorData(): Promise<string | null> {
const innertube = await Innertube.create({ retrieve_player: false });
const visitorData = innertube.session.context.client.visitorData;
if (!visitorData) {
console.error("Unable to generate visitor data via Innertube");
this.logger.error("Unable to generate visitor data via Innertube");
return null;
}

return visitorData;
}

/**
 * Build the HTTP(S)/SOCKS agent used to tunnel requests through `proxy`.
 *
 * A proxy given without a scheme is treated as an HTTP proxy. This cannot
 * be detected by waiting for `new URL()` to throw: strings such as
 * `localhost:8080` or `user:pass@host:3128` parse successfully, with the
 * part before the first colon taken as the scheme. The fallback is
 * therefore also applied when the parsed scheme is not one we support and
 * the string contains no `://` separator.
 *
 * @param proxy proxy URL or `host:port`; empty/undefined means "no proxy".
 * @returns an `HttpsProxyAgent` for http/https, a `SocksProxyAgent` for the
 *          socks variants, or `undefined` when no proxy was given or its
 *          scheme is not supported (a warning is logged in that case).
 */
getProxyDispatcher(proxy: string | undefined): Agent | undefined {
    if (!proxy) return undefined;

    const httpSchemes = new Set(["http", "https"]);
    const socksSchemes = new Set([
        "socks",
        "socks4",
        "socks4a",
        "socks5",
        "socks5h",
    ]);

    let protocol: string | undefined;
    try {
        protocol = new URL(proxy).protocol.replace(":", "");
        // eslint-disable-next-line @typescript-eslint/no-unused-vars
    } catch (e) {
        // not parseable as an absolute URL, e.g. "127.0.0.1:8080"
        protocol = undefined;
    }

    const isSupported =
        protocol !== undefined &&
        (httpSchemes.has(protocol) || socksSchemes.has(protocol));
    if (protocol === undefined || (!isSupported && !proxy.includes("://"))) {
        // assume http if no protocol was passed; what the parser reported
        // as the "scheme" was really a hostname or a username
        protocol = "http";
        proxy = `http://${proxy}`;
    }

    if (httpSchemes.has(protocol)) {
        this.logger.log(`Using HTTP/HTTPS proxy: ${proxy}`);
        return new HttpsProxyAgent(proxy);
    }
    if (socksSchemes.has(protocol)) {
        this.logger.log(`Using SOCKS proxy: ${proxy}`);
        return new SocksProxyAgent(proxy);
    }
    // an explicit scheme ("xyz://...") that we have no agent for
    this.logger.warn(`Unsupported proxy protocol: ${proxy}`);
    return undefined;
}
// mostly copied from https://github.com/LuanRT/BgUtils/tree/main/examples/node
async generatePoToken(
visitIdentifier: string,
proxy: string = "",
): Promise<YoutubeSessionData> {
this.cleanupCaches();
const sessionData = this.youtubeSessionDataCaches[visitIdentifier];
if (sessionData) {
this.log(
this.logger.log(
`POT for ${visitIdentifier} still fresh, returning cached token`,
);
return sessionData;
}

this.log(
this.logger.log(
`POT for ${visitIdentifier} stale or not yet generated, generating...`,
);

Expand All @@ -98,32 +152,77 @@ export class SessionManager {
globalThis.window = dom.window as any;
globalThis.document = dom.window.document;

const bgConfig = {
fetch: (url: any, options: any) => fetch(url, options),
let dispatcher: Agent | undefined;
if (proxy) {
dispatcher = this.getProxyDispatcher(proxy);
} else {
dispatcher = this.getProxyDispatcher(
process.env.HTTPS_PROXY ||
process.env.HTTP_PROXY ||
process.env.ALL_PROXY,
);
}

const bgConfig: BgConfig = {
fetch: async (url: any, options: any): Promise<any> => {
try {
const response = await axios.post(url, options.body, {
headers: options.headers,
httpsAgent: dispatcher,
});

return {
ok: true,
json: async () => {
return response.data;
},
};
// eslint-disable-next-line @typescript-eslint/no-unused-vars
} catch (e) {
return {
ok: false,
json: async () => {
return null;
},
};
}
},
globalObj: globalThis,
identity: visitIdentifier,
requestKey,
};

const challenge = await BG.Challenge.create(bgConfig);

let challenge: DescrambledChallenge | undefined;
try {
challenge = await BG.Challenge.create(bgConfig);
} catch (e) {
throw new Error(
`Error while attempting to retrieve BG challenge. err = ${e}`,
);
}
if (!challenge) throw new Error("Could not get Botguard challenge");

if (challenge.script) {
const script = challenge.script.find((sc) => sc !== null);
if (script) new Function(script)();
} else {
this.log("Unable to load Botguard.");
this.logger.log("Unable to load Botguard.");
}

const poToken = await BG.PoToken.generate({
program: challenge.challenge,
globalName: challenge.globalName,
bgConfig,
});
let poToken: string | undefined;
try {
poToken = await BG.PoToken.generate({
program: challenge.challenge,
globalName: challenge.globalName,
bgConfig,
});
} catch (e) {
throw new Error(
`Error while trying to generate PO token. e = ${e}`,
);
}

this.log(`po_token: ${poToken}`);
this.log(`visit_identifier: ${visitIdentifier}`);
this.logger.log(`po_token: ${poToken}`);
this.logger.log(`visit_identifier: ${visitIdentifier}`);

if (!poToken) {
throw new Error("po_token unexpected undefined");
Expand Down
Loading