From 94bc05cfbcb88fbc22c79a5098e8b1f656551721 Mon Sep 17 00:00:00 2001 From: Long Nguyen Date: Thu, 30 Jan 2025 07:54:31 +0700 Subject: [PATCH 1/2] Implement voice gateway v8 --- src/client/voice/BaseMediaConnection.ts | 50 ++++++-- src/client/voice/VoiceMessageTypes.ts | 163 ++++++++++++++++++++++-- 2 files changed, 185 insertions(+), 28 deletions(-) diff --git a/src/client/voice/BaseMediaConnection.ts b/src/client/voice/BaseMediaConnection.ts index 1a4896d..dc4b262 100644 --- a/src/client/voice/BaseMediaConnection.ts +++ b/src/client/voice/BaseMediaConnection.ts @@ -6,9 +6,9 @@ import { type TransportEncryptor } from "../encryptor/TransportEncryptor.js"; import { STREAMS_SIMULCAST, SupportedEncryptionModes, type SupportedVideoCodec } from "../../utils.js"; -import type { ReadyMessage, SelectProtocolAck } from "./VoiceMessageTypes.js"; import WebSocket from 'ws'; import EventEmitter from "node:events"; +import type { Message, GatewayRequest, GatewayResponse } from "./VoiceMessageTypes.js"; type VoiceConnectionStatus = { @@ -27,6 +27,11 @@ type WebRtcParameters = { supportedEncryptionModes: SupportedEncryptionModes[] } +type ValueOf = + T extends (infer U)[] ? U : + T extends Record ? U : + never + export const CodecPayloadType = { "opus": { name: "opus", type: "audio", priority: 1000, payload_type: 120 @@ -46,7 +51,7 @@ export const CodecPayloadType = { "AV1": { name: "AV1", type: "video", priority: 1000, payload_type: 109, rtx_payload_type: 110, encode: true, decode: true } -} +} as const; export interface StreamOptions { /** @@ -126,6 +131,7 @@ export abstract class BaseMediaConnection extends EventEmitter { public webRtcParams: WebRtcParameters | null = null; private _streamOptions: StreamOptions; private _transportEncryptor?: TransportEncryptor; + private _sequenceNumber = -1; constructor(guildId: string, botId: string, channelId: string, options: Partial, callback: (udp: MediaUdp) => void) { super(); @@ -193,7 +199,7 @@ export abstract class BaseMediaConnection extends EventEmitter { return this.status.started = true; - this.ws = new WebSocket(`wss://${this.server}/?v=7`, { + this.ws = new WebSocket(`wss://${this.server}/?v=8`, { followRedirects: true }); this.ws.on("open", () => { @@ -224,7 +230,7 @@ export abstract class BaseMediaConnection extends EventEmitter { } } - handleReady(d: ReadyMessage): void { + handleReady(d: Message.Ready): void { // we hardcoded the STREAMS_SIMULCAST, which will always be array of 1 const stream = d.streams[0]; this.webRtcParams = { @@ -238,7 +244,7 @@ export abstract class BaseMediaConnection extends EventEmitter { this.udp.updatePacketizer(); } - handleProtocolAck(d: SelectProtocolAck): void { + handleProtocolAck(d: Message.SelectProtocolAck): void { const secretKey = Buffer.from(d.secret_key); switch (d.mode) { @@ -254,8 +260,9 @@ export abstract class BaseMediaConnection extends EventEmitter { setupEvents(): void { this.ws?.on('message', (data: string) => { - // Maybe map out all the types here to avoid any? - const { op, d } = JSON.parse(data); + const { op, d, seq } = JSON.parse(data) as GatewayResponse; + if (seq) + this._sequenceNumber = seq; if (op === VoiceOpCodes.READY) { // ready this.handleReady(d); @@ -292,11 +299,14 @@ export abstract class BaseMediaConnection extends EventEmitter { clearInterval(this.interval); } this.interval = setInterval(() => { - this.sendOpcode(VoiceOpCodes.HEARTBEAT, 42069); + this.sendOpcode(VoiceOpCodes.HEARTBEAT, { + t: Date.now(), + seq_ack: this._sequenceNumber + }); }, interval); } - sendOpcode(code:number, data: unknown): void { + sendOpcode(code: T["op"], data: T["d"]): void { this.ws?.send(JSON.stringify({ op: code, d: data @@ -307,6 +317,12 @@ export abstract class BaseMediaConnection extends EventEmitter { ** identifies with media server with credentials */ identify(): void { + if (!this.serverId) + throw new Error("Server ID is null or empty"); + if (!this.session_id) + throw new Error("Session ID is null or empty"); + if (!this.token) + throw new Error("Token is null or empty"); this.sendOpcode(VoiceOpCodes.IDENTIFY, { server_id: this.serverId, user_id: this.botId, @@ -318,10 +334,17 @@ export abstract class BaseMediaConnection extends EventEmitter { } resume(): void { + if (!this.serverId) + throw new Error("Server ID is null or empty"); + if (!this.session_id) + throw new Error("Session ID is null or empty"); + if (!this.token) + throw new Error("Token is null or empty"); this.sendOpcode(VoiceOpCodes.RESUME, { server_id: this.serverId, session_id: this.session_id, token: this.token, + seq_ack: this._sequenceNumber }); } @@ -332,6 +355,8 @@ export abstract class BaseMediaConnection extends EventEmitter { */ setProtocols(): Promise { const { ip, port } = this.udp; + if (!ip || !port) + throw new Error("IP or port is undefined (this shouldn't happen!!!)"); // select encryption mode // From Discord docs: // You must support aead_xchacha20_poly1305_rtpsize. You should prefer to use aead_aes256_gcm_rtpsize when it is available. @@ -349,15 +374,12 @@ export abstract class BaseMediaConnection extends EventEmitter { return new Promise((resolve) => { this.sendOpcode(VoiceOpCodes.SELECT_PROTOCOL, { protocol: "udp", - codecs: Object.values(CodecPayloadType), + codecs: Object.values(CodecPayloadType) as ValueOf[], data: { address: ip, port: port, mode: encryptionMode - }, - address: ip, - port: port, - mode: encryptionMode + } }); this.once("select_protocol_ack", () => resolve()); }) diff --git a/src/client/voice/VoiceMessageTypes.ts b/src/client/voice/VoiceMessageTypes.ts index f881693..3dd57b6 100644 --- a/src/client/voice/VoiceMessageTypes.ts +++ b/src/client/voice/VoiceMessageTypes.ts @@ -1,14 +1,6 @@ +import type { VoiceOpCodes } from "./VoiceOpCodes.js" import type { SupportedEncryptionModes } from "../../utils.js" -export type ReadyMessage = { - ssrc: number, - ip: string, - port: number, - modes: SupportedEncryptionModes[], - experiments: string[], - streams: StreamInfo[] -} - type StreamInfo = { active: boolean, quality: number, @@ -21,9 +13,152 @@ type StreamInfo = { type: string } -export type SelectProtocolAck = { - secret_key: number[], - audio_codec: string, - video_codec: string, - mode: string, +type SimulcastInfo = { + type: string, + rid: string, + quality: number +} + +type CodecPayloadType = { + name: string, + type: "audio", + priority: number, + payload_type: number +} | { + name: string, + type: "video", + priority: number, + payload_type: number, + rtx_payload_type: number, + encode: boolean, + decode: boolean } + +export namespace Message { + // Request messages + export type Identify = { + server_id: string, + user_id: string, + session_id: string, + token: string, + video: boolean, + streams: SimulcastInfo[] + } + + export type Resume = { + server_id: string, + session_id: string, + token: string, + seq_ack: number + } + + export type Heartbeat = { + t: number, + seq_ack?: number + } + + export type SelectProtocol = { + protocol: string, + codecs: CodecPayloadType[], + data: { + address: string, + port: number, + mode: SupportedEncryptionModes + } + } + + export type Video = { + audio_ssrc: number, + video_ssrc: number, + rtx_ssrc: number, + streams: { + type: "video", + rid: string, + ssrc: number, + active: boolean, + quality: number, + rtx_ssrc: number, + max_bitrate: number, + max_framerate: number, + max_resolution: { + type: "fixed", + width: number, + height: number + } + }[] + } + + // Response messages + export type Hello = { + heartbeat_interval: number + } + + export type Ready = { + ssrc: number, + ip: string, + port: number, + modes: SupportedEncryptionModes[], + experiments: string[], + streams: StreamInfo[] + } + + export type Speaking = { + speaking: 0 | 1 | 2, + delay: number, + ssrc: number + } + + export type SelectProtocolAck = { + secret_key: number[], + audio_codec: string, + video_codec: string, + mode: string, + } + + export type HeartbeatAck = { + t: number + } +} + +export namespace GatewayResponse { + type Generic | null> = { + op: Op, + d: T, + seq?: number + } + export type Hello = Generic + export type Ready = Generic + export type Resumed = Generic + export type Speaking = Generic + export type SelectProtocolAck = Generic + export type HeartbeatAck = Generic +} + +export type GatewayResponse = + GatewayResponse.Hello | + GatewayResponse.Ready | + GatewayResponse.Resumed | + GatewayResponse.Speaking | + GatewayResponse.SelectProtocolAck | + GatewayResponse.HeartbeatAck + +export namespace GatewayRequest { + type Generic | null> = { + op: Op, + d: T + } + export type Identify = Generic + export type Resume = Generic + export type Heartbeat = Generic + export type SelectProtocol = Generic + export type Video = Generic + export type Speaking = Generic +} + +export type GatewayRequest = + GatewayRequest.Identify | + GatewayRequest.Resume | + GatewayRequest.Heartbeat | + GatewayRequest.SelectProtocol | + GatewayRequest.Video | + GatewayRequest.Speaking From 2e069f789a19b98ea1699693a91e681b4d97b002 Mon Sep 17 00:00:00 2001 From: Long Nguyen Date: Thu, 30 Jan 2025 08:06:58 +0700 Subject: [PATCH 2/2] Ignore binary messages for now Binary messages are used for DAVE, which we can't deal with yet. --- src/client/voice/BaseMediaConnection.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/client/voice/BaseMediaConnection.ts b/src/client/voice/BaseMediaConnection.ts index dc4b262..ece8183 100644 --- a/src/client/voice/BaseMediaConnection.ts +++ b/src/client/voice/BaseMediaConnection.ts @@ -259,8 +259,10 @@ export abstract class BaseMediaConnection extends EventEmitter { } setupEvents(): void { - this.ws?.on('message', (data: string) => { - const { op, d, seq } = JSON.parse(data) as GatewayResponse; + this.ws?.on('message', (data, isBinary) => { + if (isBinary) + return; + const { op, d, seq } = JSON.parse(data.toString()) as GatewayResponse; if (seq) this._sequenceNumber = seq;