Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement voice gateway v8 #146

Merged
merged 2 commits into from
Jan 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 39 additions & 15 deletions src/client/voice/BaseMediaConnection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ import {
type TransportEncryptor
} from "../encryptor/TransportEncryptor.js";
import { STREAMS_SIMULCAST, SupportedEncryptionModes, type SupportedVideoCodec } from "../../utils.js";
import type { ReadyMessage, SelectProtocolAck } from "./VoiceMessageTypes.js";
import WebSocket from 'ws';
import EventEmitter from "node:events";
import type { Message, GatewayRequest, GatewayResponse } from "./VoiceMessageTypes.js";

type VoiceConnectionStatus =
{
Expand All @@ -27,6 +27,11 @@ type WebRtcParameters = {
supportedEncryptionModes: SupportedEncryptionModes[]
}

/**
 * Resolves to the "contained" type of a container type `T`:
 *
 * - for an array or tuple (mutable **or** readonly), the element type;
 * - for a string-keyed object, the union of its property value types;
 * - for anything else, `never`.
 *
 * The array branch matches `readonly (infer U)[]` so that readonly arrays and
 * tuples (e.g. values declared with `as const`) resolve to their element type
 * instead of falling through to `never`. Mutable arrays are assignable to
 * readonly arrays, so they still match this branch.
 */
type ValueOf<T> =
    T extends readonly (infer U)[] ? U :
    T extends Record<string, infer U> ? U :
    never

export const CodecPayloadType = {
"opus": {
name: "opus", type: "audio", priority: 1000, payload_type: 120
Expand All @@ -46,7 +51,7 @@ export const CodecPayloadType = {
"AV1": {
name: "AV1", type: "video", priority: 1000, payload_type: 109, rtx_payload_type: 110, encode: true, decode: true
}
}
} as const;

export interface StreamOptions {
/**
Expand Down Expand Up @@ -126,6 +131,7 @@ export abstract class BaseMediaConnection extends EventEmitter {
public webRtcParams: WebRtcParameters | null = null;
private _streamOptions: StreamOptions;
private _transportEncryptor?: TransportEncryptor;
private _sequenceNumber = -1;

constructor(guildId: string, botId: string, channelId: string, options: Partial<StreamOptions>, callback: (udp: MediaUdp) => void) {
super();
Expand Down Expand Up @@ -193,7 +199,7 @@ export abstract class BaseMediaConnection extends EventEmitter {
return
this.status.started = true;

this.ws = new WebSocket(`wss://${this.server}/?v=7`, {
this.ws = new WebSocket(`wss://${this.server}/?v=8`, {
followRedirects: true
});
this.ws.on("open", () => {
Expand Down Expand Up @@ -224,7 +230,7 @@ export abstract class BaseMediaConnection extends EventEmitter {
}
}

handleReady(d: ReadyMessage): void {
handleReady(d: Message.Ready): void {
// we hardcoded the STREAMS_SIMULCAST, which will always be array of 1
const stream = d.streams[0];
this.webRtcParams = {
Expand All @@ -238,7 +244,7 @@ export abstract class BaseMediaConnection extends EventEmitter {
this.udp.updatePacketizer();
}

handleProtocolAck(d: SelectProtocolAck): void {
handleProtocolAck(d: Message.SelectProtocolAck): void {
const secretKey = Buffer.from(d.secret_key);
switch (d.mode)
{
Expand All @@ -253,9 +259,12 @@ export abstract class BaseMediaConnection extends EventEmitter {
}

setupEvents(): void {
this.ws?.on('message', (data: string) => {
// Maybe map out all the types here to avoid any?
const { op, d } = JSON.parse(data);
this.ws?.on('message', (data, isBinary) => {
if (isBinary)
return;
const { op, d, seq } = JSON.parse(data.toString()) as GatewayResponse;
if (seq)
this._sequenceNumber = seq;

if (op === VoiceOpCodes.READY) { // ready
this.handleReady(d);
Expand Down Expand Up @@ -292,11 +301,14 @@ export abstract class BaseMediaConnection extends EventEmitter {
clearInterval(this.interval);
}
this.interval = setInterval(() => {
this.sendOpcode(VoiceOpCodes.HEARTBEAT, 42069);
this.sendOpcode(VoiceOpCodes.HEARTBEAT, {
t: Date.now(),
seq_ack: this._sequenceNumber
});
}, interval);
}

sendOpcode(code:number, data: unknown): void {
sendOpcode<T extends GatewayRequest>(code: T["op"], data: T["d"]): void {
this.ws?.send(JSON.stringify({
op: code,
d: data
Expand All @@ -307,6 +319,12 @@ export abstract class BaseMediaConnection extends EventEmitter {
** identifies with media server with credentials
*/
identify(): void {
if (!this.serverId)
throw new Error("Server ID is null or empty");
if (!this.session_id)
throw new Error("Session ID is null or empty");
if (!this.token)
throw new Error("Token is null or empty");
this.sendOpcode(VoiceOpCodes.IDENTIFY, {
server_id: this.serverId,
user_id: this.botId,
Expand All @@ -318,10 +336,17 @@ export abstract class BaseMediaConnection extends EventEmitter {
}

/**
 * Sends a RESUME opcode carrying the stored server id, session id, token and
 * the last recorded sequence number (`seq_ack`).
 *
 * @throws Error if the server id, session id or token is missing/empty.
 */
resume(): void {
    const serverId = this.serverId;
    if (!serverId) {
        throw new Error("Server ID is null or empty");
    }

    const sessionId = this.session_id;
    if (!sessionId) {
        throw new Error("Session ID is null or empty");
    }

    const token = this.token;
    if (!token) {
        throw new Error("Token is null or empty");
    }

    // All credentials are present; include the last seen sequence number.
    this.sendOpcode(VoiceOpCodes.RESUME, {
        server_id: serverId,
        session_id: sessionId,
        token: token,
        seq_ack: this._sequenceNumber
    });
}

Expand All @@ -332,6 +357,8 @@ export abstract class BaseMediaConnection extends EventEmitter {
*/
setProtocols(): Promise<void> {
const { ip, port } = this.udp;
if (!ip || !port)
throw new Error("IP or port is undefined (this shouldn't happen!!!)");
// select encryption mode
// From Discord docs:
// You must support aead_xchacha20_poly1305_rtpsize. You should prefer to use aead_aes256_gcm_rtpsize when it is available.
Expand All @@ -349,15 +376,12 @@ export abstract class BaseMediaConnection extends EventEmitter {
return new Promise((resolve) => {
this.sendOpcode(VoiceOpCodes.SELECT_PROTOCOL, {
protocol: "udp",
codecs: Object.values(CodecPayloadType),
codecs: Object.values(CodecPayloadType) as ValueOf<typeof CodecPayloadType>[],
data: {
address: ip,
port: port,
mode: encryptionMode
},
address: ip,
port: port,
mode: encryptionMode
}
});
this.once("select_protocol_ack", () => resolve());
})
Expand Down
163 changes: 149 additions & 14 deletions src/client/voice/VoiceMessageTypes.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,6 @@
import type { VoiceOpCodes } from "./VoiceOpCodes.js"
import type { SupportedEncryptionModes } from "../../utils.js"

/**
 * Shape of the payload handled when the gateway reports READY.
 * Field meanings beyond their declared types are not visible from this file.
 */
export type ReadyMessage = {
    ssrc: number;
    ip: string;
    port: number;
    /** Encryption modes listed in the payload. */
    modes: SupportedEncryptionModes[];
    experiments: string[];
    streams: StreamInfo[];
};

type StreamInfo = {
active: boolean,
quality: number,
Expand All @@ -21,9 +13,152 @@ type StreamInfo = {
type: string
}

export type SelectProtocolAck = {
secret_key: number[],
audio_codec: string,
video_codec: string,
mode: string,
/**
 * One entry of the `streams` list carried by the Identify payload
 * (see `Message.Identify`).
 */
type SimulcastInfo = {
    type: string;
    rid: string;
    quality: number;
};

/**
 * Codec description, discriminated on `type`:
 * audio codecs carry only a payload type, while video codecs additionally
 * carry an RTX payload type and encode/decode flags.
 */
type CodecPayloadType =
    | {
        name: string;
        type: "audio";
        priority: number;
        payload_type: number;
    }
    | {
        name: string;
        type: "video";
        priority: number;
        payload_type: number;
        rtx_payload_type: number;
        encode: boolean;
        decode: boolean;
    };

/**
 * Payload (`d` field) shapes for voice gateway messages.
 *
 * These are intentionally `type` aliases rather than interfaces: the gateway
 * envelope generics constrain their payload to `Record<string, unknown> | null`,
 * which object type aliases satisfy.
 */
export namespace Message {
    /** Element type of `Video.streams`. */
    type VideoStream = {
        type: "video";
        rid: string;
        ssrc: number;
        active: boolean;
        quality: number;
        rtx_ssrc: number;
        max_bitrate: number;
        max_framerate: number;
        max_resolution: {
            type: "fixed";
            width: number;
            height: number;
        };
    };

    // ------------------------------------------------------------------
    // Request messages
    // ------------------------------------------------------------------

    /** Payload of an IDENTIFY request. */
    export type Identify = {
        server_id: string;
        user_id: string;
        session_id: string;
        token: string;
        video: boolean;
        streams: SimulcastInfo[];
    };

    /** Payload of a RESUME request; `seq_ack` is required here. */
    export type Resume = {
        server_id: string;
        session_id: string;
        token: string;
        seq_ack: number;
    };

    /** Payload of a HEARTBEAT request; `seq_ack` is optional here. */
    export type Heartbeat = {
        t: number;
        seq_ack?: number;
    };

    /** Payload of a SELECT_PROTOCOL request. */
    export type SelectProtocol = {
        protocol: string;
        codecs: CodecPayloadType[];
        data: {
            address: string;
            port: number;
            mode: SupportedEncryptionModes;
        };
    };

    /** Payload of a VIDEO request. */
    export type Video = {
        audio_ssrc: number;
        video_ssrc: number;
        rtx_ssrc: number;
        streams: VideoStream[];
    };

    // ------------------------------------------------------------------
    // Response messages
    // ------------------------------------------------------------------

    /** Payload of a HELLO response. */
    export type Hello = {
        heartbeat_interval: number;
    };

    /** Payload of a READY response. */
    export type Ready = {
        ssrc: number;
        ip: string;
        port: number;
        modes: SupportedEncryptionModes[];
        experiments: string[];
        streams: StreamInfo[];
    };

    /** Payload of a SPEAKING message. */
    export type Speaking = {
        speaking: 0 | 1 | 2;
        delay: number;
        ssrc: number;
    };

    /** Payload of a SELECT_PROTOCOL_ACK response. */
    export type SelectProtocolAck = {
        secret_key: number[];
        audio_codec: string;
        video_codec: string;
        mode: string;
    };

    /** Payload of a HEARTBEAT_ACK response. */
    export type HeartbeatAck = {
        t: number;
    };
}

/**
 * Envelope types for messages received from the voice gateway: each pairs an
 * opcode with its payload type and may carry a `seq` number.
 */
export namespace GatewayResponse {
    /** Common envelope: opcode, payload (`null` when there is none), optional sequence number. */
    type Envelope<Code extends VoiceOpCodes, Payload extends Record<string, unknown> | null> = {
        op: Code;
        d: Payload;
        seq?: number;
    };

    export type Hello = Envelope<VoiceOpCodes.HELLO, Message.Hello>;
    export type Ready = Envelope<VoiceOpCodes.READY, Message.Ready>;
    /** RESUMED carries no payload (`d` is `null`). */
    export type Resumed = Envelope<VoiceOpCodes.RESUMED, null>;
    export type Speaking = Envelope<VoiceOpCodes.SPEAKING, Message.Speaking>;
    export type SelectProtocolAck = Envelope<VoiceOpCodes.SELECT_PROTOCOL_ACK, Message.SelectProtocolAck>;
    export type HeartbeatAck = Envelope<VoiceOpCodes.HEARTBEAT_ACK, Message.HeartbeatAck>;
}

/**
 * Union of every response envelope declared in the `GatewayResponse`
 * namespace; the `op` field discriminates between members.
 */
export type GatewayResponse =
    | GatewayResponse.Hello
    | GatewayResponse.Ready
    | GatewayResponse.Resumed
    | GatewayResponse.Speaking
    | GatewayResponse.SelectProtocolAck
    | GatewayResponse.HeartbeatAck;

/**
 * Envelope types for messages sent to the voice gateway: each pairs an
 * opcode with its payload type.
 */
export namespace GatewayRequest {
    /** Common envelope: opcode plus payload (`null` when there is none). */
    type Envelope<Code extends VoiceOpCodes, Payload extends Record<string, unknown> | null> = {
        op: Code;
        d: Payload;
    };

    export type Identify = Envelope<VoiceOpCodes.IDENTIFY, Message.Identify>;
    export type Resume = Envelope<VoiceOpCodes.RESUME, Message.Resume>;
    export type Heartbeat = Envelope<VoiceOpCodes.HEARTBEAT, Message.Heartbeat>;
    export type SelectProtocol = Envelope<VoiceOpCodes.SELECT_PROTOCOL, Message.SelectProtocol>;
    export type Video = Envelope<VoiceOpCodes.VIDEO, Message.Video>;
    export type Speaking = Envelope<VoiceOpCodes.SPEAKING, Message.Speaking>;
}

/**
 * Union of every request envelope declared in the `GatewayRequest`
 * namespace; the `op` field discriminates between members.
 */
export type GatewayRequest =
    | GatewayRequest.Identify
    | GatewayRequest.Resume
    | GatewayRequest.Heartbeat
    | GatewayRequest.SelectProtocol
    | GatewayRequest.Video
    | GatewayRequest.Speaking;