diff --git a/.changeset/good-carrots-warn.md b/.changeset/good-carrots-warn.md new file mode 100644 index 00000000..915e8015 --- /dev/null +++ b/.changeset/good-carrots-warn.md @@ -0,0 +1,5 @@ +--- +'@livekit/agents-plugin-elevenlabs': minor +--- + +Add support for language code in 11Labs TTS package. diff --git a/.changeset/shy-crews-whisper.md b/.changeset/shy-crews-whisper.md new file mode 100644 index 00000000..9879dbe0 --- /dev/null +++ b/.changeset/shy-crews-whisper.md @@ -0,0 +1,5 @@ +--- +"@livekit/agents": patch +--- + +fix LLM retries breaking on VoicePipelineAgent diff --git a/agents/src/pipeline/pipeline_agent.ts b/agents/src/pipeline/pipeline_agent.ts index 3997cfe1..1b6bf875 100644 --- a/agents/src/pipeline/pipeline_agent.ts +++ b/agents/src/pipeline/pipeline_agent.ts @@ -696,6 +696,35 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter< const isUsingTools = handle.source instanceof LLMStream && !!handle.source.functionCalls.length; const interrupted = handle.interrupted; + if (handle.addToChatCtx && (!userQuestion || handle.userCommitted)) { + if (handle.extraToolsMessages) { + this.chatCtx.messages.push(...handle.extraToolsMessages); + } + if (interrupted) { + collectedText + '…'; + } + + const msg = ChatMessage.create({ text: collectedText, role: ChatRole.ASSISTANT }); + this.chatCtx.messages.push(msg); + + handle.markSpeechCommitted(); + if (interrupted) { + this.emit(VPAEvent.AGENT_SPEECH_INTERRUPTED, msg); + } else { + this.emit(VPAEvent.AGENT_SPEECH_COMMITTED, msg); + } + + this.#logger + .child({ + agentTranscript: collectedText, + interrupted, + speechId: handle.id, + }) + .debug('committed agent speech'); + + handle.setDone(); + } + const executeFunctionCalls = async () => { // if the answer is using tools, execute the functions and automatically generate // a response to the user question from the returned values @@ -708,7 +737,7 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter< return; } - if (!userQuestion || !handle.userCommitted) { + if (userQuestion && !handle.userCommitted) { throw new Error('user speech should have been committed before using tools'); } const llmStream = handle.source; @@ -776,8 +805,9 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter< this.emit(VPAEvent.FUNCTION_CALLS_FINISHED, calledFuncs); }; + let finished = false; const task = executeFunctionCalls().then(() => { - handle.markNestedSpeechFinished(); + finished = true; }); while (!handle.nestedSpeechFinished) { const changed = handle.nestedSpeechChanged(); @@ -789,36 +819,13 @@ export class VoicePipelineAgent extends (EventEmitter as new () => TypedEmitter< handle.nestedSpeechHandles.shift(); this.#playingSpeech = handle; } - } - - if (handle.addToChatCtx && (!userQuestion || handle.userCommitted)) { - if (handle.extraToolsMessages) { - this.chatCtx.messages.push(...handle.extraToolsMessages); - } - if (interrupted) { - collectedText + '…'; - } - const msg = ChatMessage.create({ text: collectedText, role: ChatRole.ASSISTANT }); - this.chatCtx.messages.push(msg); - - handle.markSpeechCommitted(); - if (interrupted) { - this.emit(VPAEvent.AGENT_SPEECH_INTERRUPTED, msg); - } else { - this.emit(VPAEvent.AGENT_SPEECH_COMMITTED, msg); + handle.nestedSpeechHandles.forEach(() => handle.nestedSpeechHandles.pop()); + if (finished) { + handle.markNestedSpeechFinished(); } - - this.#logger - .child({ - agentTranscript: collectedText, - interrupted, - speechId: handle.id, - }) - .debug('committed agent speech'); - - handle.setDone(); } + handle.setDone(); } #synthesizeAgentSpeech( diff --git a/plugins/elevenlabs/src/tts.ts b/plugins/elevenlabs/src/tts.ts index 007ba142..ae6799da 100644 --- a/plugins/elevenlabs/src/tts.ts +++ b/plugins/elevenlabs/src/tts.ts @@ -41,6 +41,7 @@ export interface TTSOptions { apiKey?: string; voice: Voice; modelID: TTSModels | string; + languageCode?: string; baseURL: string; encoding: TTSEncoding; streamingLatency: number; @@ -134,6 +135,7 @@ export class SynthesizeStream extends tts.SynthesizeStream { output_format: opts.encoding, optimize_streaming_latency: `${opts.streamingLatency}`, enable_ssml_parsing: `${opts.enableSsmlParsing}`, + ...(opts.languageCode && { language_code: opts.languageCode }), }; Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v)); this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');