diff --git a/app/api/common.ts b/app/api/common.ts
index 24453dd96356..09cafd41bc07 100644
--- a/app/api/common.ts
+++ b/app/api/common.ts
@@ -96,11 +96,12 @@ export async function requestOpenai(req: NextRequest) {
     }
   }
 
+  const contentType = req.headers.get("Content-Type") ?? "application/json";
   const fetchUrl = cloudflareAIGatewayUrl(`${baseUrl}/${path}`);
   console.log("fetchUrl", fetchUrl);
   const fetchOptions: RequestInit = {
     headers: {
-      "Content-Type": "application/json",
+      "Content-Type": contentType,
       "Cache-Control": "no-store",
       [authHeaderName]: authValue,
       ...(serverConfig.openaiOrgId && {
@@ -117,7 +118,7 @@ export async function requestOpenai(req: NextRequest) {
   };
 
   // #1815 try to refuse gpt4 request
-  if (serverConfig.customModels && req.body) {
+  if (serverConfig.customModels && req.body && contentType.includes("json")) {
     try {
       const clonedBody = await req.text();
       fetchOptions.body = clonedBody;
diff --git a/app/client/api.ts b/app/client/api.ts
index 7e1d0135ed62..06c2aa2b5b1f 100644
--- a/app/client/api.ts
+++ b/app/client/api.ts
@@ -261,6 +261,18 @@ export function getHeaders(ignoreHeaders?: boolean) {
       ? accessStore.iflytekApiKey + ":" + accessStore.iflytekApiSecret
       : ""
     : accessStore.openaiApiKey;
+  if (ignoreHeaders) {
+    return {
+      isGoogle: false,
+      isAzure: false,
+      isAnthropic: false,
+      isBaidu: false,
+      isByteDance: false,
+      isAlibaba: false,
+      apiKey: accessStore.openaiApiKey,
+      isEnabledAccessControl,
+    };
+  }
   return {
     isGoogle,
     isAzure,
diff --git a/app/client/platforms/openai.ts b/app/client/platforms/openai.ts
index 02115140b72b..d2929ee3fe78 100644
--- a/app/client/platforms/openai.ts
+++ b/app/client/platforms/openai.ts
@@ -205,7 +205,7 @@ export class ChatGPTApi implements LLMApi {
         signal: controller.signal,
         headers: headers,
       };
-
+      console.log("[payload]: ", payload);
       // make a fetch request
       const requestTimeoutId = setTimeout(
         () => controller.abort(),
diff --git a/app/utils/speech.ts b/app/utils/speech.ts
index dc8102879fb3..ded3437e1c79 100644
--- a/app/utils/speech.ts
+++ b/app/utils/speech.ts
@@ -32,6 +32,11 @@ export class OpenAITranscriptionApi extends SpeechApi {
   }
 
   async start(): Promise<void> {
+    // If we are already listening, stop the current session first
+    if (this.listeningStatus) {
+      await this.stop();
+    }
+
     // @ts-ignore
     navigator.getUserMedia =
       // @ts-ignore
@@ -42,28 +47,30 @@ export class OpenAITranscriptionApi extends SpeechApi {
       navigator.mozGetUserMedia ||
       // @ts-ignore
       navigator.msGetUserMedia;
-    if (navigator.mediaDevices) {
-      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
-      this.mediaRecorder = new MediaRecorder(stream);
-      this.mediaRecorder.ondataavailable = (e) => {
-        if (e.data && e.data.size > 0) {
-          this.audioChunks.push(e.data);
-        }
-      };
-      this.stream = stream;
+    if (navigator.mediaDevices) {
+      try {
+        const stream = await navigator.mediaDevices.getUserMedia({
+          audio: true,
+        });
+        this.stream = stream;
+        this.mediaRecorder = new MediaRecorder(stream);
+        this.mediaRecorder.ondataavailable = (e) => {
+          if (e.data && e.data.size > 0) {
+            this.audioChunks.push(e.data);
+          }
+        };
+      } catch (error) {
+        console.error("Error accessing media devices:", error);
+        return;
+      }
     } else {
-      console.warn("Media Decives will work only with SSL");
+      console.warn("Media Devices will work only with SSL");
       return;
     }
 
     this.audioChunks = [];
-
-    // this.recorder.addEventListener("dataavailable", (event) => {
-    //   this.audioChunks.push(event.data);
-    // });
-
-    this.mediaRecorder.start(1000);
+    this.mediaRecorder!.start(1000);
     this.listeningStatus = true;
   }
 
@@ -79,6 +86,13 @@ export class OpenAITranscriptionApi extends SpeechApi {
         const transcription = await llm.transcription({ file: audioBlob });
         this.onTranscription(transcription);
         this.listeningStatus = false;
+
+        // Stop all audio tracks
+        if (this.stream) {
+          this.stream.getTracks().forEach((track) => track.stop());
+          this.stream = null;
+        }
+
         resolve();
       });
 
@@ -90,37 +104,117 @@ export class OpenAITranscriptionApi extends SpeechApi {
 export class WebTranscriptionApi extends SpeechApi {
   private listeningStatus = false;
   private recognitionInstance: any | null = null;
+  private shouldContinueListening = false;
 
   isListening = () => this.listeningStatus;
 
   constructor(transcriptionCallback?: TranscriptionCallback) {
     super();
-    if (isFirefox()) return;
+    this.initRecognition();
+    if (transcriptionCallback) {
+      this.onTranscriptionReceived(transcriptionCallback);
+    }
+  }
+
+  private initRecognition(): void {
     const SpeechRecognition =
       (window as any).SpeechRecognition ||
-      (window as any).webkitSpeechRecognition;
+      (window as any).webkitSpeechRecognition ||
+      (window as any).msSpeechRecognition;
+
+    if (!SpeechRecognition) {
+      console.error("SpeechRecognition is not supported in this browser");
+      return;
+    }
+
     this.recognitionInstance = new SpeechRecognition();
     this.recognitionInstance.continuous = true;
     this.recognitionInstance.interimResults = true;
     this.recognitionInstance.lang = getSTTLang();
-    if (transcriptionCallback) {
-      this.onTranscriptionReceived(transcriptionCallback);
-    }
+
     this.recognitionInstance.onresult = (event: any) => {
       const result = event.results[event.results.length - 1];
       if (result.isFinal) {
        this.onTranscription(result[0].transcript);
       }
     };
+
+    this.recognitionInstance.onerror = (event: any) => {
+      console.error("Speech recognition error:", event.error);
+      if (event.error !== "no-speech") {
+        this.listeningStatus = false;
+        this.shouldContinueListening = false;
+      }
+    };
+
+    this.recognitionInstance.onend = () => {
+      console.log("Speech recognition ended");
+      this.listeningStatus = false;
+      if (this.shouldContinueListening) {
+        console.log("Restarting speech recognition");
+        this.start();
+      }
+    };
   }
 
   async start(): Promise<void> {
-    this.listeningStatus = true;
-    await this.recognitionInstance.start();
+    if (this.listeningStatus) {
+      console.warn("Speech recognition is already active.");
+      return;
+    }
+
+    if (!this.recognitionInstance) {
+      this.initRecognition();
+    }
+
+    if (!this.recognitionInstance) {
+      throw new Error("Failed to initialize speech recognition");
+    }
+
+    this.shouldContinueListening = true;
+
+    return new Promise((resolve, reject) => {
+      const startRecognition = () => {
+        try {
+          this.recognitionInstance.start();
+          this.listeningStatus = true;
+          console.log("Speech recognition started");
+          resolve();
+        } catch (error) {
+          console.error("Error starting speech recognition:", error);
+          this.listeningStatus = false;
+          this.shouldContinueListening = false;
+          reject(error);
+        }
+      };
+
+      startRecognition();
+    });
   }
 
   async stop(): Promise<void> {
-    this.listeningStatus = false;
-    await this.recognitionInstance.stop();
+    this.shouldContinueListening = false;
+
+    if (!this.listeningStatus || !this.recognitionInstance) {
+      return;
+    }
+
+    return new Promise((resolve) => {
+      const onStop = () => {
+        this.listeningStatus = false;
+        this.recognitionInstance.removeEventListener("end", onStop);
+        console.log("Speech recognition stopped");
+        resolve();
+      };
+
+      this.recognitionInstance.addEventListener("end", onStop);
+
+      try {
+        this.recognitionInstance.stop();
+      } catch (error) {
+        console.error("Error stopping speech recognition:", error);
+        onStop();
+      }
+    });
   }
 }