From 56eb9d14308188364652e1dfedf7c71ea2ef277b Mon Sep 17 00:00:00 2001
From: yihang3
Date: Wed, 21 Aug 2024 15:22:31 +0800
Subject: [PATCH 01/36] fix no max_tokens in payload when the vision model
 name does not contain 'vision'.

---
 app/client/platforms/openai.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/client/platforms/openai.ts b/app/client/platforms/openai.ts
index d4e262c16b4..2b60a8b6a29 100644
--- a/app/client/platforms/openai.ts
+++ b/app/client/platforms/openai.ts
@@ -190,7 +190,7 @@ export class ChatGPTApi implements LLMApi {
       };
 
       // add max_tokens to vision model
-      if (visionModel && modelConfig.model.includes("preview")) {
+      if (visionModel) {
        requestPayload["max_tokens"] = Math.max(modelConfig.max_tokens, 4000);
      }
    }

From 2f410fc09f62e67c32ac6142e99937d3e8f29601 Mon Sep 17 00:00:00 2001
From: DDMeaqua
Date: Tue, 27 Aug 2024 16:21:02 +0800
Subject: [PATCH 02/36] feat: add tts stt

---
 app/client/api.ts              |  22 ++
 app/client/platforms/openai.ts |  83 ++++++-
 app/components/chat.tsx        | 150 ++++++++++++-
 app/components/settings.tsx    |  24 ++
 app/components/stt-config.tsx  |  51 +++++
 app/components/stt.module.scss | 119 ++++++++++
 app/components/tts-config.tsx  | 132 +++++++++++
 app/components/tts.module.scss | 119 ++++++++++
 app/constant.ts                |  20 ++
 app/icons/speak-stop.svg       |   1 +
 app/icons/speak.svg            |   1 +
 app/icons/voice-white.svg      |  16 ++
 app/locales/cn.ts              |  34 +++
 app/locales/en.ts              |   2 +
 app/locales/index.ts           |  31 +++
 app/store/access.ts            |   9 +
 app/store/config.ts            |  49 +++++
 app/utils/audio.ts             |  45 ++++
 app/utils/ms_edge_tts.ts       | 391 +++++++++++++++++++++++++++++++++
 app/utils/speech.ts            | 126 +++++++++++
 package.json                   |   3 +-
 yarn.lock                      |  24 ++
 22 files changed, 1446 insertions(+), 6 deletions(-)
 create mode 100644 app/components/stt-config.tsx
 create mode 100644 app/components/stt.module.scss
 create mode 100644 app/components/tts-config.tsx
 create mode 100644 app/components/tts.module.scss
 create mode 100644 app/icons/speak-stop.svg
 create mode 100644 app/icons/speak.svg
 create mode 100644 app/icons/voice-white.svg
 create mode 100644 app/utils/audio.ts
 create mode 100644 app/utils/ms_edge_tts.ts
 create mode 100644 app/utils/speech.ts

diff --git a/app/client/api.ts b/app/client/api.ts
index d7fb023a226..8d0877a0d4d 100644
--- a/app/client/api.ts
+++ b/app/client/api.ts
@@ -20,6 +20,7 @@ export const ROLES = ["system", "user", "assistant"] as const;
 export type MessageRole = (typeof ROLES)[number];
 
 export const Models = ["gpt-3.5-turbo", "gpt-4"] as const;
+export const TTSModels = ["tts-1", "tts-1-hd"] as const;
 export type ChatModel = ModelType;
 
 export interface MultimodalContent {
@@ -48,6 +49,25 @@ export interface LLMConfig {
   style?: DalleRequestPayload["style"];
 }
 
+export interface SpeechOptions {
+  model: string;
+  input: string;
+  voice: string;
+  response_format?: string;
+  speed?: number;
+  onController?: (controller: AbortController) => void;
+}
+
+export interface TranscriptionOptions {
+  model?: "whisper-1";
+  file: Blob;
+  language?: string;
+  prompt?: string;
+  response_format?: "json" | "text" | "srt" | "verbose_json" | "vtt";
+  temperature?: number;
+  onController?: (controller: AbortController) => void;
+}
+
 export interface ChatOptions {
   messages: RequestMessage[];
   config: LLMConfig;
@@ -80,6 +100,8 @@ export interface LLMModelProvider {
 
 export abstract class LLMApi {
   abstract chat(options: ChatOptions): Promise;
+  abstract speech(options: SpeechOptions): Promise;
+  abstract transcription(options: TranscriptionOptions): Promise;
   abstract usage(): Promise;
   abstract models(): Promise;
 }
diff --git a/app/client/platforms/openai.ts b/app/client/platforms/openai.ts
index d4e262c16b4..02115140b72 100644
--- a/app/client/platforms/openai.ts
+++ b/app/client/platforms/openai.ts
@@ -26,6 +26,8 @@ import {
   LLMModel,
   LLMUsage,
   MultimodalContent,
+  SpeechOptions,
+  TranscriptionOptions,
 } from "../api";
 import Locale from "../../locales";
 import {
@@ -77,7 +79,7 @@ export interface DalleRequestPayload {
 export class ChatGPTApi implements LLMApi {
   private disableListModels = true;
 
-  path(path: string): string {
+  path(path: string, model?: string): string {
     const accessStore = useAccessStore.getState();
 
     let baseUrl = "";
@@ -140,6 +142,85 @@ export class ChatGPTApi implements LLMApi {
     return res.choices?.at(0)?.message?.content ?? res;
   }
 
+  async speech(options: SpeechOptions): Promise {
+    const requestPayload = {
+      model: options.model,
+      input: options.input,
+      voice: options.voice,
+      response_format: options.response_format,
+      speed: options.speed,
+    };
+
+    console.log("[Request] openai speech payload: ", requestPayload);
+
+    const controller = new AbortController();
+    options.onController?.(controller);
+
+    try {
+      const speechPath = this.path(OpenaiPath.SpeechPath, options.model);
+      const speechPayload = {
+        method: "POST",
+        body: JSON.stringify(requestPayload),
+        signal: controller.signal,
+        headers: getHeaders(),
+      };
+
+      // make a fetch request
+      const requestTimeoutId = setTimeout(
+        () => controller.abort(),
+        REQUEST_TIMEOUT_MS,
+      );
+
+      const res = await fetch(speechPath, speechPayload);
+      clearTimeout(requestTimeoutId);
+      return await res.arrayBuffer();
+    } catch (e) {
+      console.log("[Request] failed to make a speech request", e);
+      throw e;
+    }
+  }
+
+  async transcription(options: TranscriptionOptions): Promise {
+    const formData = new FormData();
+    formData.append("file", options.file, "audio.wav");
+    formData.append("model", options.model ?? 
"whisper-1"); + if (options.language) formData.append("language", options.language); + if (options.prompt) formData.append("prompt", options.prompt); + if (options.response_format) + formData.append("response_format", options.response_format); + if (options.temperature) + formData.append("temperature", options.temperature.toString()); + + console.log("[Request] openai audio transcriptions payload: ", options); + + const controller = new AbortController(); + options.onController?.(controller); + + try { + const path = this.path(OpenaiPath.TranscriptionPath, options.model); + const headers = getHeaders(true); + const payload = { + method: "POST", + body: formData, + signal: controller.signal, + headers: headers, + }; + + // make a fetch request + const requestTimeoutId = setTimeout( + () => controller.abort(), + REQUEST_TIMEOUT_MS, + ); + const res = await fetch(path, payload); + clearTimeout(requestTimeoutId); + const json = await res.json(); + return json.text; + } catch (e) { + console.log("[Request] failed to make a audio transcriptions request", e); + throw e; + } + } + async chat(options: ChatOptions) { const modelConfig = { ...useAppConfig.getState().modelConfig, diff --git a/app/components/chat.tsx b/app/components/chat.tsx index ed5b06799c3..e5391ad226c 100644 --- a/app/components/chat.tsx +++ b/app/components/chat.tsx @@ -10,11 +10,14 @@ import React, { } from "react"; import SendWhiteIcon from "../icons/send-white.svg"; +import VoiceWhiteIcon from "../icons/voice-white.svg"; import BrainIcon from "../icons/brain.svg"; import RenameIcon from "../icons/rename.svg"; import ExportIcon from "../icons/share.svg"; import ReturnIcon from "../icons/return.svg"; import CopyIcon from "../icons/copy.svg"; +import SpeakIcon from "../icons/speak.svg"; +import SpeakStopIcon from "../icons/speak-stop.svg"; import LoadingIcon from "../icons/three-dots.svg"; import LoadingButtonIcon from "../icons/loading.svg"; import PromptIcon from "../icons/prompt.svg"; @@ -64,6 +67,7 @@ import { getMessageImages, isVisionModel, isDalle3, + isFirefox, } from "../utils"; import { uploadImage as uploadImageRemote } from "@/app/utils/chat"; @@ -73,7 +77,7 @@ import dynamic from "next/dynamic"; import { ChatControllerPool } from "../client/controller"; import { DalleSize, DalleQuality, DalleStyle } from "../typing"; import { Prompt, usePromptStore } from "../store/prompt"; -import Locale from "../locales"; +import Locale, { getLang, getSTTLang } from "../locales"; import { IconButton } from "./button"; import styles from "./chat.module.scss"; @@ -90,6 +94,10 @@ import { import { useNavigate } from "react-router-dom"; import { CHAT_PAGE_SIZE, + DEFAULT_STT_ENGINE, + DEFAULT_TTS_ENGINE, + FIREFOX_DEFAULT_STT_ENGINE, + ModelProvider, LAST_INPUT_KEY, Path, REQUEST_TIMEOUT_MS, @@ -106,6 +114,16 @@ import { ExportMessageModal } from "./exporter"; import { getClientConfig } from "../config/client"; import { useAllModels } from "../utils/hooks"; import { MultimodalContent } from "../client/api"; +import { ClientApi } from "../client/api"; +import { createTTSPlayer } from "../utils/audio"; +import { + OpenAITranscriptionApi, + SpeechApi, + WebTranscriptionApi, +} from "../utils/speech"; +import { MsEdgeTTS, OUTPUT_FORMAT } from "../utils/ms_edge_tts"; + +const ttsPlayer = createTTSPlayer(); const Markdown = dynamic(async () => (await import("./markdown")).Markdown, { loading: () => , @@ -922,6 +940,33 @@ function _Chat() { } }; + const [isListening, setIsListening] = useState(false); + const [isTranscription, 
setIsTranscription] = useState(false); + const [speechApi, setSpeechApi] = useState(null); + + const startListening = async () => { + if (speechApi) { + await speechApi.start(); + setIsListening(true); + } + }; + + const stopListening = async () => { + if (speechApi) { + if (config.sttConfig.engine !== DEFAULT_STT_ENGINE) + setIsTranscription(true); + await speechApi.stop(); + setIsListening(false); + } + }; + + const onRecognitionEnd = (finalTranscript: string) => { + console.log(finalTranscript); + if (finalTranscript) setUserInput(finalTranscript); + if (config.sttConfig.engine !== DEFAULT_STT_ENGINE) + setIsTranscription(false); + }; + const doSubmit = (userInput: string) => { if (userInput.trim() === "") return; const matchCommand = chatCommands.match(userInput); @@ -992,6 +1037,16 @@ function _Chat() { } }); // eslint-disable-next-line react-hooks/exhaustive-deps + if (isFirefox()) config.sttConfig.engine = FIREFOX_DEFAULT_STT_ENGINE; + setSpeechApi( + config.sttConfig.engine === DEFAULT_STT_ENGINE + ? new WebTranscriptionApi((transcription) => + onRecognitionEnd(transcription), + ) + : new OpenAITranscriptionApi((transcription) => + onRecognitionEnd(transcription), + ), + ); }, []); // check if should send message @@ -1102,10 +1157,55 @@ function _Chat() { }); }; + const accessStore = useAccessStore(); + const [speechStatus, setSpeechStatus] = useState(false); + const [speechLoading, setSpeechLoading] = useState(false); + async function openaiSpeech(text: string) { + if (speechStatus) { + ttsPlayer.stop(); + setSpeechStatus(false); + } else { + var api: ClientApi; + api = new ClientApi(ModelProvider.GPT); + const config = useAppConfig.getState(); + setSpeechLoading(true); + ttsPlayer.init(); + let audioBuffer: ArrayBuffer; + const { markdownToTxt } = require("markdown-to-txt"); + const textContent = markdownToTxt(text); + if (config.ttsConfig.engine !== DEFAULT_TTS_ENGINE) { + const edgeVoiceName = accessStore.edgeVoiceName(); + const tts = new MsEdgeTTS(); + await tts.setMetadata( + edgeVoiceName, + OUTPUT_FORMAT.AUDIO_24KHZ_96KBITRATE_MONO_MP3, + ); + audioBuffer = await tts.toArrayBuffer(textContent); + } else { + audioBuffer = await api.llm.speech({ + model: config.ttsConfig.model, + input: textContent, + voice: config.ttsConfig.voice, + speed: config.ttsConfig.speed, + }); + } + setSpeechStatus(true); + ttsPlayer + .play(audioBuffer, () => { + setSpeechStatus(false); + }) + .catch((e) => { + console.error("[OpenAI Speech]", e); + showToast(prettyObject(e)); + setSpeechStatus(false); + }) + .finally(() => setSpeechLoading(false)); + } + } + const context: RenderMessage[] = useMemo(() => { return session.mask.hideContext ? [] : session.mask.context.slice(); }, [session.mask.context, session.mask.hideContext]); - const accessStore = useAccessStore(); if ( context.length === 0 && @@ -1567,6 +1667,26 @@ function _Chat() { ) } /> + {config.ttsConfig.enable && ( + + ) : ( + + ) + } + onClick={() => + openaiSpeech(getMessageTextContent(message)) + } + /> + )} )} @@ -1714,13 +1834,35 @@ function _Chat() { })} )} - } + text={ + isListening ? Locale.Chat.StopSpeak : Locale.Chat.StartSpeak + } + className={styles["chat-input-send"]} + type="primary" + onClick={async () => + isListening ? 
await stopListening() : await startListening() + } + loding={isTranscription} + /> + ) : ( + } + text={Locale.Chat.Send} + className={styles["chat-input-send"]} + type="primary" + onClick={() => doSubmit(userInput)} + /> + )} + {/* } text={Locale.Chat.Send} className={styles["chat-input-send"]} type="primary" onClick={() => doSubmit(userInput)} - /> + /> */} diff --git a/app/components/settings.tsx b/app/components/settings.tsx index ca0a5a18796..47a72d79de7 100644 --- a/app/components/settings.tsx +++ b/app/components/settings.tsx @@ -80,6 +80,8 @@ import { useSyncStore } from "../store/sync"; import { nanoid } from "nanoid"; import { useMaskStore } from "../store/mask"; import { ProviderType } from "../utils/cloud"; +import { TTSConfigList } from "./tts-config"; +import { STTConfigList } from "./stt-config"; function EditPromptModal(props: { id: string; onClose: () => void }) { const promptStore = usePromptStore(); @@ -1646,6 +1648,28 @@ export function Settings() { setShowPromptModal(false)} /> )} + + { + const ttsConfig = { ...config.ttsConfig }; + updater(ttsConfig); + config.update((config) => (config.ttsConfig = ttsConfig)); + }} + /> + + + + { + const sttConfig = { ...config.sttConfig }; + updater(sttConfig); + config.update((config) => (config.sttConfig = sttConfig)); + }} + /> + + diff --git a/app/components/stt-config.tsx b/app/components/stt-config.tsx new file mode 100644 index 00000000000..f83d280305f --- /dev/null +++ b/app/components/stt-config.tsx @@ -0,0 +1,51 @@ +import { STTConfig, STTConfigValidator } from "../store"; + +import Locale from "../locales"; +import { ListItem, Select } from "./ui-lib"; +import { DEFAULT_STT_ENGINES } from "../constant"; +import { isFirefox } from "../utils"; + +export function STTConfigList(props: { + sttConfig: STTConfig; + updateConfig: (updater: (config: STTConfig) => void) => void; +}) { + return ( + <> + + + props.updateConfig( + (config) => (config.enable = e.currentTarget.checked), + ) + } + > + + {!isFirefox() && ( + + + + )} + + ); +} diff --git a/app/components/stt.module.scss b/app/components/stt.module.scss new file mode 100644 index 00000000000..ba9f382e40b --- /dev/null +++ b/app/components/stt.module.scss @@ -0,0 +1,119 @@ +@import "../styles/animation.scss"; +.plugin-page { + height: 100%; + display: flex; + flex-direction: column; + + .plugin-page-body { + padding: 20px; + overflow-y: auto; + + .plugin-filter { + width: 100%; + max-width: 100%; + margin-bottom: 20px; + animation: slide-in ease 0.3s; + height: 40px; + + display: flex; + + .search-bar { + flex-grow: 1; + max-width: 100%; + min-width: 0; + outline: none; + } + + .search-bar:focus { + border: 1px solid var(--primary); + } + + .plugin-filter-lang { + height: 100%; + margin-left: 10px; + } + + .plugin-create { + height: 100%; + margin-left: 10px; + box-sizing: border-box; + min-width: 80px; + } + } + + .plugin-item { + display: flex; + justify-content: space-between; + padding: 20px; + border: var(--border-in-light); + animation: slide-in ease 0.3s; + + &:not(:last-child) { + border-bottom: 0; + } + + &:first-child { + border-top-left-radius: 10px; + border-top-right-radius: 10px; + } + + &:last-child { + border-bottom-left-radius: 10px; + border-bottom-right-radius: 10px; + } + + .plugin-header { + display: flex; + align-items: center; + + .plugin-icon { + display: flex; + align-items: center; + justify-content: center; + margin-right: 10px; + } + + .plugin-title { + .plugin-name { + font-size: 14px; + font-weight: bold; + } + .plugin-info { + font-size: 12px; 
+ } + .plugin-runtime-warning { + font-size: 12px; + color: #f86c6c; + } + } + } + + .plugin-actions { + display: flex; + flex-wrap: nowrap; + transition: all ease 0.3s; + justify-content: center; + align-items: center; + } + + @media screen and (max-width: 600px) { + display: flex; + flex-direction: column; + padding-bottom: 10px; + border-radius: 10px; + margin-bottom: 20px; + box-shadow: var(--card-shadow); + + &:not(:last-child) { + border-bottom: var(--border-in-light); + } + + .plugin-actions { + width: 100%; + justify-content: space-between; + padding-top: 10px; + } + } + } + } +} diff --git a/app/components/tts-config.tsx b/app/components/tts-config.tsx new file mode 100644 index 00000000000..f86e3bc520a --- /dev/null +++ b/app/components/tts-config.tsx @@ -0,0 +1,132 @@ +import { PluginConfig, TTSConfig, TTSConfigValidator } from "../store"; + +import Locale from "../locales"; +import { ListItem, Select } from "./ui-lib"; +import { + DEFAULT_TTS_ENGINE, + DEFAULT_TTS_ENGINES, + DEFAULT_TTS_MODELS, + DEFAULT_TTS_VOICES, +} from "../constant"; +import { InputRange } from "./input-range"; + +export function TTSConfigList(props: { + ttsConfig: TTSConfig; + updateConfig: (updater: (config: TTSConfig) => void) => void; +}) { + return ( + <> + + + props.updateConfig( + (config) => (config.enable = e.currentTarget.checked), + ) + } + > + + {/* + + props.updateConfig( + (config) => (config.autoplay = e.currentTarget.checked), + ) + } + > + */} + + + + {props.ttsConfig.engine === DEFAULT_TTS_ENGINE && ( + <> + + + + + + + + { + props.updateConfig( + (config) => + (config.speed = TTSConfigValidator.speed( + e.currentTarget.valueAsNumber, + )), + ); + }} + > + + + )} + + ); +} diff --git a/app/components/tts.module.scss b/app/components/tts.module.scss new file mode 100644 index 00000000000..ba9f382e40b --- /dev/null +++ b/app/components/tts.module.scss @@ -0,0 +1,119 @@ +@import "../styles/animation.scss"; +.plugin-page { + height: 100%; + display: flex; + flex-direction: column; + + .plugin-page-body { + padding: 20px; + overflow-y: auto; + + .plugin-filter { + width: 100%; + max-width: 100%; + margin-bottom: 20px; + animation: slide-in ease 0.3s; + height: 40px; + + display: flex; + + .search-bar { + flex-grow: 1; + max-width: 100%; + min-width: 0; + outline: none; + } + + .search-bar:focus { + border: 1px solid var(--primary); + } + + .plugin-filter-lang { + height: 100%; + margin-left: 10px; + } + + .plugin-create { + height: 100%; + margin-left: 10px; + box-sizing: border-box; + min-width: 80px; + } + } + + .plugin-item { + display: flex; + justify-content: space-between; + padding: 20px; + border: var(--border-in-light); + animation: slide-in ease 0.3s; + + &:not(:last-child) { + border-bottom: 0; + } + + &:first-child { + border-top-left-radius: 10px; + border-top-right-radius: 10px; + } + + &:last-child { + border-bottom-left-radius: 10px; + border-bottom-right-radius: 10px; + } + + .plugin-header { + display: flex; + align-items: center; + + .plugin-icon { + display: flex; + align-items: center; + justify-content: center; + margin-right: 10px; + } + + .plugin-title { + .plugin-name { + font-size: 14px; + font-weight: bold; + } + .plugin-info { + font-size: 12px; + } + .plugin-runtime-warning { + font-size: 12px; + color: #f86c6c; + } + } + } + + .plugin-actions { + display: flex; + flex-wrap: nowrap; + transition: all ease 0.3s; + justify-content: center; + align-items: center; + } + + @media screen and (max-width: 600px) { + display: flex; + flex-direction: column; + 
padding-bottom: 10px; + border-radius: 10px; + margin-bottom: 20px; + box-shadow: var(--card-shadow); + + &:not(:last-child) { + border-bottom: var(--border-in-light); + } + + .plugin-actions { + width: 100%; + justify-content: space-between; + padding-top: 10px; + } + } + } + } +} diff --git a/app/constant.ts b/app/constant.ts index e88d497ca94..ec0445d2e0e 100644 --- a/app/constant.ts +++ b/app/constant.ts @@ -153,6 +153,8 @@ export const Anthropic = { export const OpenaiPath = { ChatPath: "v1/chat/completions", + SpeechPath: "v1/audio/speech", + TranscriptionPath: "v1/audio/transcriptions", ImagePath: "v1/images/generations", UsagePath: "dashboard/billing/usage", SubsPath: "dashboard/billing/subscription", @@ -256,6 +258,24 @@ export const KnowledgeCutOffDate: Record = { "gemini-pro-vision": "2023-12", }; +export const DEFAULT_TTS_ENGINE = "OpenAI-TTS"; +export const DEFAULT_TTS_ENGINES = ["OpenAI-TTS", "Edge-TTS"]; +export const DEFAULT_TTS_MODEL = "tts-1"; +export const DEFAULT_TTS_VOICE = "alloy"; +export const DEFAULT_TTS_MODELS = ["tts-1", "tts-1-hd"]; +export const DEFAULT_TTS_VOICES = [ + "alloy", + "echo", + "fable", + "onyx", + "nova", + "shimmer", +]; + +export const DEFAULT_STT_ENGINE = "WebAPI"; +export const DEFAULT_STT_ENGINES = ["WebAPI", "OpenAI Whisper"]; +export const FIREFOX_DEFAULT_STT_ENGINE = "OpenAI Whisper"; + const openaiModels = [ "gpt-3.5-turbo", "gpt-3.5-turbo-1106", diff --git a/app/icons/speak-stop.svg b/app/icons/speak-stop.svg new file mode 100644 index 00000000000..926ae7bb3d6 --- /dev/null +++ b/app/icons/speak-stop.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/app/icons/speak.svg b/app/icons/speak.svg new file mode 100644 index 00000000000..e02212c9a42 --- /dev/null +++ b/app/icons/speak.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/app/icons/voice-white.svg b/app/icons/voice-white.svg new file mode 100644 index 00000000000..0a4a0ae31cd --- /dev/null +++ b/app/icons/voice-white.svg @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/app/locales/cn.ts b/app/locales/cn.ts index 9a3227d68a5..c6aef51402f 100644 --- a/app/locales/cn.ts +++ b/app/locales/cn.ts @@ -43,6 +43,8 @@ const cn = { Delete: "删除", Edit: "编辑", FullScreen: "全屏", + Speech: "朗读", + StopSpeech: "停止", }, Commands: { new: "新建聊天", @@ -76,6 +78,8 @@ const cn = { return inputHints + ",/ 触发补全,: 触发命令"; }, Send: "发送", + StartSpeak: "说话", + StopSpeak: "停止", Config: { Reset: "清除记忆", SaveAs: "存为面具", @@ -481,6 +485,36 @@ const cn = { Title: "频率惩罚度 (frequency_penalty)", SubTitle: "值越大,越有可能降低重复字词", }, + TTS: { + Enable: { + Title: "启用文本转语音", + SubTitle: "启用文本生成语音服务", + }, + Autoplay: { + Title: "启用自动朗读", + SubTitle: "自动生成语音并播放,需先开启文本转语音开关", + }, + Model: "模型", + Engine: "转换引擎", + Voice: { + Title: "声音", + SubTitle: "生成语音时使用的声音", + }, + Speed: { + Title: "速度", + SubTitle: "生成语音的速度", + }, + }, + STT: { + Enable: { + Title: "启用语音转文本", + SubTitle: "启用语音转文本", + }, + Engine: { + Title: "转换引擎", + SubTitle: "音频转换引擎", + }, + }, }, Store: { DefaultTopic: "新的聊天", diff --git a/app/locales/en.ts b/app/locales/en.ts index 77f3a700ae1..1aa2137ec8d 100644 --- a/app/locales/en.ts +++ b/app/locales/en.ts @@ -45,6 +45,8 @@ const en: LocaleType = { Delete: "Delete", Edit: "Edit", FullScreen: "FullScreen", + Speech: "Play", + StopSpeech: "Stop", }, Commands: { new: "Start a new chat", diff --git a/app/locales/index.ts b/app/locales/index.ts index acdb3e878a1..3078afc7b54 100644 --- a/app/locales/index.ts +++ b/app/locales/index.ts @@ -137,3 +137,34 @@ export function getISOLang() { 
const lang = getLang(); return isoLangString[lang] ?? lang; } + +const DEFAULT_STT_LANG = "zh-CN"; +export const STT_LANG_MAP: Record = { + cn: "zh-CN", + en: "en-US", + pt: "pt-BR", + tw: "zh-TW", + jp: "ja-JP", + ko: "ko-KR", + id: "id-ID", + fr: "fr-FR", + es: "es-ES", + it: "it-IT", + tr: "tr-TR", + de: "de-DE", + vi: "vi-VN", + ru: "ru-RU", + cs: "cs-CZ", + no: "no-NO", + ar: "ar-SA", + bn: "bn-BD", + sk: "sk-SK", +}; + +export function getSTTLang(): string { + try { + return STT_LANG_MAP[getLang()]; + } catch { + return DEFAULT_STT_LANG; + } +} diff --git a/app/store/access.ts b/app/store/access.ts index a1014610e39..0e392e1e92e 100644 --- a/app/store/access.ts +++ b/app/store/access.ts @@ -120,6 +120,9 @@ const DEFAULT_ACCESS_STATE = { disableFastLink: false, customModels: "", defaultModel: "", + + // tts config + edgeTTSVoiceName: "zh-CN-YunxiNeural", }; export const useAccessStore = createPersistStore( @@ -132,6 +135,12 @@ export const useAccessStore = createPersistStore( return get().needCode; }, + edgeVoiceName() { + this.fetch(); + + return get().edgeTTSVoiceName; + }, + isValidOpenAI() { return ensure(get(), ["openaiApiKey"]); }, diff --git a/app/store/config.ts b/app/store/config.ts index e8e3c9863ef..e2de06c9ad8 100644 --- a/app/store/config.ts +++ b/app/store/config.ts @@ -5,12 +5,25 @@ import { DEFAULT_INPUT_TEMPLATE, DEFAULT_MODELS, DEFAULT_SIDEBAR_WIDTH, + DEFAULT_STT_ENGINE, + DEFAULT_STT_ENGINES, + DEFAULT_TTS_ENGINE, + DEFAULT_TTS_ENGINES, + DEFAULT_TTS_MODEL, + DEFAULT_TTS_MODELS, + DEFAULT_TTS_VOICE, + DEFAULT_TTS_VOICES, StoreKey, ServiceProvider, } from "../constant"; import { createPersistStore } from "../utils/store"; export type ModelType = (typeof DEFAULT_MODELS)[number]["name"]; +export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number]; +export type TTSVoiceType = (typeof DEFAULT_TTS_VOICES)[number]; +export type TTSEngineType = (typeof DEFAULT_TTS_ENGINES)[number]; + +export type STTEngineType = (typeof DEFAULT_STT_ENGINES)[number]; export enum SubmitKey { Enter = "Enter", @@ -66,11 +79,26 @@ export const DEFAULT_CONFIG = { quality: "standard" as DalleQuality, style: "vivid" as DalleStyle, }, + + ttsConfig: { + enable: false, + autoplay: false, + engine: DEFAULT_TTS_ENGINE, + model: DEFAULT_TTS_MODEL, + voice: DEFAULT_TTS_VOICE, + speed: 1.0, + }, + sttConfig: { + enable: false, + engine: DEFAULT_STT_ENGINE, + }, }; export type ChatConfig = typeof DEFAULT_CONFIG; export type ModelConfig = ChatConfig["modelConfig"]; +export type TTSConfig = ChatConfig["ttsConfig"]; +export type STTConfig = ChatConfig["sttConfig"]; export function limitNumber( x: number, @@ -85,6 +113,27 @@ export function limitNumber( return Math.min(max, Math.max(min, x)); } +export const TTSConfigValidator = { + engine(x: string) { + return x as TTSEngineType; + }, + model(x: string) { + return x as TTSModelType; + }, + voice(x: string) { + return x as TTSVoiceType; + }, + speed(x: number) { + return limitNumber(x, 0.25, 4.0, 1.0); + }, +}; + +export const STTConfigValidator = { + engine(x: string) { + return x as STTEngineType; + }, +}; + export const ModalConfigValidator = { model(x: string) { return x as ModelType; diff --git a/app/utils/audio.ts b/app/utils/audio.ts new file mode 100644 index 00000000000..f6828c7aac4 --- /dev/null +++ b/app/utils/audio.ts @@ -0,0 +1,45 @@ +type TTSPlayer = { + init: () => void; + play: (audioBuffer: ArrayBuffer, onended: () => void | null) => Promise; + stop: () => void; +}; + +export function createTTSPlayer(): TTSPlayer { + let audioContext: 
AudioContext | null = null; + let audioBufferSourceNode: AudioBufferSourceNode | null = null; + + const init = () => { + audioContext = new (window.AudioContext || window.webkitAudioContext)(); + audioContext.suspend(); + }; + + const play = async (audioBuffer: ArrayBuffer, onended: () => void | null) => { + if (audioBufferSourceNode) { + audioBufferSourceNode.stop(); + audioBufferSourceNode.disconnect(); + } + + const buffer = await audioContext!.decodeAudioData(audioBuffer); + audioBufferSourceNode = audioContext!.createBufferSource(); + audioBufferSourceNode.buffer = buffer; + audioBufferSourceNode.connect(audioContext!.destination); + audioContext!.resume().then(() => { + audioBufferSourceNode!.start(); + }); + audioBufferSourceNode.onended = onended; + }; + + const stop = () => { + if (audioBufferSourceNode) { + audioBufferSourceNode.stop(); + audioBufferSourceNode.disconnect(); + audioBufferSourceNode = null; + } + if (audioContext) { + audioContext.close(); + audioContext = null; + } + }; + + return { init, play, stop }; +} diff --git a/app/utils/ms_edge_tts.ts b/app/utils/ms_edge_tts.ts new file mode 100644 index 00000000000..f291ebada93 --- /dev/null +++ b/app/utils/ms_edge_tts.ts @@ -0,0 +1,391 @@ +// import axios from "axios"; +import { Buffer } from "buffer"; +import { randomBytes } from "crypto"; +import { Readable } from "stream"; + +// Modified according to https://github.com/Migushthe2nd/MsEdgeTTS + +/** + * https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,volume,-Indicates%20the%20volume + */ +export enum VOLUME { + SILENT = "silent", + X_SOFT = "x-soft", + SOFT = "soft", + MEDIUM = "medium", + LOUD = "loud", + X_LOUD = "x-LOUD", + DEFAULT = "default", +} + +/** + * https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,rate,-Indicates%20the%20speaking + */ +export enum RATE { + X_SLOW = "x-slow", + SLOW = "slow", + MEDIUM = "medium", + FAST = "fast", + X_FAST = "x-fast", + DEFAULT = "default", +} + +/** + * https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,pitch,-Indicates%20the%20baseline + */ +export enum PITCH { + X_LOW = "x-low", + LOW = "low", + MEDIUM = "medium", + HIGH = "high", + X_HIGH = "x-high", + DEFAULT = "default", +} + +/** + * Only a few of the [possible formats](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech#audio-outputs) are accepted. 
+ */ +export enum OUTPUT_FORMAT { + // Streaming ============================= + // AMR_WB_16000HZ = "amr-wb-16000hz", + // AUDIO_16KHZ_16BIT_32KBPS_MONO_OPUS = "audio-16khz-16bit-32kbps-mono-opus", + // AUDIO_16KHZ_32KBITRATE_MONO_MP3 = "audio-16khz-32kbitrate-mono-mp3", + // AUDIO_16KHZ_64KBITRATE_MONO_MP3 = "audio-16khz-64kbitrate-mono-mp3", + // AUDIO_16KHZ_128KBITRATE_MONO_MP3 = "audio-16khz-128kbitrate-mono-mp3", + // AUDIO_24KHZ_16BIT_24KBPS_MONO_OPUS = "audio-24khz-16bit-24kbps-mono-opus", + // AUDIO_24KHZ_16BIT_48KBPS_MONO_OPUS = "audio-24khz-16bit-48kbps-mono-opus", + AUDIO_24KHZ_48KBITRATE_MONO_MP3 = "audio-24khz-48kbitrate-mono-mp3", + AUDIO_24KHZ_96KBITRATE_MONO_MP3 = "audio-24khz-96kbitrate-mono-mp3", + // AUDIO_24KHZ_160KBITRATE_MONO_MP3 = "audio-24khz-160kbitrate-mono-mp3", + // AUDIO_48KHZ_96KBITRATE_MONO_MP3 = "audio-48khz-96kbitrate-mono-mp3", + // AUDIO_48KHZ_192KBITRATE_MONO_MP3 = "audio-48khz-192kbitrate-mono-mp3", + // OGG_16KHZ_16BIT_MONO_OPUS = "ogg-16khz-16bit-mono-opus", + // OGG_24KHZ_16BIT_MONO_OPUS = "ogg-24khz-16bit-mono-opus", + // OGG_48KHZ_16BIT_MONO_OPUS = "ogg-48khz-16bit-mono-opus", + // RAW_8KHZ_8BIT_MONO_ALAW = "raw-8khz-8bit-mono-alaw", + // RAW_8KHZ_8BIT_MONO_MULAW = "raw-8khz-8bit-mono-mulaw", + // RAW_8KHZ_16BIT_MONO_PCM = "raw-8khz-16bit-mono-pcm", + // RAW_16KHZ_16BIT_MONO_PCM = "raw-16khz-16bit-mono-pcm", + // RAW_16KHZ_16BIT_MONO_TRUESILK = "raw-16khz-16bit-mono-truesilk", + // RAW_22050HZ_16BIT_MONO_PCM = "raw-22050hz-16bit-mono-pcm", + // RAW_24KHZ_16BIT_MONO_PCM = "raw-24khz-16bit-mono-pcm", + // RAW_24KHZ_16BIT_MONO_TRUESILK = "raw-24khz-16bit-mono-truesilk", + // RAW_44100HZ_16BIT_MONO_PCM = "raw-44100hz-16bit-mono-pcm", + // RAW_48KHZ_16BIT_MONO_PCM = "raw-48khz-16bit-mono-pcm", + // WEBM_16KHZ_16BIT_MONO_OPUS = "webm-16khz-16bit-mono-opus", + // WEBM_24KHZ_16BIT_24KBPS_MONO_OPUS = "webm-24khz-16bit-24kbps-mono-opus", + WEBM_24KHZ_16BIT_MONO_OPUS = "webm-24khz-16bit-mono-opus", + // Non-streaming ============================= + // RIFF_8KHZ_8BIT_MONO_ALAW = "riff-8khz-8bit-mono-alaw", + // RIFF_8KHZ_8BIT_MONO_MULAW = "riff-8khz-8bit-mono-mulaw", + // RIFF_8KHZ_16BIT_MONO_PCM = "riff-8khz-16bit-mono-pcm", + // RIFF_22050HZ_16BIT_MONO_PCM = "riff-22050hz-16bit-mono-pcm", + // RIFF_24KHZ_16BIT_MONO_PCM = "riff-24khz-16bit-mono-pcm", + // RIFF_44100HZ_16BIT_MONO_PCM = "riff-44100hz-16bit-mono-pcm", + // RIFF_48KHZ_16BIT_MONO_PCM = "riff-48khz-16bit-mono-pcm", +} + +export type Voice = { + Name: string; + ShortName: string; + Gender: string; + Locale: string; + SuggestedCodec: string; + FriendlyName: string; + Status: string; +}; + +export class ProsodyOptions { + /** + * The pitch to use. + * Can be any {@link PITCH}, or a relative frequency in Hz (+50Hz), a relative semitone (+2st), or a relative percentage (+50%). + * [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,pitch,-Indicates%20the%20baseline) + */ + pitch?: PITCH | string = "+0Hz"; + /** + * The rate to use. + * Can be any {@link RATE}, or a relative number (0.5), or string with a relative percentage (+50%). + * [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,rate,-Indicates%20the%20speaking) + */ + rate?: RATE | string | number = 1.0; + /** + * The volume to use. + * Can be any {@link VOLUME}, or an absolute number (0, 100), a string with a relative number (+50), or a relative percentage (+50%). 
+ * [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,volume,-Indicates%20the%20volume) + */ + volume?: VOLUME | string | number = 100.0; +} + +export class MsEdgeTTS { + static OUTPUT_FORMAT = OUTPUT_FORMAT; + private static TRUSTED_CLIENT_TOKEN = "6A5AA1D4EAFF4E9FB37E23D68491D6F4"; + private static VOICES_URL = `https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=${MsEdgeTTS.TRUSTED_CLIENT_TOKEN}`; + private static SYNTH_URL = `wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=${MsEdgeTTS.TRUSTED_CLIENT_TOKEN}`; + private static BINARY_DELIM = "Path:audio\r\n"; + private static VOICE_LANG_REGEX = /\w{2}-\w{2}/; + private readonly _enableLogger; + private _ws: WebSocket | undefined; + private _voice: any; + private _voiceLocale: any; + private _outputFormat: any; + private _streams: { [key: string]: Readable } = {}; + private _startTime = 0; + + private _log(...o: any[]) { + if (this._enableLogger) { + console.log(...o); + } + } + + /** + * Create a new `MsEdgeTTS` instance. + * + * @param agent (optional, **NOT SUPPORTED IN BROWSER**) Use a custom http.Agent implementation like [https-proxy-agent](https://github.com/TooTallNate/proxy-agents) or [socks-proxy-agent](https://github.com/TooTallNate/proxy-agents/tree/main/packages/socks-proxy-agent). + * @param enableLogger=false whether to enable the built-in logger. This logs connections inits, disconnects, and incoming data to the console + */ + public constructor(enableLogger: boolean = false) { + this._enableLogger = enableLogger; + } + + private async _send(message: any) { + for (let i = 1; i <= 3 && this._ws!.readyState !== this._ws!.OPEN; i++) { + if (i == 1) { + this._startTime = Date.now(); + } + this._log("connecting: ", i); + await this._initClient(); + } + this._ws!.send(message); + } + + private _initClient() { + this._ws = new WebSocket(MsEdgeTTS.SYNTH_URL); + + this._ws.binaryType = "arraybuffer"; + return new Promise((resolve, reject) => { + this._ws!.onopen = () => { + this._log( + "Connected in", + (Date.now() - this._startTime) / 1000, + "seconds", + ); + this._send( + `Content-Type:application/json; charset=utf-8\r\nPath:speech.config\r\n\r\n + { + "context": { + "synthesis": { + "audio": { + "metadataoptions": { + "sentenceBoundaryEnabled": "false", + "wordBoundaryEnabled": "false" + }, + "outputFormat": "${this._outputFormat}" + } + } + } + } + `, + ).then(resolve); + }; + this._ws!.onmessage = (m: any) => { + const buffer = Buffer.from(m.data as ArrayBuffer); + const message = buffer.toString(); + const requestId = /X-RequestId:(.*?)\r\n/gm.exec(message)![1]; + if (message.includes("Path:turn.start")) { + // start of turn, ignore + } else if (message.includes("Path:turn.end")) { + // end of turn, close stream + this._streams[requestId].push(null); + } else if (message.includes("Path:response")) { + // context response, ignore + } else if ( + message.includes("Path:audio") && + m.data instanceof ArrayBuffer + ) { + this._pushAudioData(buffer, requestId); + } else { + this._log("UNKNOWN MESSAGE", message); + } + }; + this._ws!.onclose = () => { + this._log( + "disconnected after:", + (Date.now() - this._startTime) / 1000, + "seconds", + ); + for (const requestId in this._streams) { + this._streams[requestId].push(null); + } + }; + this._ws!.onerror = function (error: any) { + reject("Connect Error: " + error); + }; + }); + } + + private 
_pushAudioData(audioBuffer: Buffer, requestId: string) { + const audioStartIndex = + audioBuffer.indexOf(MsEdgeTTS.BINARY_DELIM) + + MsEdgeTTS.BINARY_DELIM.length; + const audioData = audioBuffer.subarray(audioStartIndex); + this._streams[requestId].push(audioData); + this._log("received audio chunk, size: ", audioData?.length); + } + + private _SSMLTemplate(input: string, options: ProsodyOptions = {}): string { + // in case future updates to the edge API block these elements, we'll be concatenating strings. + options = { ...new ProsodyOptions(), ...options }; + return ` + + + ${input} + + + `; + } + + /** + * Fetch the list of voices available in Microsoft Edge. + * These, however, are not all. The complete list of voices supported by this module [can be found here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support) (neural, standard, and preview). + */ + // getVoices(): Promise { + // return new Promise((resolve, reject) => { + // axios + // .get(MsEdgeTTS.VOICES_URL) + // .then((res) => resolve(res.data)) + // .catch(reject); + // }); + // } + getVoices(): Promise { + return fetch(MsEdgeTTS.VOICES_URL) + .then((response) => { + if (!response.ok) { + throw new Error("Network response was not ok"); + } + return response.json(); + }) + .then((data) => data as Voice[]) + .catch((error) => { + throw error; + }); + } + + /** + * Sets the required information for the speech to be synthesised and inits a new WebSocket connection. + * Must be called at least once before text can be synthesised. + * Saved in this instance. Can be called at any time times to update the metadata. + * + * @param voiceName a string with any `ShortName`. A list of all available neural voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#neural-voices). However, it is not limited to neural voices: standard voices can also be used. A list of standard voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#standard-voices) + * @param outputFormat any {@link OUTPUT_FORMAT} + * @param voiceLocale (optional) any voice locale that is supported by the voice. See the list of all voices for compatibility. If not provided, the locale will be inferred from the `voiceName` + */ + async setMetadata( + voiceName: string, + outputFormat: OUTPUT_FORMAT, + voiceLocale?: string, + ) { + const oldVoice = this._voice; + const oldVoiceLocale = this._voiceLocale; + const oldOutputFormat = this._outputFormat; + + this._voice = voiceName; + this._voiceLocale = voiceLocale; + if (!this._voiceLocale) { + const voiceLangMatch = MsEdgeTTS.VOICE_LANG_REGEX.exec(this._voice); + if (!voiceLangMatch) + throw new Error("Could not infer voiceLocale from voiceName!"); + this._voiceLocale = voiceLangMatch[0]; + } + this._outputFormat = outputFormat; + + const changed = + oldVoice !== this._voice || + oldVoiceLocale !== this._voiceLocale || + oldOutputFormat !== this._outputFormat; + + // create new client + if (changed || this._ws!.readyState !== this._ws!.OPEN) { + this._startTime = Date.now(); + await this._initClient(); + } + } + + private _metadataCheck() { + if (!this._ws) + throw new Error( + "Speech synthesis not configured yet. Run setMetadata before calling toStream or toFile.", + ); + } + + /** + * Close the WebSocket connection. + */ + close() { + this._ws!.close(); + } + + /** + * Writes raw audio synthesised from text in real-time to a {@link Readable}. 
Uses a basic {@link _SSMLTemplate SML template}. + * + * @param input the text to synthesise. Can include SSML elements. + * @param options (optional) {@link ProsodyOptions} + * @returns {Readable} - a `stream.Readable` with the audio data + */ + toStream(input: string, options?: ProsodyOptions): Readable { + const { stream } = this._rawSSMLRequest(this._SSMLTemplate(input, options)); + return stream; + } + + toArrayBuffer(input: string, options?: ProsodyOptions): Promise { + return new Promise((resolve, reject) => { + let data: Uint8Array[] = []; + const readable = this.toStream(input, options); + readable.on("data", (chunk) => { + data.push(chunk); + }); + + readable.on("end", () => { + resolve(Buffer.concat(data).buffer); + }); + + readable.on("error", (err) => { + reject(err); + }); + }); + } + + /** + * Writes raw audio synthesised from a request in real-time to a {@link Readable}. Has no SSML template. Basic SSML should be provided in the request. + * + * @param requestSSML the SSML to send. SSML elements required in order to work. + * @returns {Readable} - a `stream.Readable` with the audio data + */ + rawToStream(requestSSML: string): Readable { + const { stream } = this._rawSSMLRequest(requestSSML); + return stream; + } + + private _rawSSMLRequest(requestSSML: string): { + stream: Readable; + requestId: string; + } { + this._metadataCheck(); + + const requestId = randomBytes(16).toString("hex"); + const request = + `X-RequestId:${requestId}\r\nContent-Type:application/ssml+xml\r\nPath:ssml\r\n\r\n + ` + requestSSML.trim(); + // https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-synthesis-markup + const self = this; + const stream = new Readable({ + read() {}, + destroy(error: Error | null, callback: (error: Error | null) => void) { + delete self._streams[requestId]; + callback(error); + }, + }); + this._streams[requestId] = stream; + this._send(request).then(); + return { stream, requestId }; + } +} diff --git a/app/utils/speech.ts b/app/utils/speech.ts new file mode 100644 index 00000000000..dc8102879fb --- /dev/null +++ b/app/utils/speech.ts @@ -0,0 +1,126 @@ +import { ChatGPTApi } from "../client/platforms/openai"; +import { getSTTLang } from "../locales"; +import { isFirefox } from "../utils"; + +export type TranscriptionCallback = (transcription: string) => void; + +export abstract class SpeechApi { + protected onTranscription: TranscriptionCallback = () => {}; + + abstract isListening(): boolean; + abstract start(): Promise; + abstract stop(): Promise; + + onTranscriptionReceived(callback: TranscriptionCallback) { + this.onTranscription = callback; + } +} + +export class OpenAITranscriptionApi extends SpeechApi { + private listeningStatus = false; + private mediaRecorder: MediaRecorder | null = null; + private stream: MediaStream | null = null; + private audioChunks: Blob[] = []; + + isListening = () => this.listeningStatus; + + constructor(transcriptionCallback?: TranscriptionCallback) { + super(); + if (transcriptionCallback) { + this.onTranscriptionReceived(transcriptionCallback); + } + } + + async start(): Promise { + // @ts-ignore + navigator.getUserMedia = + // @ts-ignore + navigator.getUserMedia || + // @ts-ignore + navigator.webkitGetUserMedia || + // @ts-ignore + navigator.mozGetUserMedia || + // @ts-ignore + navigator.msGetUserMedia; + if (navigator.mediaDevices) { + const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); + this.mediaRecorder = new MediaRecorder(stream); + this.mediaRecorder.ondataavailable = (e) => { 
+ if (e.data && e.data.size > 0) { + this.audioChunks.push(e.data); + } + }; + + this.stream = stream; + } else { + console.warn("Media Decives will work only with SSL"); + return; + } + + this.audioChunks = []; + + // this.recorder.addEventListener("dataavailable", (event) => { + // this.audioChunks.push(event.data); + // }); + + this.mediaRecorder.start(1000); + this.listeningStatus = true; + } + + async stop(): Promise { + if (!this.mediaRecorder || !this.listeningStatus) { + return; + } + + return new Promise((resolve) => { + this.mediaRecorder!.addEventListener("stop", async () => { + const audioBlob = new Blob(this.audioChunks, { type: "audio/wav" }); + const llm = new ChatGPTApi(); + const transcription = await llm.transcription({ file: audioBlob }); + this.onTranscription(transcription); + this.listeningStatus = false; + resolve(); + }); + + this.mediaRecorder!.stop(); + }); + } +} + +export class WebTranscriptionApi extends SpeechApi { + private listeningStatus = false; + private recognitionInstance: any | null = null; + + isListening = () => this.listeningStatus; + + constructor(transcriptionCallback?: TranscriptionCallback) { + super(); + if (isFirefox()) return; + const SpeechRecognition = + (window as any).SpeechRecognition || + (window as any).webkitSpeechRecognition; + this.recognitionInstance = new SpeechRecognition(); + this.recognitionInstance.continuous = true; + this.recognitionInstance.interimResults = true; + this.recognitionInstance.lang = getSTTLang(); + if (transcriptionCallback) { + this.onTranscriptionReceived(transcriptionCallback); + } + this.recognitionInstance.onresult = (event: any) => { + const result = event.results[event.results.length - 1]; + if (result.isFinal) { + this.onTranscription(result[0].transcript); + } + }; + } + + async start(): Promise { + this.listeningStatus = true; + await this.recognitionInstance.start(); + } + + async stop(): Promise { + this.listeningStatus = false; + await this.recognitionInstance.stop(); + } +} diff --git a/package.json b/package.json index eb0a5ef6735..02d36ae3167 100644 --- a/package.json +++ b/package.json @@ -30,6 +30,7 @@ "html-to-image": "^1.11.11", "lodash-es": "^4.17.21", "mermaid": "^10.6.1", + "markdown-to-txt": "^2.0.1", "nanoid": "^5.0.3", "next": "^14.1.1", "node-fetch": "^3.3.1", @@ -73,4 +74,4 @@ "lint-staged/yaml": "^2.2.2" }, "packageManager": "yarn@1.22.19" -} +} \ No newline at end of file diff --git a/yarn.lock b/yarn.lock index 793c845d722..3b76a49e780 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4378,11 +4378,21 @@ lodash.debounce@^4.0.8: resolved "https://registry.yarnpkg.com/lodash.debounce/-/lodash.debounce-4.0.8.tgz#82d79bff30a67c4005ffd5e2515300ad9ca4d7af" integrity sha512-FT1yDzDYEoYWhnSGnpE/4Kj1fLZkDFyqRb7fNt6FdYOSxlUWAtp42Eh6Wb0rGIv/m9Bgo7x4GhQbm5Ys4SG5ow== +lodash.escape@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/lodash.escape/-/lodash.escape-4.0.1.tgz#c9044690c21e04294beaa517712fded1fa88de98" + integrity sha512-nXEOnb/jK9g0DYMr1/Xvq6l5xMD7GDG55+GSYIYmS0G4tBk/hURD4JR9WCavs04t33WmJx9kCyp9vJ+mr4BOUw== + lodash.merge@^4.6.2: version "4.6.2" resolved "https://registry.yarnpkg.com/lodash.merge/-/lodash.merge-4.6.2.tgz#558aa53b43b661e1925a0afdfa36a9a1085fe57a" integrity sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ== +lodash.unescape@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/lodash.unescape/-/lodash.unescape-4.0.1.tgz#bf2249886ce514cda112fae9218cdc065211fc9c" + integrity 
sha512-DhhGRshNS1aX6s5YdBE3njCCouPgnG29ebyHvImlZzXZf2SHgt+J08DHgytTPnpywNbO1Y8mNUFyQuIDBq2JZg== + lodash@^4.17.21: version "4.17.21" resolved "https://registry.npmmirror.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c" @@ -4438,6 +4448,20 @@ markdown-table@^3.0.0: resolved "https://registry.yarnpkg.com/markdown-table/-/markdown-table-3.0.3.tgz#e6331d30e493127e031dd385488b5bd326e4a6bd" integrity sha512-Z1NL3Tb1M9wH4XESsCDEksWoKTdlUafKc4pt0GRwjUyXaCFZ+dc3g2erqB6zm3szA2IUSi7VnPI+o/9jnxh9hw== +markdown-to-txt@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/markdown-to-txt/-/markdown-to-txt-2.0.1.tgz#bfd6233a2635443cc24900a158b60c6af36ce9c5" + integrity sha512-Hsj7KTN8k1gutlLum3vosHwVZGnv8/cbYKWVkUyo/D1rzOYddbDesILebRfOsaVfjIBJank/AVOySBlHAYqfZw== + dependencies: + lodash.escape "^4.0.1" + lodash.unescape "^4.0.1" + marked "^4.0.14" + +marked@^4.0.14: + version "4.3.0" + resolved "https://registry.yarnpkg.com/marked/-/marked-4.3.0.tgz#796362821b019f734054582038b116481b456cf3" + integrity sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A== + mdast-util-definitions@^5.0.0: version "5.1.2" resolved "https://registry.yarnpkg.com/mdast-util-definitions/-/mdast-util-definitions-5.1.2.tgz#9910abb60ac5d7115d6819b57ae0bcef07a3f7a7" From 93f1762e6c85e2a71a70534dc8a84b322d3643e7 Mon Sep 17 00:00:00 2001 From: DDMeaqua Date: Tue, 27 Aug 2024 17:02:44 +0800 Subject: [PATCH 03/36] chore: wip --- app/client/platforms/alibaba.ts | 7 +++++++ app/client/platforms/anthropic.ts | 7 +++++++ app/client/platforms/baidu.ts | 7 +++++++ app/client/platforms/bytedance.ts | 7 +++++++ app/client/platforms/google.ts | 6 ++++++ app/client/platforms/iflytek.ts | 7 +++++++ app/client/platforms/moonshot.ts | 7 +++++++ app/client/platforms/tencent.ts | 7 +++++++ 8 files changed, 55 insertions(+) diff --git a/app/client/platforms/alibaba.ts b/app/client/platforms/alibaba.ts index d5fa3042fc1..477ef193fdc 100644 --- a/app/client/platforms/alibaba.ts +++ b/app/client/platforms/alibaba.ts @@ -83,6 +83,13 @@ export class QwenApi implements LLMApi { return res?.output?.choices?.at(0)?.message?.content ?? 
""; } + speech(options: SpeechOptions): Promise { + throw new Error("Method not implemented."); + } + transcription(options: TranscriptionOptions): Promise { + throw new Error("Method not implemented."); + } + async chat(options: ChatOptions) { const messages = options.messages.map((v) => ({ role: v.role, diff --git a/app/client/platforms/anthropic.ts b/app/client/platforms/anthropic.ts index b079ba1ada2..df4dc7f3830 100644 --- a/app/client/platforms/anthropic.ts +++ b/app/client/platforms/anthropic.ts @@ -73,6 +73,13 @@ const ClaudeMapper = { const keys = ["claude-2, claude-instant-1"]; export class ClaudeApi implements LLMApi { + speech(options: SpeechOptions): Promise { + throw new Error("Method not implemented."); + } + transcription(options: TranscriptionOptions): Promise { + throw new Error("Method not implemented."); + } + extractMessage(res: any) { console.log("[Response] claude response: ", res); diff --git a/app/client/platforms/baidu.ts b/app/client/platforms/baidu.ts index 3be147f4985..2b3119c2a2c 100644 --- a/app/client/platforms/baidu.ts +++ b/app/client/platforms/baidu.ts @@ -75,6 +75,13 @@ export class ErnieApi implements LLMApi { return [baseUrl, path].join("/"); } + speech(options: SpeechOptions): Promise { + throw new Error("Method not implemented."); + } + transcription(options: TranscriptionOptions): Promise { + throw new Error("Method not implemented."); + } + async chat(options: ChatOptions) { const messages = options.messages.map((v) => ({ // "error_code": 336006, "error_msg": "the role of message with even index in the messages must be user or function", diff --git a/app/client/platforms/bytedance.ts b/app/client/platforms/bytedance.ts index 7677cafe12b..31c0be3d33b 100644 --- a/app/client/platforms/bytedance.ts +++ b/app/client/platforms/bytedance.ts @@ -77,6 +77,13 @@ export class DoubaoApi implements LLMApi { return res.choices?.at(0)?.message?.content ?? ""; } + speech(options: SpeechOptions): Promise { + throw new Error("Method not implemented."); + } + transcription(options: TranscriptionOptions): Promise { + throw new Error("Method not implemented."); + } + async chat(options: ChatOptions) { const messages = options.messages.map((v) => ({ role: v.role, diff --git a/app/client/platforms/google.ts b/app/client/platforms/google.ts index 12d8846357a..6c6c3b25e26 100644 --- a/app/client/platforms/google.ts +++ b/app/client/platforms/google.ts @@ -56,6 +56,12 @@ export class GeminiProApi implements LLMApi { "" ); } + speech(options: SpeechOptions): Promise { + throw new Error("Method not implemented."); + } + transcription(options: TranscriptionOptions): Promise { + throw new Error("Method not implemented."); + } async chat(options: ChatOptions): Promise { const apiClient = this; let multimodal = false; diff --git a/app/client/platforms/iflytek.ts b/app/client/platforms/iflytek.ts index 73cea5ba0e7..77a4571e124 100644 --- a/app/client/platforms/iflytek.ts +++ b/app/client/platforms/iflytek.ts @@ -53,6 +53,13 @@ export class SparkApi implements LLMApi { return res.choices?.at(0)?.message?.content ?? 
""; } + speech(options: SpeechOptions): Promise { + throw new Error("Method not implemented."); + } + transcription(options: TranscriptionOptions): Promise { + throw new Error("Method not implemented."); + } + async chat(options: ChatOptions) { const messages: ChatOptions["messages"] = []; for (const v of options.messages) { diff --git a/app/client/platforms/moonshot.ts b/app/client/platforms/moonshot.ts index 7d257ccb2e6..22bbaf01f46 100644 --- a/app/client/platforms/moonshot.ts +++ b/app/client/platforms/moonshot.ts @@ -66,6 +66,13 @@ export class MoonshotApi implements LLMApi { return res.choices?.at(0)?.message?.content ?? ""; } + speech(options: SpeechOptions): Promise { + throw new Error("Method not implemented."); + } + transcription(options: TranscriptionOptions): Promise { + throw new Error("Method not implemented."); + } + async chat(options: ChatOptions) { const messages: ChatOptions["messages"] = []; for (const v of options.messages) { diff --git a/app/client/platforms/tencent.ts b/app/client/platforms/tencent.ts index 579008a9b9d..5eb48791b01 100644 --- a/app/client/platforms/tencent.ts +++ b/app/client/platforms/tencent.ts @@ -89,6 +89,13 @@ export class HunyuanApi implements LLMApi { return res.Choices?.at(0)?.Message?.Content ?? ""; } + speech(options: SpeechOptions): Promise { + throw new Error("Method not implemented."); + } + transcription(options: TranscriptionOptions): Promise { + throw new Error("Method not implemented."); + } + async chat(options: ChatOptions) { const visionModel = isVisionModel(options.config.model); const messages = options.messages.map((v, index) => ({ From f86b220c922a9209e99e2a3647e97ab72f47de3d Mon Sep 17 00:00:00 2001 From: DDMeaqua Date: Tue, 27 Aug 2024 19:50:16 +0800 Subject: [PATCH 04/36] feat: add voice action --- app/components/chat.tsx | 113 ++++++++++++++++++-------------------- app/icons/voice-white.svg | 6 +- 2 files changed, 55 insertions(+), 64 deletions(-) diff --git a/app/components/chat.tsx b/app/components/chat.tsx index e5391ad226c..624b7618e21 100644 --- a/app/components/chat.tsx +++ b/app/components/chat.tsx @@ -453,6 +453,7 @@ export function ChatActions(props: { showPromptHints: () => void; hitBottom: boolean; uploading: boolean; + setUserInput: (input: string) => void; }) { const config = useAppConfig(); const navigate = useNavigate(); @@ -544,6 +545,44 @@ export function ChatActions(props: { } }, [chatStore, currentModel, models]); + const [isListening, setIsListening] = useState(false); + const [isTranscription, setIsTranscription] = useState(false); + const [speechApi, setSpeechApi] = useState(null); + + useEffect(() => { + if (isFirefox()) config.sttConfig.engine = FIREFOX_DEFAULT_STT_ENGINE; + setSpeechApi( + config.sttConfig.engine === DEFAULT_STT_ENGINE + ? new WebTranscriptionApi((transcription) => + onRecognitionEnd(transcription), + ) + : new OpenAITranscriptionApi((transcription) => + onRecognitionEnd(transcription), + ), + ); + }, []); + + const startListening = async () => { + if (speechApi) { + await speechApi.start(); + setIsListening(true); + } + }; + const stopListening = async () => { + if (speechApi) { + if (config.sttConfig.engine !== DEFAULT_STT_ENGINE) + setIsTranscription(true); + await speechApi.stop(); + setIsListening(false); + } + }; + const onRecognitionEnd = (finalTranscript: string) => { + console.log(finalTranscript); + if (finalTranscript) props.setUserInput(finalTranscript); + if (config.sttConfig.engine !== DEFAULT_STT_ENGINE) + setIsTranscription(false); + }; + return (
{couldStop && ( @@ -768,6 +807,16 @@ export function ChatActions(props: { }} /> )} + + {config.sttConfig.enable && ( + + isListening ? await stopListening() : await startListening() + } + text={isListening ? Locale.Chat.StopSpeak : Locale.Chat.StartSpeak} + icon={} + /> + )}
); } @@ -940,33 +989,6 @@ function _Chat() { } }; - const [isListening, setIsListening] = useState(false); - const [isTranscription, setIsTranscription] = useState(false); - const [speechApi, setSpeechApi] = useState(null); - - const startListening = async () => { - if (speechApi) { - await speechApi.start(); - setIsListening(true); - } - }; - - const stopListening = async () => { - if (speechApi) { - if (config.sttConfig.engine !== DEFAULT_STT_ENGINE) - setIsTranscription(true); - await speechApi.stop(); - setIsListening(false); - } - }; - - const onRecognitionEnd = (finalTranscript: string) => { - console.log(finalTranscript); - if (finalTranscript) setUserInput(finalTranscript); - if (config.sttConfig.engine !== DEFAULT_STT_ENGINE) - setIsTranscription(false); - }; - const doSubmit = (userInput: string) => { if (userInput.trim() === "") return; const matchCommand = chatCommands.match(userInput); @@ -1037,16 +1059,6 @@ function _Chat() { } }); // eslint-disable-next-line react-hooks/exhaustive-deps - if (isFirefox()) config.sttConfig.engine = FIREFOX_DEFAULT_STT_ENGINE; - setSpeechApi( - config.sttConfig.engine === DEFAULT_STT_ENGINE - ? new WebTranscriptionApi((transcription) => - onRecognitionEnd(transcription), - ) - : new OpenAITranscriptionApi((transcription) => - onRecognitionEnd(transcription), - ), - ); }, []); // check if should send message @@ -1784,6 +1796,7 @@ function _Chat() { setUserInput("/"); onSearch(""); }} + setUserInput={setUserInput} /> diff --git a/app/icons/voice-white.svg b/app/icons/voice-white.svg index 0a4a0ae31cd..e7d5cbcc86f 100644 --- a/app/icons/voice-white.svg +++ b/app/icons/voice-white.svg @@ -1,4 +1,4 @@ - + @@ -7,9 +7,9 @@ - + - + From e9f90a4d82edbb446aedaef7ae27984d21b870d4 Mon Sep 17 00:00:00 2001 From: Meaqua Date: Tue, 27 Aug 2024 21:49:00 +0800 Subject: [PATCH 05/36] fix: i18n --- app/locales/en.ts | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/app/locales/en.ts b/app/locales/en.ts index 1aa2137ec8d..ae20a0d4f89 100644 --- a/app/locales/en.ts +++ b/app/locales/en.ts @@ -490,6 +490,37 @@ const en: LocaleType = { SubTitle: "A larger value decreasing the likelihood to repeat the same line", }, + TTS: { + Enable: { + Title: "Enable TTS", + SubTitle: "Enable text-to-speech service", + }, + Autoplay: { + Title: "Enable Autoplay", + SubTitle: + "Automatically generate speech and play, you need to enable the text-to-speech switch first", + }, + Model: "Model", + Voice: { + Title: "Voice", + SubTitle: "The voice to use when generating the audio", + }, + Speed: { + Title: "Speed", + SubTitle: "The speed of the generated audio", + }, + Engine: "TTS Engine", + }, + STT: { + Enable: { + Title: "Enable STT", + SubTitle: "Enable Speech-to-Text", + }, + Engine: { + Title: "STT Engine", + SubTitle: "Text-to-Speech Engine", + }, + }, }, Store: { DefaultTopic: "New Conversation", From ed5aea0521797841981919fa3c1ebb6340c35168 Mon Sep 17 00:00:00 2001 From: DDMeaqua Date: Wed, 28 Aug 2024 12:37:19 +0800 Subject: [PATCH 06/36] fix: bug --- app/client/platforms/alibaba.ts | 2 ++ app/client/platforms/anthropic.ts | 9 ++++++++- app/client/platforms/baidu.ts | 2 ++ app/client/platforms/bytedance.ts | 2 ++ app/client/platforms/google.ts | 10 +++++++++- app/client/platforms/iflytek.ts | 9 ++++++++- app/client/platforms/moonshot.ts | 2 ++ app/client/platforms/tencent.ts | 2 ++ 8 files changed, 35 insertions(+), 3 deletions(-) diff --git a/app/client/platforms/alibaba.ts b/app/client/platforms/alibaba.ts index 477ef193fdc..e839c69f01f 
100644 --- a/app/client/platforms/alibaba.ts +++ b/app/client/platforms/alibaba.ts @@ -12,6 +12,8 @@ import { getHeaders, LLMApi, LLMModel, + SpeechOptions, + TranscriptionOptions, MultimodalContent, } from "../api"; import Locale from "../../locales"; diff --git a/app/client/platforms/anthropic.ts b/app/client/platforms/anthropic.ts index df4dc7f3830..f0f95f0fd98 100644 --- a/app/client/platforms/anthropic.ts +++ b/app/client/platforms/anthropic.ts @@ -1,5 +1,12 @@ import { ACCESS_CODE_PREFIX, Anthropic, ApiPath } from "@/app/constant"; -import { ChatOptions, getHeaders, LLMApi, MultimodalContent } from "../api"; +import { + ChatOptions, + getHeaders, + LLMApi, + MultimodalContent, + SpeechOptions, + TranscriptionOptions, +} from "../api"; import { useAccessStore, useAppConfig, useChatStore } from "@/app/store"; import { getClientConfig } from "@/app/config/client"; import { DEFAULT_API_HOST } from "@/app/constant"; diff --git a/app/client/platforms/baidu.ts b/app/client/platforms/baidu.ts index 2b3119c2a2c..0c2be5fb14b 100644 --- a/app/client/platforms/baidu.ts +++ b/app/client/platforms/baidu.ts @@ -14,6 +14,8 @@ import { LLMApi, LLMModel, MultimodalContent, + SpeechOptions, + TranscriptionOptions, } from "../api"; import Locale from "../../locales"; import { diff --git a/app/client/platforms/bytedance.ts b/app/client/platforms/bytedance.ts index 31c0be3d33b..5a0c9b8b12e 100644 --- a/app/client/platforms/bytedance.ts +++ b/app/client/platforms/bytedance.ts @@ -13,6 +13,8 @@ import { LLMApi, LLMModel, MultimodalContent, + SpeechOptions, + TranscriptionOptions, } from "../api"; import Locale from "../../locales"; import { diff --git a/app/client/platforms/google.ts b/app/client/platforms/google.ts index 6c6c3b25e26..c8d3658b350 100644 --- a/app/client/platforms/google.ts +++ b/app/client/platforms/google.ts @@ -1,5 +1,13 @@ import { ApiPath, Google, REQUEST_TIMEOUT_MS } from "@/app/constant"; -import { ChatOptions, getHeaders, LLMApi, LLMModel, LLMUsage } from "../api"; +import { + ChatOptions, + getHeaders, + LLMApi, + LLMModel, + LLMUsage, + SpeechOptions, + TranscriptionOptions, +} from "../api"; import { useAccessStore, useAppConfig, useChatStore } from "@/app/store"; import { getClientConfig } from "@/app/config/client"; import { DEFAULT_API_HOST } from "@/app/constant"; diff --git a/app/client/platforms/iflytek.ts b/app/client/platforms/iflytek.ts index 77a4571e124..6463e052e40 100644 --- a/app/client/platforms/iflytek.ts +++ b/app/client/platforms/iflytek.ts @@ -7,7 +7,14 @@ import { } from "@/app/constant"; import { useAccessStore, useAppConfig, useChatStore } from "@/app/store"; -import { ChatOptions, getHeaders, LLMApi, LLMModel } from "../api"; +import { + ChatOptions, + getHeaders, + LLMApi, + LLMModel, + SpeechOptions, + TranscriptionOptions, +} from "../api"; import Locale from "../../locales"; import { EventStreamContentType, diff --git a/app/client/platforms/moonshot.ts b/app/client/platforms/moonshot.ts index 22bbaf01f46..b5a8aa5880d 100644 --- a/app/client/platforms/moonshot.ts +++ b/app/client/platforms/moonshot.ts @@ -20,6 +20,8 @@ import { LLMModel, LLMUsage, MultimodalContent, + SpeechOptions, + TranscriptionOptions, } from "../api"; import Locale from "../../locales"; import { diff --git a/app/client/platforms/tencent.ts b/app/client/platforms/tencent.ts index 5eb48791b01..1739b7a142b 100644 --- a/app/client/platforms/tencent.ts +++ b/app/client/platforms/tencent.ts @@ -8,6 +8,8 @@ import { LLMApi, LLMModel, MultimodalContent, + SpeechOptions, + 
TranscriptionOptions, } from "../api"; import Locale from "../../locales"; import { From 318e0989a2c28ae323d3f00d8256a7e48169e4a6 Mon Sep 17 00:00:00 2001 From: DDMeaqua Date: Wed, 28 Aug 2024 13:13:41 +0800 Subject: [PATCH 07/36] fix: transcription headers --- app/client/api.ts | 13 ++++++++----- app/components/chat.tsx | 1 - app/components/tts-config.tsx | 3 ++- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/app/client/api.ts b/app/client/api.ts index 8d0877a0d4d..7e1d0135ed6 100644 --- a/app/client/api.ts +++ b/app/client/api.ts @@ -220,13 +220,16 @@ export function validString(x: string): boolean { return x?.length > 0; } -export function getHeaders() { +export function getHeaders(ignoreHeaders?: boolean) { const accessStore = useAccessStore.getState(); const chatStore = useChatStore.getState(); - const headers: Record = { - "Content-Type": "application/json", - Accept: "application/json", - }; + let headers: Record = {}; + if (!ignoreHeaders) { + headers = { + "Content-Type": "application/json", + Accept: "application/json", + }; + } const clientConfig = getClientConfig(); diff --git a/app/components/chat.tsx b/app/components/chat.tsx index 624b7618e21..f4ebd70d88d 100644 --- a/app/components/chat.tsx +++ b/app/components/chat.tsx @@ -1686,7 +1686,6 @@ function _Chat() { ? Locale.Chat.Actions.StopSpeech : Locale.Chat.Actions.Speech } - loding={speechLoading} icon={ speechStatus ? ( diff --git a/app/components/tts-config.tsx b/app/components/tts-config.tsx index f86e3bc520a..39ae85730c2 100644 --- a/app/components/tts-config.tsx +++ b/app/components/tts-config.tsx @@ -1,4 +1,4 @@ -import { PluginConfig, TTSConfig, TTSConfigValidator } from "../store"; +import { TTSConfig, TTSConfigValidator } from "../store"; import Locale from "../locales"; import { ListItem, Select } from "./ui-lib"; @@ -111,6 +111,7 @@ export function TTSConfigList(props: { subTitle={Locale.Settings.TTS.Speed.SubTitle} > Date: Wed, 28 Aug 2024 13:15:52 +0800 Subject: [PATCH 08/36] fix: i18n --- app/locales/en.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/locales/en.ts b/app/locales/en.ts index ae20a0d4f89..dd13ff99cc5 100644 --- a/app/locales/en.ts +++ b/app/locales/en.ts @@ -80,6 +80,8 @@ const en: LocaleType = { return inputHints + ", / to search prompts, : to use commands"; }, Send: "Send", + StartSpeak: "Start Speak", + StopSpeak: "Stop Speak", Config: { Reset: "Reset to Default", SaveAs: "Save as Mask", From ebaeb5a0d5cb2fa514b2529b015ce7c99f13de15 Mon Sep 17 00:00:00 2001 From: DDMeaqua Date: Wed, 11 Sep 2024 17:54:48 +0800 Subject: [PATCH 09/36] fix: selector css --- app/components/ui-lib.module.scss | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/app/components/ui-lib.module.scss b/app/components/ui-lib.module.scss index 1cd966f19d2..28ecb7e6861 100644 --- a/app/components/ui-lib.module.scss +++ b/app/components/ui-lib.module.scss @@ -312,8 +312,7 @@ min-width: 300px; .list { max-height: 90vh; - overflow-x: hidden; - overflow-y: auto; + overflow: hidden; .list-item { cursor: pointer; From a3585685df479cdb627442828b17f7cb46f33dca Mon Sep 17 00:00:00 2001 From: wuzhiqing Date: Fri, 26 Jul 2024 11:02:02 +0800 Subject: [PATCH 10/36] chore: add ESLint plugin and rules to remove unused imports - Installed eslint-plugin-unused-imports - Updated .eslintrc.json to include rules for detecting unused imports --- .eslintrc.json | 5 ++++- package.json | 1 + yarn.lock | 12 ++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/.eslintrc.json 
b/.eslintrc.json index d229e86f250..5b5e88e67aa 100644 --- a/.eslintrc.json +++ b/.eslintrc.json @@ -1,4 +1,7 @@ { "extends": "next/core-web-vitals", - "plugins": ["prettier"] + "plugins": ["prettier", "unused-imports"], + "rules": { + "unused-imports/no-unused-imports": "warn" + } } diff --git a/package.json b/package.json index ca5fcc0f5df..ac4c65f75e6 100644 --- a/package.json +++ b/package.json @@ -66,6 +66,7 @@ "eslint-config-next": "13.4.19", "eslint-config-prettier": "^8.8.0", "eslint-plugin-prettier": "^5.1.3", + "eslint-plugin-unused-imports": "^3.2.0", "husky": "^8.0.0", "lint-staged": "^13.2.2", "prettier": "^3.0.2", diff --git a/yarn.lock b/yarn.lock index 4979e4d995e..8751953cad6 100644 --- a/yarn.lock +++ b/yarn.lock @@ -3367,6 +3367,18 @@ eslint-plugin-react@^7.31.7: semver "^6.3.0" string.prototype.matchall "^4.0.8" +eslint-plugin-unused-imports@^3.2.0: + version "3.2.0" + resolved "https://registry.yarnpkg.com/eslint-plugin-unused-imports/-/eslint-plugin-unused-imports-3.2.0.tgz#63a98c9ad5f622cd9f830f70bc77739f25ccfe0d" + integrity sha512-6uXyn6xdINEpxE1MtDjxQsyXB37lfyO2yKGVVgtD7WEWQGORSOZjgrD6hBhvGv4/SO+TOlS+UnC6JppRqbuwGQ== + dependencies: + eslint-rule-composer "^0.3.0" + +eslint-rule-composer@^0.3.0: + version "0.3.0" + resolved "https://registry.yarnpkg.com/eslint-rule-composer/-/eslint-rule-composer-0.3.0.tgz#79320c927b0c5c0d3d3d2b76c8b4a488f25bbaf9" + integrity sha512-bt+Sh8CtDmn2OajxvNO+BX7Wn4CIWMpTRm3MaiKPCQcnnlm0CS2mhui6QaoeQugs+3Kj2ESKEEGJUdVafwhiCg== + eslint-scope@5.1.1: version "5.1.1" resolved "https://registry.npmmirror.com/eslint-scope/-/eslint-scope-5.1.1.tgz#e786e59a66cb92b3f6c1fb0d508aab174848f48c" From 6e79b9a7a2609691f41f1b1663a3bed455ebdc42 Mon Sep 17 00:00:00 2001 From: lyf <1910527151@qq.com> Date: Sat, 14 Sep 2024 14:19:11 +0800 Subject: [PATCH 11/36] add fork --- app/command.ts | 1 + app/components/chat.tsx | 1 + app/locales/cn.ts | 1 + app/store/chat.ts | 22 ++++++++++++++++++++++ 4 files changed, 25 insertions(+) diff --git a/app/command.ts b/app/command.ts index bea4e06f381..aec73ef53d6 100644 --- a/app/command.ts +++ b/app/command.ts @@ -38,6 +38,7 @@ interface ChatCommands { next?: Command; prev?: Command; clear?: Command; + fork?: Command; del?: Command; } diff --git a/app/components/chat.tsx b/app/components/chat.tsx index dafb9846421..1b1c2a0451c 100644 --- a/app/components/chat.tsx +++ b/app/components/chat.tsx @@ -980,6 +980,7 @@ function _Chat() { chatStore.updateCurrentSession( (session) => (session.clearContextIndex = session.messages.length), ), + fork: () => chatStore.forkSession(), del: () => chatStore.deleteSession(chatStore.currentSessionIndex), }); diff --git a/app/locales/cn.ts b/app/locales/cn.ts index 92e81bcb1ba..a93b2defc87 100644 --- a/app/locales/cn.ts +++ b/app/locales/cn.ts @@ -51,6 +51,7 @@ const cn = { next: "下一个聊天", prev: "上一个聊天", clear: "清除上下文", + fork: "复制聊天", del: "删除聊天", }, InputActions: { diff --git a/app/store/chat.ts b/app/store/chat.ts index 58c105e7ef7..2b7fa7e34fe 100644 --- a/app/store/chat.ts +++ b/app/store/chat.ts @@ -195,6 +195,28 @@ export const useChatStore = createPersistStore( } const methods = { + forkSession() { + // 获取当前会话 + const currentSession = get().currentSession(); + if (!currentSession) return; + + const newSession = createEmptySession(); + + newSession.topic = currentSession.topic; + newSession.messages = [...currentSession.messages]; + newSession.mask = { + ...currentSession.mask, + modelConfig: { + ...currentSession.mask.modelConfig, + }, + }; + + set((state) => ({ + 
currentSessionIndex: 0, + sessions: [newSession, ...state.sessions], + })); + }, + clearSessions() { set(() => ({ sessions: [createEmptySession()], From 63ffd473d57385e38ee76acbc6a78cc13be13fcd Mon Sep 17 00:00:00 2001 From: DDDDD12138 Date: Sun, 15 Sep 2024 20:17:02 +0800 Subject: [PATCH 12/36] chore: remove unused imports --- app/api/[provider]/[...path]/route.ts | 2 +- app/api/alibaba.ts | 2 -- app/api/anthropic.ts | 1 - app/api/azure.ts | 1 - app/api/baidu.ts | 1 - app/api/common.ts | 7 +------ app/api/google.ts | 7 +------ app/api/iflytek.ts | 2 -- app/api/moonshot.ts | 2 -- app/api/tencent/route.ts | 9 +-------- app/client/api.ts | 1 - app/client/platforms/anthropic.ts | 11 ++--------- app/client/platforms/iflytek.ts | 2 +- app/client/platforms/moonshot.ts | 25 +++---------------------- app/client/platforms/openai.ts | 6 ------ app/components/artifacts.tsx | 1 - app/components/chat-list.tsx | 3 +-- app/components/chat.tsx | 1 - app/components/exporter.tsx | 2 +- app/components/mask.tsx | 3 +-- app/components/plugin.tsx | 2 +- app/components/sidebar.tsx | 1 - app/constant.ts | 4 +--- app/locales/cn.ts | 1 - app/locales/sk.ts | 1 - app/masks/index.ts | 3 --- app/store/plugin.ts | 1 - app/store/sync.ts | 9 +++++---- app/store/update.ts | 2 -- app/utils.ts | 3 +-- app/utils/cors.ts | 2 +- 31 files changed, 22 insertions(+), 96 deletions(-) diff --git a/app/api/[provider]/[...path]/route.ts b/app/api/[provider]/[...path]/route.ts index 24aa5ec040f..dffb3e9daa4 100644 --- a/app/api/[provider]/[...path]/route.ts +++ b/app/api/[provider]/[...path]/route.ts @@ -1,5 +1,5 @@ import { ApiPath } from "@/app/constant"; -import { NextRequest, NextResponse } from "next/server"; +import { NextRequest } from "next/server"; import { handle as openaiHandler } from "../../openai"; import { handle as azureHandler } from "../../azure"; import { handle as googleHandler } from "../../google"; diff --git a/app/api/alibaba.ts b/app/api/alibaba.ts index 675d9f301aa..894b1ae4c04 100644 --- a/app/api/alibaba.ts +++ b/app/api/alibaba.ts @@ -1,6 +1,5 @@ import { getServerSideConfig } from "@/app/config/server"; import { - Alibaba, ALIBABA_BASE_URL, ApiPath, ModelProvider, @@ -10,7 +9,6 @@ import { prettyObject } from "@/app/utils/format"; import { NextRequest, NextResponse } from "next/server"; import { auth } from "@/app/api/auth"; import { isModelAvailableInServer } from "@/app/utils/model"; -import type { RequestPayload } from "@/app/client/platforms/openai"; const serverConfig = getServerSideConfig(); diff --git a/app/api/anthropic.ts b/app/api/anthropic.ts index 3d49f4c88c4..3ff747962a7 100644 --- a/app/api/anthropic.ts +++ b/app/api/anthropic.ts @@ -3,7 +3,6 @@ import { ANTHROPIC_BASE_URL, Anthropic, ApiPath, - DEFAULT_MODELS, ServiceProvider, ModelProvider, } from "@/app/constant"; diff --git a/app/api/azure.ts b/app/api/azure.ts index e2cb0c7e66b..39d872e8cf8 100644 --- a/app/api/azure.ts +++ b/app/api/azure.ts @@ -1,4 +1,3 @@ -import { getServerSideConfig } from "@/app/config/server"; import { ModelProvider } from "@/app/constant"; import { prettyObject } from "@/app/utils/format"; import { NextRequest, NextResponse } from "next/server"; diff --git a/app/api/baidu.ts b/app/api/baidu.ts index f4315d186da..0408b43c5bc 100644 --- a/app/api/baidu.ts +++ b/app/api/baidu.ts @@ -3,7 +3,6 @@ import { BAIDU_BASE_URL, ApiPath, ModelProvider, - BAIDU_OATUH_URL, ServiceProvider, } from "@/app/constant"; import { prettyObject } from "@/app/utils/format"; diff --git a/app/api/common.ts b/app/api/common.ts index 
25decbf620e..b4c792d6ff0 100644 --- a/app/api/common.ts +++ b/app/api/common.ts @@ -1,11 +1,6 @@ import { NextRequest, NextResponse } from "next/server"; import { getServerSideConfig } from "../config/server"; -import { - DEFAULT_MODELS, - OPENAI_BASE_URL, - GEMINI_BASE_URL, - ServiceProvider, -} from "../constant"; +import { OPENAI_BASE_URL, ServiceProvider } from "../constant"; import { isModelAvailableInServer } from "../utils/model"; import { cloudflareAIGatewayUrl } from "../utils/cloudflare"; diff --git a/app/api/google.ts b/app/api/google.ts index 98fe469bfb7..e6ab472568b 100644 --- a/app/api/google.ts +++ b/app/api/google.ts @@ -1,12 +1,7 @@ import { NextRequest, NextResponse } from "next/server"; import { auth } from "./auth"; import { getServerSideConfig } from "@/app/config/server"; -import { - ApiPath, - GEMINI_BASE_URL, - Google, - ModelProvider, -} from "@/app/constant"; +import { ApiPath, GEMINI_BASE_URL, ModelProvider } from "@/app/constant"; import { prettyObject } from "@/app/utils/format"; const serverConfig = getServerSideConfig(); diff --git a/app/api/iflytek.ts b/app/api/iflytek.ts index eabdd9f4ce6..8b8227dce1f 100644 --- a/app/api/iflytek.ts +++ b/app/api/iflytek.ts @@ -1,6 +1,5 @@ import { getServerSideConfig } from "@/app/config/server"; import { - Iflytek, IFLYTEK_BASE_URL, ApiPath, ModelProvider, @@ -10,7 +9,6 @@ import { prettyObject } from "@/app/utils/format"; import { NextRequest, NextResponse } from "next/server"; import { auth } from "@/app/api/auth"; import { isModelAvailableInServer } from "@/app/utils/model"; -import type { RequestPayload } from "@/app/client/platforms/openai"; // iflytek const serverConfig = getServerSideConfig(); diff --git a/app/api/moonshot.ts b/app/api/moonshot.ts index 247dd618321..5bf4807e3e6 100644 --- a/app/api/moonshot.ts +++ b/app/api/moonshot.ts @@ -1,6 +1,5 @@ import { getServerSideConfig } from "@/app/config/server"; import { - Moonshot, MOONSHOT_BASE_URL, ApiPath, ModelProvider, @@ -10,7 +9,6 @@ import { prettyObject } from "@/app/utils/format"; import { NextRequest, NextResponse } from "next/server"; import { auth } from "@/app/api/auth"; import { isModelAvailableInServer } from "@/app/utils/model"; -import type { RequestPayload } from "@/app/client/platforms/openai"; const serverConfig = getServerSideConfig(); diff --git a/app/api/tencent/route.ts b/app/api/tencent/route.ts index 885909e7a75..fc4f8c79edf 100644 --- a/app/api/tencent/route.ts +++ b/app/api/tencent/route.ts @@ -1,15 +1,8 @@ import { getServerSideConfig } from "@/app/config/server"; -import { - TENCENT_BASE_URL, - ApiPath, - ModelProvider, - ServiceProvider, - Tencent, -} from "@/app/constant"; +import { TENCENT_BASE_URL, ModelProvider } from "@/app/constant"; import { prettyObject } from "@/app/utils/format"; import { NextRequest, NextResponse } from "next/server"; import { auth } from "@/app/api/auth"; -import { isModelAvailableInServer } from "@/app/utils/model"; import { getHeader } from "@/app/utils/tencent"; const serverConfig = getServerSideConfig(); diff --git a/app/client/api.ts b/app/client/api.ts index cecc453baa2..94296b9aa81 100644 --- a/app/client/api.ts +++ b/app/client/api.ts @@ -1,7 +1,6 @@ import { getClientConfig } from "../config/client"; import { ACCESS_CODE_PREFIX, - Azure, ModelProvider, ServiceProvider, } from "../constant"; diff --git a/app/client/platforms/anthropic.ts b/app/client/platforms/anthropic.ts index 7dd39c9cddc..df128c70497 100644 --- a/app/client/platforms/anthropic.ts +++ b/app/client/platforms/anthropic.ts @@ -1,5 
+1,5 @@ -import { ACCESS_CODE_PREFIX, Anthropic, ApiPath } from "@/app/constant"; -import { ChatOptions, getHeaders, LLMApi, MultimodalContent } from "../api"; +import { Anthropic, ApiPath } from "@/app/constant"; +import { ChatOptions, getHeaders, LLMApi } from "../api"; import { useAccessStore, useAppConfig, @@ -9,13 +9,6 @@ import { } from "@/app/store"; import { getClientConfig } from "@/app/config/client"; import { DEFAULT_API_HOST } from "@/app/constant"; -import { - EventStreamContentType, - fetchEventSource, -} from "@fortaine/fetch-event-source"; - -import Locale from "../../locales"; -import { prettyObject } from "@/app/utils/format"; import { getMessageTextContent, isVisionModel } from "@/app/utils"; import { preProcessImageContent, stream } from "@/app/utils/chat"; import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare"; diff --git a/app/client/platforms/iflytek.ts b/app/client/platforms/iflytek.ts index 73cea5ba0e7..e29b603e2dc 100644 --- a/app/client/platforms/iflytek.ts +++ b/app/client/platforms/iflytek.ts @@ -17,7 +17,7 @@ import { prettyObject } from "@/app/utils/format"; import { getClientConfig } from "@/app/config/client"; import { getMessageTextContent } from "@/app/utils"; -import { OpenAIListModelResponse, RequestPayload } from "./openai"; +import { RequestPayload } from "./openai"; export class SparkApi implements LLMApi { private disableListModels = true; diff --git a/app/client/platforms/moonshot.ts b/app/client/platforms/moonshot.ts index cd10d2f6c15..d09c4619edf 100644 --- a/app/client/platforms/moonshot.ts +++ b/app/client/platforms/moonshot.ts @@ -3,10 +3,8 @@ import { ApiPath, DEFAULT_API_HOST, - DEFAULT_MODELS, Moonshot, REQUEST_TIMEOUT_MS, - ServiceProvider, } from "@/app/constant"; import { useAccessStore, @@ -15,28 +13,11 @@ import { ChatMessageTool, usePluginStore, } from "@/app/store"; -import { collectModelsWithDefaultModel } from "@/app/utils/model"; -import { preProcessImageContent, stream } from "@/app/utils/chat"; -import { cloudflareAIGatewayUrl } from "@/app/utils/cloudflare"; - -import { - ChatOptions, - getHeaders, - LLMApi, - LLMModel, - LLMUsage, - MultimodalContent, -} from "../api"; -import Locale from "../../locales"; -import { - EventStreamContentType, - fetchEventSource, -} from "@fortaine/fetch-event-source"; -import { prettyObject } from "@/app/utils/format"; +import { stream } from "@/app/utils/chat"; +import { ChatOptions, getHeaders, LLMApi, LLMModel } from "../api"; import { getClientConfig } from "@/app/config/client"; import { getMessageTextContent } from "@/app/utils"; - -import { OpenAIListModelResponse, RequestPayload } from "./openai"; +import { RequestPayload } from "./openai"; export class MoonshotApi implements LLMApi { private disableListModels = true; diff --git a/app/client/platforms/openai.ts b/app/client/platforms/openai.ts index 664ff872ba3..4f9dcd4d048 100644 --- a/app/client/platforms/openai.ts +++ b/app/client/platforms/openai.ts @@ -35,15 +35,9 @@ import { MultimodalContent, } from "../api"; import Locale from "../../locales"; -import { - EventStreamContentType, - fetchEventSource, -} from "@fortaine/fetch-event-source"; -import { prettyObject } from "@/app/utils/format"; import { getClientConfig } from "@/app/config/client"; import { getMessageTextContent, - getMessageImages, isVisionModel, isDalle3 as _isDalle3, } from "@/app/utils"; diff --git a/app/components/artifacts.tsx b/app/components/artifacts.tsx index d725ee6596e..ce187fbcb2c 100644 --- a/app/components/artifacts.tsx +++ 
b/app/components/artifacts.tsx @@ -7,7 +7,6 @@ import { useImperativeHandle, } from "react"; import { useParams } from "react-router"; -import { useWindowSize } from "@/app/utils"; import { IconButton } from "./button"; import { nanoid } from "nanoid"; import ExportIcon from "../icons/share.svg"; diff --git a/app/components/chat-list.tsx b/app/components/chat-list.tsx index 7ef6e7b8337..03b1a5c8803 100644 --- a/app/components/chat-list.tsx +++ b/app/components/chat-list.tsx @@ -1,5 +1,4 @@ import DeleteIcon from "../icons/delete.svg"; -import BotIcon from "../icons/bot.svg"; import styles from "./home.module.scss"; import { @@ -12,7 +11,7 @@ import { import { useChatStore } from "../store"; import Locale from "../locales"; -import { Link, useLocation, useNavigate } from "react-router-dom"; +import { useLocation, useNavigate } from "react-router-dom"; import { Path } from "../constant"; import { MaskAvatar } from "./mask"; import { Mask } from "../store/mask"; diff --git a/app/components/chat.tsx b/app/components/chat.tsx index fc7e04aef79..e1b76db65b0 100644 --- a/app/components/chat.tsx +++ b/app/components/chat.tsx @@ -95,7 +95,6 @@ import { import { useNavigate } from "react-router-dom"; import { CHAT_PAGE_SIZE, - LAST_INPUT_KEY, Path, REQUEST_TIMEOUT_MS, UNFINISHED_INPUT, diff --git a/app/components/exporter.tsx b/app/components/exporter.tsx index 1771cc9b013..aba8dc54466 100644 --- a/app/components/exporter.tsx +++ b/app/components/exporter.tsx @@ -1,5 +1,5 @@ /* eslint-disable @next/next/no-img-element */ -import { ChatMessage, ModelType, useAppConfig, useChatStore } from "../store"; +import { ChatMessage, useAppConfig, useChatStore } from "../store"; import Locale from "../locales"; import styles from "./exporter.module.scss"; import { diff --git a/app/components/mask.tsx b/app/components/mask.tsx index ee6c7da97b8..e4dd90826c7 100644 --- a/app/components/mask.tsx +++ b/app/components/mask.tsx @@ -37,7 +37,7 @@ import Locale, { AllLangs, ALL_LANG_OPTIONS, Lang } from "../locales"; import { useNavigate } from "react-router-dom"; import chatStyle from "./chat.module.scss"; -import { useEffect, useState } from "react"; +import { useState } from "react"; import { copyToClipboard, downloadAs, @@ -48,7 +48,6 @@ import { Updater } from "../typing"; import { ModelConfigList } from "./model-config"; import { FileName, Path } from "../constant"; import { BUILTIN_MASK_STORE } from "../masks"; -import { nanoid } from "nanoid"; import { DragDropContext, Droppable, diff --git a/app/components/plugin.tsx b/app/components/plugin.tsx index 6f0b371075e..cf4ae946ef6 100644 --- a/app/components/plugin.tsx +++ b/app/components/plugin.tsx @@ -28,7 +28,7 @@ import { } from "./ui-lib"; import Locale from "../locales"; import { useNavigate } from "react-router-dom"; -import { useEffect, useState } from "react"; +import { useState } from "react"; import { getClientConfig } from "../config/client"; export function PluginPage() { diff --git a/app/components/sidebar.tsx b/app/components/sidebar.tsx index 4ec0f8c84f5..b067d41bead 100644 --- a/app/components/sidebar.tsx +++ b/app/components/sidebar.tsx @@ -7,7 +7,6 @@ import SettingsIcon from "../icons/settings.svg"; import GithubIcon from "../icons/github.svg"; import ChatGptIcon from "../icons/chatgpt.svg"; import AddIcon from "../icons/add.svg"; -import CloseIcon from "../icons/close.svg"; import DeleteIcon from "../icons/delete.svg"; import MaskIcon from "../icons/mask.svg"; import DragIcon from "../icons/drag.svg"; diff --git a/app/constant.ts 
b/app/constant.ts index 3d33a047e90..e281c457d0f 100644 --- a/app/constant.ts +++ b/app/constant.ts @@ -1,5 +1,3 @@ -import path from "path"; - export const OWNER = "ChatGPTNextWeb"; export const REPO = "ChatGPT-Next-Web"; export const REPO_URL = `https://github.com/${OWNER}/${REPO}`; @@ -279,7 +277,7 @@ const openaiModels = [ "gpt-4-1106-preview", "dall-e-3", "o1-mini", - "o1-preview" + "o1-preview", ]; const googleModels = [ diff --git a/app/locales/cn.ts b/app/locales/cn.ts index 92e81bcb1ba..3dd120a3282 100644 --- a/app/locales/cn.ts +++ b/app/locales/cn.ts @@ -1,4 +1,3 @@ -import { ShortcutKeyModal } from "../components/chat"; import { getClientConfig } from "../config/client"; import { SubmitKey } from "../store/config"; diff --git a/app/locales/sk.ts b/app/locales/sk.ts index 2586aaaa7b4..4bf9bf443f9 100644 --- a/app/locales/sk.ts +++ b/app/locales/sk.ts @@ -1,6 +1,5 @@ import { getClientConfig } from "../config/client"; import { SubmitKey } from "../store/config"; -import { LocaleType } from "./index"; import type { PartialLocaleType } from "./index"; // if you are adding a new translation, please use PartialLocaleType instead of LocaleType diff --git a/app/masks/index.ts b/app/masks/index.ts index 92f21c6aea7..bff5c9bbe0a 100644 --- a/app/masks/index.ts +++ b/app/masks/index.ts @@ -1,7 +1,4 @@ import { Mask } from "../store/mask"; -import { CN_MASKS } from "./cn"; -import { TW_MASKS } from "./tw"; -import { EN_MASKS } from "./en"; import { type BuiltinMask } from "./typing"; export { type BuiltinMask } from "./typing"; diff --git a/app/store/plugin.ts b/app/store/plugin.ts index 2356c6db0a7..44679cbdc25 100644 --- a/app/store/plugin.ts +++ b/app/store/plugin.ts @@ -1,5 +1,4 @@ import OpenAPIClientAxios from "openapi-client-axios"; -import { getLang, Lang } from "../locales"; import { StoreKey } from "../constant"; import { nanoid } from "nanoid"; import { createPersistStore } from "../utils/store"; diff --git a/app/store/sync.ts b/app/store/sync.ts index d3582e3c935..9db60d5f410 100644 --- a/app/store/sync.ts +++ b/app/store/sync.ts @@ -1,5 +1,4 @@ import { getClientConfig } from "../config/client"; -import { Updater } from "../typing"; import { ApiPath, STORAGE_KEY, StoreKey } from "../constant"; import { createPersistStore } from "../utils/store"; import { @@ -100,15 +99,17 @@ export const useSyncStore = createPersistStore( const remoteState = await client.get(config.username); if (!remoteState || remoteState === "") { await client.set(config.username, JSON.stringify(localState)); - console.log("[Sync] Remote state is empty, using local state instead."); - return + console.log( + "[Sync] Remote state is empty, using local state instead.", + ); + return; } else { const parsedRemoteState = JSON.parse( await client.get(config.username), ) as AppState; mergeAppState(localState, parsedRemoteState); setLocalAppState(localState); - } + } } catch (e) { console.log("[Sync] failed to get remote state", e); throw e; diff --git a/app/store/update.ts b/app/store/update.ts index 7253caffcb9..e68fde369d5 100644 --- a/app/store/update.ts +++ b/app/store/update.ts @@ -8,8 +8,6 @@ import { getClientConfig } from "../config/client"; import { createPersistStore } from "../utils/store"; import ChatGptIcon from "../icons/chatgpt.png"; import Locale from "../locales"; -import { use } from "react"; -import { useAppConfig } from "."; import { ClientApi } from "../client/api"; const ONE_MINUTE = 60 * 1000; diff --git a/app/utils.ts b/app/utils.ts index bf745092913..9a8bebf38c7 100644 --- a/app/utils.ts 
+++ b/app/utils.ts @@ -3,8 +3,7 @@ import { showToast } from "./components/ui-lib"; import Locale from "./locales"; import { RequestMessage } from "./client/api"; import { ServiceProvider, REQUEST_TIMEOUT_MS } from "./constant"; -import isObject from "lodash-es/isObject"; -import { fetch as tauriFetch, Body, ResponseType } from "@tauri-apps/api/http"; +import { fetch as tauriFetch, ResponseType } from "@tauri-apps/api/http"; export function trimTopic(topic: string) { // Fix an issue where double quotes still show in the Indonesian language diff --git a/app/utils/cors.ts b/app/utils/cors.ts index fa348f9bf5d..f5e5ce6f0a2 100644 --- a/app/utils/cors.ts +++ b/app/utils/cors.ts @@ -1,5 +1,5 @@ import { getClientConfig } from "../config/client"; -import { ApiPath, DEFAULT_API_HOST } from "../constant"; +import { DEFAULT_API_HOST } from "../constant"; export function corsPath(path: string) { const baseUrl = getClientConfig()?.isApp ? `${DEFAULT_API_HOST}` : ""; From 9e1e0a72521cc84ef74499195f3734850b9ccd13 Mon Sep 17 00:00:00 2001 From: skymkmk Date: Mon, 16 Sep 2024 02:06:17 +0800 Subject: [PATCH 13/36] fix: persisted available models ard not be update after source code have been updated --- app/store/config.ts | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/app/store/config.ts b/app/store/config.ts index 9985b9e768c..679135ee712 100644 --- a/app/store/config.ts +++ b/app/store/config.ts @@ -143,6 +143,21 @@ export const useAppConfig = createPersistStore( { name: StoreKey.Config, version: 4, + + merge(persistedState, currentState) { + const state = persistedState as ChatConfig | undefined; + if (!state) return { ...currentState }; + const models = currentState.models.slice(); + state.models.forEach((pModel) => { + const idx = models.findIndex( + (v) => v.name === pModel.name && v.provider === pModel.provider, + ); + if (idx !== -1) models[idx] = pModel; + else models.push(pModel); + }); + return { ...currentState, ...state, models: models }; + }, + migrate(persistedState, version) { const state = persistedState as ChatConfig; From 36a0c7b8a3ab0c0b138940af7ec2efaf94aadcaf Mon Sep 17 00:00:00 2001 From: skymkmk Date: Mon, 16 Sep 2024 02:07:22 +0800 Subject: [PATCH 14/36] fix: default is forced to set gpt-3.5-turbo if no server default model have been set --- app/store/access.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/store/access.ts b/app/store/access.ts index a1014610e39..7f8067161e3 100644 --- a/app/store/access.ts +++ b/app/store/access.ts @@ -204,8 +204,8 @@ export const useAccessStore = createPersistStore( .then((res) => { // Set default model from env request let defaultModel = res.defaultModel ?? ""; - DEFAULT_CONFIG.modelConfig.model = - defaultModel !== "" ? 
defaultModel : "gpt-3.5-turbo"; + if (defaultModel !== "") + DEFAULT_CONFIG.modelConfig.model = defaultModel; return res; }) .then((res: DangerConfig) => { From 4ddfa9af8d2645c288428e8c70754546271f3fe5 Mon Sep 17 00:00:00 2001 From: SukkaW Date: Tue, 17 Sep 2024 22:28:13 +0800 Subject: [PATCH 15/36] ci: bump `actions/cache` to v4 --- .github/workflows/deploy_preview.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy_preview.yml b/.github/workflows/deploy_preview.yml index bdbb78c27c5..30d9b85b44c 100644 --- a/.github/workflows/deploy_preview.yml +++ b/.github/workflows/deploy_preview.yml @@ -49,7 +49,7 @@ jobs: run: npm install --global vercel@latest - name: Cache dependencies - uses: actions/cache@v2 + uses: actions/cache@v4 id: cache-npm with: path: ~/.npm From 3ae8ec1af6011cec2ff57f62e66531c48576a9bf Mon Sep 17 00:00:00 2001 From: DDMeaqua Date: Wed, 18 Sep 2024 11:24:25 +0800 Subject: [PATCH 16/36] feat: tts --- app/client/api.ts | 11 --- app/client/platforms/alibaba.ts | 4 - app/client/platforms/anthropic.ts | 4 - app/client/platforms/baidu.ts | 4 - app/client/platforms/bytedance.ts | 4 - app/client/platforms/google.ts | 5 +- app/client/platforms/iflytek.ts | 4 - app/client/platforms/moonshot.ts | 4 - app/client/platforms/openai.ts | 42 ---------- app/client/platforms/tencent.ts | 4 - app/components/chat.tsx | 58 +------------- app/components/settings.tsx | 12 --- app/components/stt-config.tsx | 51 ------------ app/components/stt.module.scss | 119 ---------------------------- app/constant.ts | 5 -- app/locales/cn.ts | 10 --- app/locales/en.ts | 10 --- app/store/config.ts | 15 ---- app/utils/speech.ts | 126 ------------------------------ 19 files changed, 2 insertions(+), 490 deletions(-) delete mode 100644 app/components/stt-config.tsx delete mode 100644 app/components/stt.module.scss delete mode 100644 app/utils/speech.ts diff --git a/app/client/api.ts b/app/client/api.ts index 9f2cb23053e..9d9977deb24 100644 --- a/app/client/api.ts +++ b/app/client/api.ts @@ -64,16 +64,6 @@ export interface SpeechOptions { onController?: (controller: AbortController) => void; } -export interface TranscriptionOptions { - model?: "whisper-1"; - file: Blob; - language?: string; - prompt?: string; - response_format?: "json" | "text" | "srt" | "verbose_json" | "vtt"; - temperature?: number; - onController?: (controller: AbortController) => void; -} - export interface ChatOptions { messages: RequestMessage[]; config: LLMConfig; @@ -109,7 +99,6 @@ export interface LLMModelProvider { export abstract class LLMApi { abstract chat(options: ChatOptions): Promise; abstract speech(options: SpeechOptions): Promise; - abstract transcription(options: TranscriptionOptions): Promise; abstract usage(): Promise; abstract models(): Promise; } diff --git a/app/client/platforms/alibaba.ts b/app/client/platforms/alibaba.ts index e839c69f01f..4ade9ebb98f 100644 --- a/app/client/platforms/alibaba.ts +++ b/app/client/platforms/alibaba.ts @@ -13,7 +13,6 @@ import { LLMApi, LLMModel, SpeechOptions, - TranscriptionOptions, MultimodalContent, } from "../api"; import Locale from "../../locales"; @@ -88,9 +87,6 @@ export class QwenApi implements LLMApi { speech(options: SpeechOptions): Promise { throw new Error("Method not implemented."); } - transcription(options: TranscriptionOptions): Promise { - throw new Error("Method not implemented."); - } async chat(options: ChatOptions) { const messages = options.messages.map((v) => ({ diff --git a/app/client/platforms/anthropic.ts 
b/app/client/platforms/anthropic.ts index 2ab67ed1371..e624a2e16cf 100644 --- a/app/client/platforms/anthropic.ts +++ b/app/client/platforms/anthropic.ts @@ -5,7 +5,6 @@ import { LLMApi, MultimodalContent, SpeechOptions, - TranscriptionOptions, } from "../api"; import { useAccessStore, @@ -90,9 +89,6 @@ export class ClaudeApi implements LLMApi { speech(options: SpeechOptions): Promise { throw new Error("Method not implemented."); } - transcription(options: TranscriptionOptions): Promise { - throw new Error("Method not implemented."); - } extractMessage(res: any) { console.log("[Response] claude response: ", res); diff --git a/app/client/platforms/baidu.ts b/app/client/platforms/baidu.ts index 0c2be5fb14b..c360417c602 100644 --- a/app/client/platforms/baidu.ts +++ b/app/client/platforms/baidu.ts @@ -15,7 +15,6 @@ import { LLMModel, MultimodalContent, SpeechOptions, - TranscriptionOptions, } from "../api"; import Locale from "../../locales"; import { @@ -80,9 +79,6 @@ export class ErnieApi implements LLMApi { speech(options: SpeechOptions): Promise { throw new Error("Method not implemented."); } - transcription(options: TranscriptionOptions): Promise { - throw new Error("Method not implemented."); - } async chat(options: ChatOptions) { const messages = options.messages.map((v) => ({ diff --git a/app/client/platforms/bytedance.ts b/app/client/platforms/bytedance.ts index 5a0c9b8b12e..a6e2d426ee3 100644 --- a/app/client/platforms/bytedance.ts +++ b/app/client/platforms/bytedance.ts @@ -14,7 +14,6 @@ import { LLMModel, MultimodalContent, SpeechOptions, - TranscriptionOptions, } from "../api"; import Locale from "../../locales"; import { @@ -82,9 +81,6 @@ export class DoubaoApi implements LLMApi { speech(options: SpeechOptions): Promise { throw new Error("Method not implemented."); } - transcription(options: TranscriptionOptions): Promise { - throw new Error("Method not implemented."); - } async chat(options: ChatOptions) { const messages = options.messages.map((v) => ({ diff --git a/app/client/platforms/google.ts b/app/client/platforms/google.ts index c8d3658b350..ecb5ce44b57 100644 --- a/app/client/platforms/google.ts +++ b/app/client/platforms/google.ts @@ -6,7 +6,6 @@ import { LLMModel, LLMUsage, SpeechOptions, - TranscriptionOptions, } from "../api"; import { useAccessStore, useAppConfig, useChatStore } from "@/app/store"; import { getClientConfig } from "@/app/config/client"; @@ -67,9 +66,7 @@ export class GeminiProApi implements LLMApi { speech(options: SpeechOptions): Promise { throw new Error("Method not implemented."); } - transcription(options: TranscriptionOptions): Promise { - throw new Error("Method not implemented."); - } + async chat(options: ChatOptions): Promise { const apiClient = this; let multimodal = false; diff --git a/app/client/platforms/iflytek.ts b/app/client/platforms/iflytek.ts index 6463e052e40..bd0c6083809 100644 --- a/app/client/platforms/iflytek.ts +++ b/app/client/platforms/iflytek.ts @@ -13,7 +13,6 @@ import { LLMApi, LLMModel, SpeechOptions, - TranscriptionOptions, } from "../api"; import Locale from "../../locales"; import { @@ -63,9 +62,6 @@ export class SparkApi implements LLMApi { speech(options: SpeechOptions): Promise { throw new Error("Method not implemented."); } - transcription(options: TranscriptionOptions): Promise { - throw new Error("Method not implemented."); - } async chat(options: ChatOptions) { const messages: ChatOptions["messages"] = []; diff --git a/app/client/platforms/moonshot.ts b/app/client/platforms/moonshot.ts index 
173ecd14c9d..bf8f18751d2 100644 --- a/app/client/platforms/moonshot.ts +++ b/app/client/platforms/moonshot.ts @@ -27,7 +27,6 @@ import { LLMUsage, MultimodalContent, SpeechOptions, - TranscriptionOptions, } from "../api"; import Locale from "../../locales"; import { @@ -77,9 +76,6 @@ export class MoonshotApi implements LLMApi { speech(options: SpeechOptions): Promise { throw new Error("Method not implemented."); } - transcription(options: TranscriptionOptions): Promise { - throw new Error("Method not implemented."); - } async chat(options: ChatOptions) { const messages: ChatOptions["messages"] = []; diff --git a/app/client/platforms/openai.ts b/app/client/platforms/openai.ts index 71b7731fa0b..70b191f5c1d 100644 --- a/app/client/platforms/openai.ts +++ b/app/client/platforms/openai.ts @@ -34,7 +34,6 @@ import { LLMUsage, MultimodalContent, SpeechOptions, - TranscriptionOptions, } from "../api"; import Locale from "../../locales"; import { @@ -187,47 +186,6 @@ export class ChatGPTApi implements LLMApi { } } - async transcription(options: TranscriptionOptions): Promise { - const formData = new FormData(); - formData.append("file", options.file, "audio.wav"); - formData.append("model", options.model ?? "whisper-1"); - if (options.language) formData.append("language", options.language); - if (options.prompt) formData.append("prompt", options.prompt); - if (options.response_format) - formData.append("response_format", options.response_format); - if (options.temperature) - formData.append("temperature", options.temperature.toString()); - - console.log("[Request] openai audio transcriptions payload: ", options); - - const controller = new AbortController(); - options.onController?.(controller); - - try { - const path = this.path(OpenaiPath.TranscriptionPath, options.model); - const headers = getHeaders(true); - const payload = { - method: "POST", - body: formData, - signal: controller.signal, - headers: headers, - }; - - // make a fetch request - const requestTimeoutId = setTimeout( - () => controller.abort(), - REQUEST_TIMEOUT_MS, - ); - const res = await fetch(path, payload); - clearTimeout(requestTimeoutId); - const json = await res.json(); - return json.text; - } catch (e) { - console.log("[Request] failed to make a audio transcriptions request", e); - throw e; - } - } - async chat(options: ChatOptions) { const modelConfig = { ...useAppConfig.getState().modelConfig, diff --git a/app/client/platforms/tencent.ts b/app/client/platforms/tencent.ts index 1739b7a142b..3e8f1a45957 100644 --- a/app/client/platforms/tencent.ts +++ b/app/client/platforms/tencent.ts @@ -9,7 +9,6 @@ import { LLMModel, MultimodalContent, SpeechOptions, - TranscriptionOptions, } from "../api"; import Locale from "../../locales"; import { @@ -94,9 +93,6 @@ export class HunyuanApi implements LLMApi { speech(options: SpeechOptions): Promise { throw new Error("Method not implemented."); } - transcription(options: TranscriptionOptions): Promise { - throw new Error("Method not implemented."); - } async chat(options: ChatOptions) { const visionModel = isVisionModel(options.config.model); diff --git a/app/components/chat.tsx b/app/components/chat.tsx index cb03440775e..59ffa01c1ce 100644 --- a/app/components/chat.tsx +++ b/app/components/chat.tsx @@ -10,7 +10,6 @@ import React, { } from "react"; import SendWhiteIcon from "../icons/send-white.svg"; -import VoiceWhiteIcon from "../icons/voice-white.svg"; import BrainIcon from "../icons/brain.svg"; import RenameIcon from "../icons/rename.svg"; import ExportIcon from 
"../icons/share.svg"; @@ -83,7 +82,7 @@ import dynamic from "next/dynamic"; import { ChatControllerPool } from "../client/controller"; import { DalleSize, DalleQuality, DalleStyle } from "../typing"; import { Prompt, usePromptStore } from "../store/prompt"; -import Locale, { getLang, getSTTLang } from "../locales"; +import Locale from "../locales"; import { IconButton } from "./button"; import styles from "./chat.module.scss"; @@ -100,9 +99,7 @@ import { import { useNavigate } from "react-router-dom"; import { CHAT_PAGE_SIZE, - DEFAULT_STT_ENGINE, DEFAULT_TTS_ENGINE, - FIREFOX_DEFAULT_STT_ENGINE, ModelProvider, LAST_INPUT_KEY, Path, @@ -123,11 +120,6 @@ import { MultimodalContent } from "../client/api"; const localStorage = safeLocalStorage(); import { ClientApi } from "../client/api"; import { createTTSPlayer } from "../utils/audio"; -import { - OpenAITranscriptionApi, - SpeechApi, - WebTranscriptionApi, -} from "../utils/speech"; import { MsEdgeTTS, OUTPUT_FORMAT } from "../utils/ms_edge_tts"; const ttsPlayer = createTTSPlayer(); @@ -556,44 +548,6 @@ export function ChatActions(props: { } }, [chatStore, currentModel, models]); - const [isListening, setIsListening] = useState(false); - const [isTranscription, setIsTranscription] = useState(false); - const [speechApi, setSpeechApi] = useState(null); - - useEffect(() => { - if (isFirefox()) config.sttConfig.engine = FIREFOX_DEFAULT_STT_ENGINE; - setSpeechApi( - config.sttConfig.engine === DEFAULT_STT_ENGINE - ? new WebTranscriptionApi((transcription) => - onRecognitionEnd(transcription), - ) - : new OpenAITranscriptionApi((transcription) => - onRecognitionEnd(transcription), - ), - ); - }, []); - - const startListening = async () => { - if (speechApi) { - await speechApi.start(); - setIsListening(true); - } - }; - const stopListening = async () => { - if (speechApi) { - if (config.sttConfig.engine !== DEFAULT_STT_ENGINE) - setIsTranscription(true); - await speechApi.stop(); - setIsListening(false); - } - }; - const onRecognitionEnd = (finalTranscript: string) => { - console.log(finalTranscript); - if (finalTranscript) props.setUserInput(finalTranscript); - if (config.sttConfig.engine !== DEFAULT_STT_ENGINE) - setIsTranscription(false); - }; - return (
       {couldStop && (
@@ -828,16 +782,6 @@ export function ChatActions(props: {
          icon={}
        />
      )}
-
-      {config.sttConfig.enable && (
-        <ChatAction
-          onClick={async () =>
-            isListening ? await stopListening() : await startListening()
-          }
-          text={isListening ? Locale.Chat.StopSpeak : Locale.Chat.StartSpeak}
-          icon={<VoiceWhiteIcon />}
-        />
-      )}
      </div>
); } diff --git a/app/components/settings.tsx b/app/components/settings.tsx index 47a72d79de7..33bf8b2e74e 100644 --- a/app/components/settings.tsx +++ b/app/components/settings.tsx @@ -81,7 +81,6 @@ import { nanoid } from "nanoid"; import { useMaskStore } from "../store/mask"; import { ProviderType } from "../utils/cloud"; import { TTSConfigList } from "./tts-config"; -import { STTConfigList } from "./stt-config"; function EditPromptModal(props: { id: string; onClose: () => void }) { const promptStore = usePromptStore(); @@ -1659,17 +1658,6 @@ export function Settings() { /> - - { - const sttConfig = { ...config.sttConfig }; - updater(sttConfig); - config.update((config) => (config.sttConfig = sttConfig)); - }} - /> - - diff --git a/app/components/stt-config.tsx b/app/components/stt-config.tsx deleted file mode 100644 index f83d280305f..00000000000 --- a/app/components/stt-config.tsx +++ /dev/null @@ -1,51 +0,0 @@ -import { STTConfig, STTConfigValidator } from "../store"; - -import Locale from "../locales"; -import { ListItem, Select } from "./ui-lib"; -import { DEFAULT_STT_ENGINES } from "../constant"; -import { isFirefox } from "../utils"; - -export function STTConfigList(props: { - sttConfig: STTConfig; - updateConfig: (updater: (config: STTConfig) => void) => void; -}) { - return ( - <> - - - props.updateConfig( - (config) => (config.enable = e.currentTarget.checked), - ) - } - > - - {!isFirefox() && ( - - - - )} - - ); -} diff --git a/app/components/stt.module.scss b/app/components/stt.module.scss deleted file mode 100644 index ba9f382e40b..00000000000 --- a/app/components/stt.module.scss +++ /dev/null @@ -1,119 +0,0 @@ -@import "../styles/animation.scss"; -.plugin-page { - height: 100%; - display: flex; - flex-direction: column; - - .plugin-page-body { - padding: 20px; - overflow-y: auto; - - .plugin-filter { - width: 100%; - max-width: 100%; - margin-bottom: 20px; - animation: slide-in ease 0.3s; - height: 40px; - - display: flex; - - .search-bar { - flex-grow: 1; - max-width: 100%; - min-width: 0; - outline: none; - } - - .search-bar:focus { - border: 1px solid var(--primary); - } - - .plugin-filter-lang { - height: 100%; - margin-left: 10px; - } - - .plugin-create { - height: 100%; - margin-left: 10px; - box-sizing: border-box; - min-width: 80px; - } - } - - .plugin-item { - display: flex; - justify-content: space-between; - padding: 20px; - border: var(--border-in-light); - animation: slide-in ease 0.3s; - - &:not(:last-child) { - border-bottom: 0; - } - - &:first-child { - border-top-left-radius: 10px; - border-top-right-radius: 10px; - } - - &:last-child { - border-bottom-left-radius: 10px; - border-bottom-right-radius: 10px; - } - - .plugin-header { - display: flex; - align-items: center; - - .plugin-icon { - display: flex; - align-items: center; - justify-content: center; - margin-right: 10px; - } - - .plugin-title { - .plugin-name { - font-size: 14px; - font-weight: bold; - } - .plugin-info { - font-size: 12px; - } - .plugin-runtime-warning { - font-size: 12px; - color: #f86c6c; - } - } - } - - .plugin-actions { - display: flex; - flex-wrap: nowrap; - transition: all ease 0.3s; - justify-content: center; - align-items: center; - } - - @media screen and (max-width: 600px) { - display: flex; - flex-direction: column; - padding-bottom: 10px; - border-radius: 10px; - margin-bottom: 20px; - box-shadow: var(--card-shadow); - - &:not(:last-child) { - border-bottom: var(--border-in-light); - } - - .plugin-actions { - width: 100%; - justify-content: space-between; - padding-top: 
10px; - } - } - } - } -} diff --git a/app/constant.ts b/app/constant.ts index d349268baad..7d7fcf78b7c 100644 --- a/app/constant.ts +++ b/app/constant.ts @@ -153,7 +153,6 @@ export const Anthropic = { export const OpenaiPath = { ChatPath: "v1/chat/completions", SpeechPath: "v1/audio/speech", - TranscriptionPath: "v1/audio/transcriptions", ImagePath: "v1/images/generations", UsagePath: "dashboard/billing/usage", SubsPath: "dashboard/billing/subscription", @@ -274,10 +273,6 @@ export const DEFAULT_TTS_VOICES = [ "shimmer", ]; -export const DEFAULT_STT_ENGINE = "WebAPI"; -export const DEFAULT_STT_ENGINES = ["WebAPI", "OpenAI Whisper"]; -export const FIREFOX_DEFAULT_STT_ENGINE = "OpenAI Whisper"; - const openaiModels = [ "gpt-3.5-turbo", "gpt-3.5-turbo-1106", diff --git a/app/locales/cn.ts b/app/locales/cn.ts index 05e33049199..979485a0029 100644 --- a/app/locales/cn.ts +++ b/app/locales/cn.ts @@ -520,16 +520,6 @@ const cn = { SubTitle: "生成语音的速度", }, }, - STT: { - Enable: { - Title: "启用语音转文本", - SubTitle: "启用语音转文本", - }, - Engine: { - Title: "转换引擎", - SubTitle: "音频转换引擎", - }, - }, }, Store: { DefaultTopic: "新的聊天", diff --git a/app/locales/en.ts b/app/locales/en.ts index 0c2d2d27d5a..4bf065033eb 100644 --- a/app/locales/en.ts +++ b/app/locales/en.ts @@ -527,16 +527,6 @@ const en: LocaleType = { }, Engine: "TTS Engine", }, - STT: { - Enable: { - Title: "Enable STT", - SubTitle: "Enable Speech-to-Text", - }, - Engine: { - Title: "STT Engine", - SubTitle: "Text-to-Speech Engine", - }, - }, }, Store: { DefaultTopic: "New Conversation", diff --git a/app/store/config.ts b/app/store/config.ts index e2a2f874733..39268c69b8f 100644 --- a/app/store/config.ts +++ b/app/store/config.ts @@ -5,8 +5,6 @@ import { DEFAULT_INPUT_TEMPLATE, DEFAULT_MODELS, DEFAULT_SIDEBAR_WIDTH, - DEFAULT_STT_ENGINE, - DEFAULT_STT_ENGINES, DEFAULT_TTS_ENGINE, DEFAULT_TTS_ENGINES, DEFAULT_TTS_MODEL, @@ -23,8 +21,6 @@ export type TTSModelType = (typeof DEFAULT_TTS_MODELS)[number]; export type TTSVoiceType = (typeof DEFAULT_TTS_VOICES)[number]; export type TTSEngineType = (typeof DEFAULT_TTS_ENGINES)[number]; -export type STTEngineType = (typeof DEFAULT_STT_ENGINES)[number]; - export enum SubmitKey { Enter = "Enter", CtrlEnter = "Ctrl + Enter", @@ -90,17 +86,12 @@ export const DEFAULT_CONFIG = { voice: DEFAULT_TTS_VOICE, speed: 1.0, }, - sttConfig: { - enable: false, - engine: DEFAULT_STT_ENGINE, - }, }; export type ChatConfig = typeof DEFAULT_CONFIG; export type ModelConfig = ChatConfig["modelConfig"]; export type TTSConfig = ChatConfig["ttsConfig"]; -export type STTConfig = ChatConfig["sttConfig"]; export function limitNumber( x: number, @@ -130,12 +121,6 @@ export const TTSConfigValidator = { }, }; -export const STTConfigValidator = { - engine(x: string) { - return x as STTEngineType; - }, -}; - export const ModalConfigValidator = { model(x: string) { return x as ModelType; diff --git a/app/utils/speech.ts b/app/utils/speech.ts deleted file mode 100644 index dc8102879fb..00000000000 --- a/app/utils/speech.ts +++ /dev/null @@ -1,126 +0,0 @@ -import { ChatGPTApi } from "../client/platforms/openai"; -import { getSTTLang } from "../locales"; -import { isFirefox } from "../utils"; - -export type TranscriptionCallback = (transcription: string) => void; - -export abstract class SpeechApi { - protected onTranscription: TranscriptionCallback = () => {}; - - abstract isListening(): boolean; - abstract start(): Promise; - abstract stop(): Promise; - - onTranscriptionReceived(callback: TranscriptionCallback) { - this.onTranscription = 
callback; - } -} - -export class OpenAITranscriptionApi extends SpeechApi { - private listeningStatus = false; - private mediaRecorder: MediaRecorder | null = null; - private stream: MediaStream | null = null; - private audioChunks: Blob[] = []; - - isListening = () => this.listeningStatus; - - constructor(transcriptionCallback?: TranscriptionCallback) { - super(); - if (transcriptionCallback) { - this.onTranscriptionReceived(transcriptionCallback); - } - } - - async start(): Promise { - // @ts-ignore - navigator.getUserMedia = - // @ts-ignore - navigator.getUserMedia || - // @ts-ignore - navigator.webkitGetUserMedia || - // @ts-ignore - navigator.mozGetUserMedia || - // @ts-ignore - navigator.msGetUserMedia; - if (navigator.mediaDevices) { - const stream = await navigator.mediaDevices.getUserMedia({ audio: true }); - this.mediaRecorder = new MediaRecorder(stream); - this.mediaRecorder.ondataavailable = (e) => { - if (e.data && e.data.size > 0) { - this.audioChunks.push(e.data); - } - }; - - this.stream = stream; - } else { - console.warn("Media Decives will work only with SSL"); - return; - } - - this.audioChunks = []; - - // this.recorder.addEventListener("dataavailable", (event) => { - // this.audioChunks.push(event.data); - // }); - - this.mediaRecorder.start(1000); - this.listeningStatus = true; - } - - async stop(): Promise { - if (!this.mediaRecorder || !this.listeningStatus) { - return; - } - - return new Promise((resolve) => { - this.mediaRecorder!.addEventListener("stop", async () => { - const audioBlob = new Blob(this.audioChunks, { type: "audio/wav" }); - const llm = new ChatGPTApi(); - const transcription = await llm.transcription({ file: audioBlob }); - this.onTranscription(transcription); - this.listeningStatus = false; - resolve(); - }); - - this.mediaRecorder!.stop(); - }); - } -} - -export class WebTranscriptionApi extends SpeechApi { - private listeningStatus = false; - private recognitionInstance: any | null = null; - - isListening = () => this.listeningStatus; - - constructor(transcriptionCallback?: TranscriptionCallback) { - super(); - if (isFirefox()) return; - const SpeechRecognition = - (window as any).SpeechRecognition || - (window as any).webkitSpeechRecognition; - this.recognitionInstance = new SpeechRecognition(); - this.recognitionInstance.continuous = true; - this.recognitionInstance.interimResults = true; - this.recognitionInstance.lang = getSTTLang(); - if (transcriptionCallback) { - this.onTranscriptionReceived(transcriptionCallback); - } - this.recognitionInstance.onresult = (event: any) => { - const result = event.results[event.results.length - 1]; - if (result.isFinal) { - this.onTranscription(result[0].transcript); - } - }; - } - - async start(): Promise { - this.listeningStatus = true; - await this.recognitionInstance.start(); - } - - async stop(): Promise { - this.listeningStatus = false; - await this.recognitionInstance.stop(); - } -} From fd47bc1dc3e969d98410f0ea384b71c5ec26de34 Mon Sep 17 00:00:00 2001 From: lyf <1910527151@qq.com> Date: Wed, 18 Sep 2024 13:56:44 +0800 Subject: [PATCH 17/36] Add English copy --- app/locales/en.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/app/locales/en.ts b/app/locales/en.ts index 09b76f1fa12..80859c543d6 100644 --- a/app/locales/en.ts +++ b/app/locales/en.ts @@ -52,6 +52,7 @@ const en: LocaleType = { next: "Next Chat", prev: "Previous Chat", clear: "Clear Context", + fork: "Copy Chat", del: "Delete Chat", }, InputActions: { From a3b664763e8438196d29a074df366abef814b4c6 Mon Sep 17 00:00:00 2001 From: 
DDMeaqua Date: Wed, 18 Sep 2024 14:57:43 +0800 Subject: [PATCH 18/36] chore: default header --- app/client/api.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/client/api.ts b/app/client/api.ts index 97a0d061a99..8285b4d9f94 100644 --- a/app/client/api.ts +++ b/app/client/api.ts @@ -216,7 +216,7 @@ export function validString(x: string): boolean { return x?.length > 0; } -export function getHeaders(ignoreHeaders?: boolean) { +export function getHeaders(ignoreHeaders: boolean = false) { const accessStore = useAccessStore.getState(); const chatStore = useChatStore.getState(); let headers: Record = {}; From 7f1b44befe8449f767968f545742049ff90a089b Mon Sep 17 00:00:00 2001 From: DDMeaqua Date: Wed, 18 Sep 2024 15:04:41 +0800 Subject: [PATCH 19/36] fix: css --- app/components/sidebar.tsx | 5 ----- app/components/ui-lib.module.scss | 3 ++- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/app/components/sidebar.tsx b/app/components/sidebar.tsx index 4ec0f8c84f5..7d5ca2a68ee 100644 --- a/app/components/sidebar.tsx +++ b/app/components/sidebar.tsx @@ -254,11 +254,6 @@ export function SideBar(props: { className?: string }) { {showPluginSelector && ( { return { title: item.name, diff --git a/app/components/ui-lib.module.scss b/app/components/ui-lib.module.scss index 28ecb7e6861..1cd966f19d2 100644 --- a/app/components/ui-lib.module.scss +++ b/app/components/ui-lib.module.scss @@ -312,7 +312,8 @@ min-width: 300px; .list { max-height: 90vh; - overflow: hidden; + overflow-x: hidden; + overflow-y: auto; .list-item { cursor: pointer; From 10d7a64f8869e1b35cc2e296d111431f2a00945d Mon Sep 17 00:00:00 2001 From: DDMeaqua Date: Wed, 18 Sep 2024 15:37:21 +0800 Subject: [PATCH 20/36] fix: error --- app/client/platforms/openai.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/client/platforms/openai.ts b/app/client/platforms/openai.ts index a1b83a922b7..d86be718bce 100644 --- a/app/client/platforms/openai.ts +++ b/app/client/platforms/openai.ts @@ -79,7 +79,7 @@ export interface DalleRequestPayload { export class ChatGPTApi implements LLMApi { private disableListModels = true; - path(path: string, model?: string): string { + path(path: string): string { const accessStore = useAccessStore.getState(); let baseUrl = ""; @@ -157,7 +157,7 @@ export class ChatGPTApi implements LLMApi { options.onController?.(controller); try { - const speechPath = this.path(OpenaiPath.SpeechPath, options.model); + const speechPath = this.path(OpenaiPath.SpeechPath); const speechPayload = { method: "POST", body: JSON.stringify(requestPayload), From accb526cd649fe505f1bf3e4a5bcc1b01d1bdf40 Mon Sep 17 00:00:00 2001 From: JuliusMoehring <44407680+JuliusMoehring@users.noreply.github.com> Date: Wed, 18 Sep 2024 18:07:10 +0200 Subject: [PATCH 21/36] Avoid fetching prompts.json serverside --- app/store/prompt.ts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/app/store/prompt.ts b/app/store/prompt.ts index a25cda5813a..f746edecd4f 100644 --- a/app/store/prompt.ts +++ b/app/store/prompt.ts @@ -1,7 +1,7 @@ import Fuse from "fuse.js"; -import { getLang } from "../locales"; -import { StoreKey } from "../constant"; import { nanoid } from "nanoid"; +import { StoreKey } from "../constant"; +import { getLang } from "../locales"; import { createPersistStore } from "../utils/store"; export interface Prompt { @@ -147,6 +147,11 @@ export const usePromptStore = createPersistStore( }, onRehydrateStorage(state) { + // Skip store rehydration on server side + if (typeof 
window === "undefined") { + return; + } + const PROMPT_URL = "./prompts.json"; type PromptList = Array<[string, string]>; From 212d15fdd0adcdd8df8eec85b3131242831275bb Mon Sep 17 00:00:00 2001 From: Yudong Date: Thu, 19 Sep 2024 11:20:18 +0800 Subject: [PATCH 22/36] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E4=BA=86typo=EF=BC=8CW?= =?UTF-8?q?HITE=5FWEBDEV=5FENDPOINTS=20->=20WHITE=5FWEBDAV=5FENDPOINTS?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env.template | 2 +- README.md | 2 +- README_CN.md | 2 +- README_JA.md | 2 +- app/config/server.ts | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.env.template b/.env.template index 25addf2b3e5..82f44216ab8 100644 --- a/.env.template +++ b/.env.template @@ -66,4 +66,4 @@ ANTHROPIC_API_VERSION= ANTHROPIC_URL= ### (optional) -WHITE_WEBDEV_ENDPOINTS= \ No newline at end of file +WHITE_WEBDAV_ENDPOINTS= \ No newline at end of file diff --git a/README.md b/README.md index c8b158956b3..2001d4d8878 100644 --- a/README.md +++ b/README.md @@ -340,7 +340,7 @@ For ByteDance: use `modelName@bytedance=deploymentName` to customize model name Change default model -### `WHITE_WEBDEV_ENDPOINTS` (optional) +### `WHITE_WEBDAV_ENDPOINTS` (optional) You can use this option if you want to increase the number of webdav service addresses you are allowed to access, as required by the format: - Each address must be a complete endpoint diff --git a/README_CN.md b/README_CN.md index beed396c5aa..7831e2ee981 100644 --- a/README_CN.md +++ b/README_CN.md @@ -202,7 +202,7 @@ ByteDance Api Url. 如果你想禁用从链接解析预制设置,将此环境变量设置为 1 即可。 -### `WHITE_WEBDEV_ENDPOINTS` (可选) +### `WHITE_WEBDAV_ENDPOINTS` (可选) 如果你想增加允许访问的webdav服务地址,可以使用该选项,格式要求: - 每一个地址必须是一个完整的 endpoint diff --git a/README_JA.md b/README_JA.md index 6b8caadae6c..1716089af45 100644 --- a/README_JA.md +++ b/README_JA.md @@ -193,7 +193,7 @@ ByteDance API の URL。 リンクからのプリセット設定解析を無効にしたい場合は、この環境変数を 1 に設定します。 -### `WHITE_WEBDEV_ENDPOINTS` (オプション) +### `WHITE_WEBDAV_ENDPOINTS` (オプション) アクセス許可を与える WebDAV サービスのアドレスを追加したい場合、このオプションを使用します。フォーマット要件: - 各アドレスは完全なエンドポイントでなければなりません。 diff --git a/app/config/server.ts b/app/config/server.ts index 676b0174f24..d98488129cd 100644 --- a/app/config/server.ts +++ b/app/config/server.ts @@ -155,7 +155,7 @@ export const getServerSideConfig = () => { // ); const allowedWebDevEndpoints = ( - process.env.WHITE_WEBDEV_ENDPOINTS ?? "" + process.env.WHITE_WEBDAV_ENDPOINTS ?? 
"" ).split(","); return { From df222ded12b9a501b8a5edd87297e089d9881907 Mon Sep 17 00:00:00 2001 From: Yudong Date: Thu, 19 Sep 2024 14:15:31 +0800 Subject: [PATCH 23/36] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E4=BA=86typo,=20WebDev?= =?UTF-8?q?=20->=20WebDav?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/api/webdav/[...path]/route.ts | 2 +- app/config/server.ts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/app/api/webdav/[...path]/route.ts b/app/api/webdav/[...path]/route.ts index 9f96cbfcf74..bb7743bda40 100644 --- a/app/api/webdav/[...path]/route.ts +++ b/app/api/webdav/[...path]/route.ts @@ -6,7 +6,7 @@ const config = getServerSideConfig(); const mergedAllowedWebDavEndpoints = [ ...internalAllowedWebDavEndpoints, - ...config.allowedWebDevEndpoints, + ...config.allowedWebDavEndpoints, ].filter((domain) => Boolean(domain.trim())); const normalizeUrl = (url: string) => { diff --git a/app/config/server.ts b/app/config/server.ts index d98488129cd..6544fe5641d 100644 --- a/app/config/server.ts +++ b/app/config/server.ts @@ -154,7 +154,7 @@ export const getServerSideConfig = () => { // `[Server Config] using ${randomIndex + 1} of ${apiKeys.length} api key`, // ); - const allowedWebDevEndpoints = ( + const allowedWebDavEndpoints = ( process.env.WHITE_WEBDAV_ENDPOINTS ?? "" ).split(","); @@ -229,6 +229,6 @@ export const getServerSideConfig = () => { disableFastLink: !!process.env.DISABLE_FAST_LINK, customModels, defaultModel, - allowedWebDevEndpoints, + allowedWebDavEndpoints, }; }; From 4d1f9e49d46eed55ff5ad970092be8d1d464416c Mon Sep 17 00:00:00 2001 From: lloydzhou Date: Sun, 22 Sep 2024 18:53:51 +0800 Subject: [PATCH 24/36] hotfix openai function call tool_calls no index --- app/client/platforms/openai.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/client/platforms/openai.ts b/app/client/platforms/openai.ts index d86be718bce..c59a0519c1b 100644 --- a/app/client/platforms/openai.ts +++ b/app/client/platforms/openai.ts @@ -277,6 +277,7 @@ export class ChatGPTApi implements LLMApi { ); } if (shouldStream) { + let index = -1; const [tools, funcs] = usePluginStore .getState() .getAsTools( @@ -302,7 +303,7 @@ export class ChatGPTApi implements LLMApi { }>; const tool_calls = choices[0]?.delta?.tool_calls; if (tool_calls?.length > 0) { - const index = tool_calls[0]?.index; + index += 1; const id = tool_calls[0]?.id; const args = tool_calls[0]?.function?.arguments; if (id) { @@ -327,6 +328,8 @@ export class ChatGPTApi implements LLMApi { toolCallMessage: any, toolCallResult: any[], ) => { + // reset index value + index = -1; // @ts-ignore requestPayload?.messages?.splice( // @ts-ignore From 3a969054e309797153a01ec3283e926dee75008c Mon Sep 17 00:00:00 2001 From: lloydzhou Date: Sun, 22 Sep 2024 18:59:49 +0800 Subject: [PATCH 25/36] hotfix openai function call tool_calls no index --- app/client/platforms/openai.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/client/platforms/openai.ts b/app/client/platforms/openai.ts index c59a0519c1b..0a8d6203ae5 100644 --- a/app/client/platforms/openai.ts +++ b/app/client/platforms/openai.ts @@ -303,10 +303,10 @@ export class ChatGPTApi implements LLMApi { }>; const tool_calls = choices[0]?.delta?.tool_calls; if (tool_calls?.length > 0) { - index += 1; const id = tool_calls[0]?.id; const args = tool_calls[0]?.function?.arguments; if (id) { + index += 1; runTools.push({ id, type: tool_calls[0]?.type, From 35aa2c7270042acfbbd4049fc0e48fefd1aafb10 
Mon Sep 17 00:00:00 2001 From: lyf <1910527151@qq.com> Date: Mon, 23 Sep 2024 11:34:20 +0800 Subject: [PATCH 26/36] Fix code duplication --- app/components/markdown.tsx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/app/components/markdown.tsx b/app/components/markdown.tsx index b57fd74904c..17823300013 100644 --- a/app/components/markdown.tsx +++ b/app/components/markdown.tsx @@ -128,8 +128,10 @@ export function PreCode(props: { children: any }) { className="copy-code-button" onClick={() => { if (ref.current) { - const code = ref.current.innerText; - copyToClipboard(code); + // const code = ref.current.innerText; + copyToClipboard( + ref.current.querySelector("code")?.innerText ?? "", + ); } }} > From 0e210cf8de4b4a5c75acd8684b706a840ca947ba Mon Sep 17 00:00:00 2001 From: DDMeaqua Date: Mon, 23 Sep 2024 14:13:09 +0800 Subject: [PATCH 27/36] =?UTF-8?q?fix:=20#5486=20plugin=E6=A0=B7=E5=BC=8F?= =?UTF-8?q?=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/components/plugin.module.scss | 25 ++++++++++++++++++++++++- app/components/plugin.tsx | 4 ++-- 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/app/components/plugin.module.scss b/app/components/plugin.module.scss index a179e0a07a8..4b0e990e98f 100644 --- a/app/components/plugin.module.scss +++ b/app/components/plugin.module.scss @@ -10,7 +10,30 @@ max-height: 240px; overflow-y: auto; white-space: pre-wrap; - min-width: 300px; + min-width: 280px; } } +.plugin-schema { + display: flex; + justify-content: flex-end; + flex-wrap: wrap; + flex-direction: row; + + input { + margin-right: 20px; + + @media screen and (max-width: 600px) { + margin-right: 0px; + } + } + + @media screen and (max-width: 600px) { + flex-direction: column; + gap: 5px; + + button { + padding: 10px; + } + } +} diff --git a/app/components/plugin.tsx b/app/components/plugin.tsx index cf4ae946ef6..a768c78a867 100644 --- a/app/components/plugin.tsx +++ b/app/components/plugin.tsx @@ -345,10 +345,10 @@ export function PluginPage() { -
+          <div className={styles["plugin-schema"]}>
setLoadUrl(e.currentTarget.value)} > Date: Mon, 23 Sep 2024 14:18:32 +0800 Subject: [PATCH 28/36] chore: css --- app/components/plugin.module.scss | 1 - 1 file changed, 1 deletion(-) diff --git a/app/components/plugin.module.scss b/app/components/plugin.module.scss index 4b0e990e98f..99a0898960e 100644 --- a/app/components/plugin.module.scss +++ b/app/components/plugin.module.scss @@ -17,7 +17,6 @@ .plugin-schema { display: flex; justify-content: flex-end; - flex-wrap: wrap; flex-direction: row; input { From d95d509046392996941b2e757f795e53ae7f4b38 Mon Sep 17 00:00:00 2001 From: lyf <1910527151@qq.com> Date: Mon, 23 Sep 2024 15:43:36 +0800 Subject: [PATCH 29/36] fex --- app/components/markdown.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/app/components/markdown.tsx b/app/components/markdown.tsx index 17823300013..4f1b0ed24ea 100644 --- a/app/components/markdown.tsx +++ b/app/components/markdown.tsx @@ -128,7 +128,6 @@ export function PreCode(props: { children: any }) { className="copy-code-button" onClick={() => { if (ref.current) { - // const code = ref.current.innerText; copyToClipboard( ref.current.querySelector("code")?.innerText ?? "", ); From ed20fd296292bf4787145086b9d23a5920ae237d Mon Sep 17 00:00:00 2001 From: lloydzhou Date: Mon, 23 Sep 2024 20:00:07 +0800 Subject: [PATCH 30/36] 1. add buildin plugin; 2. remove `usingProxy` --- app/components/plugin.tsx | 37 ++++------------------------ app/store/plugin.ts | 52 +++++++++++++++++++++++++++++++++++---- public/plugins.json | 17 +++++++++++++ 3 files changed, 69 insertions(+), 37 deletions(-) create mode 100644 public/plugins.json diff --git a/app/components/plugin.tsx b/app/components/plugin.tsx index cf4ae946ef6..73f0db64ebf 100644 --- a/app/components/plugin.tsx +++ b/app/components/plugin.tsx @@ -12,7 +12,6 @@ import EditIcon from "../icons/edit.svg"; import AddIcon from "../icons/add.svg"; import CloseIcon from "../icons/close.svg"; import DeleteIcon from "../icons/delete.svg"; -import EyeIcon from "../icons/eye.svg"; import ConfirmIcon from "../icons/confirm.svg"; import ReloadIcon from "../icons/reload.svg"; import GithubIcon from "../icons/github.svg"; @@ -29,7 +28,6 @@ import { import Locale from "../locales"; import { useNavigate } from "react-router-dom"; import { useState } from "react"; -import { getClientConfig } from "../config/client"; export function PluginPage() { const navigate = useNavigate(); @@ -209,19 +207,11 @@ export function PluginPage() {
- {m.builtin ? ( - } - text={Locale.Plugin.Item.View} - onClick={() => setEditingPluginId(m.id)} - /> - ) : ( - } - text={Locale.Plugin.Item.Edit} - onClick={() => setEditingPluginId(m.id)} - /> - )} + } + text={Locale.Plugin.Item.Edit} + onClick={() => setEditingPluginId(m.id)} + /> {!m.builtin && ( } @@ -325,23 +315,6 @@ export function PluginPage() { > )} - {!getClientConfig()?.isApp && ( - - { - pluginStore.updatePlugin(editingPlugin.id, (plugin) => { - plugin.usingProxy = e.currentTarget.checked; - }); - }} - > - - )} diff --git a/app/store/plugin.ts b/app/store/plugin.ts index 44679cbdc25..48930384dbc 100644 --- a/app/store/plugin.ts +++ b/app/store/plugin.ts @@ -2,8 +2,12 @@ import OpenAPIClientAxios from "openapi-client-axios"; import { StoreKey } from "../constant"; import { nanoid } from "nanoid"; import { createPersistStore } from "../utils/store"; +import { getClientConfig } from "../config/client"; import yaml from "js-yaml"; import { adapter } from "../utils"; +import { useAccessStore } from "./access"; + +const isApp = getClientConfig()?.buildMode === "export"; export type Plugin = { id: string; @@ -16,7 +20,6 @@ export type Plugin = { authLocation?: string; authHeader?: string; authToken?: string; - usingProxy?: boolean; }; export type FunctionToolItem = { @@ -46,17 +49,24 @@ export const FunctionToolService = { plugin?.authType == "basic" ? `Basic ${plugin?.authToken}` : plugin?.authType == "bearer" - ? ` Bearer ${plugin?.authToken}` + ? `Bearer ${plugin?.authToken}` : plugin?.authToken; const authLocation = plugin?.authLocation || "header"; const definition = yaml.load(plugin.content) as any; const serverURL = definition?.servers?.[0]?.url; - const baseURL = !!plugin?.usingProxy ? "/api/proxy" : serverURL; + const baseURL = !isApp ? "/api/proxy" : serverURL; const headers: Record = { - "X-Base-URL": !!plugin?.usingProxy ? serverURL : undefined, + "X-Base-URL": !isApp ? serverURL : undefined, }; if (authLocation == "header") { headers[headerName] = tokenValue; + // try using openaiApiKey for Dalle3 Plugin. 
+ if (!tokenValue && plugin.id === "dalle3") { + const openaiApiKey = useAccessStore.getState().openaiApiKey; + if (openaiApiKey) { + headers[headerName] = `Bearer ${openaiApiKey}`; + } + } } const api = new OpenAPIClientAxios({ definition: yaml.load(plugin.content) as any, @@ -165,7 +175,7 @@ export const usePluginStore = createPersistStore( (set, get) => ({ create(plugin?: Partial) { const plugins = get().plugins; - const id = nanoid(); + const id = plugin?.id || nanoid(); plugins[id] = { ...createEmptyPlugin(), ...plugin, @@ -220,5 +230,37 @@ export const usePluginStore = createPersistStore( { name: StoreKey.Plugin, version: 1, + onRehydrateStorage(state) { + console.log("onRehydrateStorage", state); + // Skip store rehydration on server side + if (typeof window === "undefined") { + return; + } + + fetch("./plugins.json") + .then((res) => res.json()) + .then((res) => { + Promise.all( + res.map((item: any) => + fetch(item.schema) + .then((res) => res.text()) + .then((content) => ({ + ...item, + content, + })), + ), + ).then((builtinPlugins: any) => { + builtinPlugins.forEach((item: any) => { + const plugin = state.create(item); + state.updatePlugin(plugin.id, (plugin) => { + const tool = FunctionToolService.add(plugin, true); + plugin.title = tool.api.definition.info.title; + plugin.version = tool.api.definition.info.version; + plugin.builtin = true; + }); + }); + }); + }); + }, }, ); diff --git a/public/plugins.json b/public/plugins.json new file mode 100644 index 00000000000..c4d7ec46a05 --- /dev/null +++ b/public/plugins.json @@ -0,0 +1,17 @@ +[ + { + "id": "dalle3", + "name": "Dalle3", + "schema": "https://ghp.ci/https://raw.githubusercontent.com/ChatGPTNextWeb/NextChat-Awesome-Plugins/main/plugins/dalle/openapi.json" + }, + { + "id": "arxivsearch", + "name": "ArxivSearch", + "schema": "https://ghp.ci/https://raw.githubusercontent.com/ChatGPTNextWeb/NextChat-Awesome-Plugins/main/plugins/arxivsearch/openapi.json" + }, + { + "id": "duckduckgolite", + "name": "DuckDuckGoLiteSearch", + "schema": "https://ghp.ci/https://raw.githubusercontent.com/ChatGPTNextWeb/NextChat-Awesome-Plugins/main/plugins/duckduckgolite/openapi.json" + } +] From 90e7b5aecf7bb4e85cd848bb9f24ffa2688874fb Mon Sep 17 00:00:00 2001 From: lloydzhou Date: Mon, 23 Sep 2024 20:20:15 +0800 Subject: [PATCH 31/36] try using openai api key for dalle-3 plugin --- app/store/plugin.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/app/store/plugin.ts b/app/store/plugin.ts index 48930384dbc..ae31dde4e20 100644 --- a/app/store/plugin.ts +++ b/app/store/plugin.ts @@ -60,12 +60,12 @@ export const FunctionToolService = { }; if (authLocation == "header") { headers[headerName] = tokenValue; - // try using openaiApiKey for Dalle3 Plugin. - if (!tokenValue && plugin.id === "dalle3") { - const openaiApiKey = useAccessStore.getState().openaiApiKey; - if (openaiApiKey) { - headers[headerName] = `Bearer ${openaiApiKey}`; - } + } + // try using openaiApiKey for Dalle3 Plugin. 
+ if (!tokenValue && plugin.id === "dalle3") { + const openaiApiKey = useAccessStore.getState().openaiApiKey; + if (openaiApiKey) { + headers[headerName] = `Bearer ${openaiApiKey}`; } } const api = new OpenAPIClientAxios({ From f9f99639db6a759ad108d27a7fb18641673e55d9 Mon Sep 17 00:00:00 2001 From: lloydzhou Date: Tue, 24 Sep 2024 12:59:21 +0800 Subject: [PATCH 32/36] update --- app/store/plugin.ts | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/app/store/plugin.ts b/app/store/plugin.ts index ae31dde4e20..84ae0816e41 100644 --- a/app/store/plugin.ts +++ b/app/store/plugin.ts @@ -7,7 +7,7 @@ import yaml from "js-yaml"; import { adapter } from "../utils"; import { useAccessStore } from "./access"; -const isApp = getClientConfig()?.buildMode === "export"; +const isApp = getClientConfig()?.isApp; export type Plugin = { id: string; @@ -231,7 +231,6 @@ export const usePluginStore = createPersistStore( name: StoreKey.Plugin, version: 1, onRehydrateStorage(state) { - console.log("onRehydrateStorage", state); // Skip store rehydration on server side if (typeof window === "undefined") { return; @@ -242,23 +241,29 @@ export const usePluginStore = createPersistStore( .then((res) => { Promise.all( res.map((item: any) => - fetch(item.schema) - .then((res) => res.text()) - .then((content) => ({ - ...item, - content, - })), + // skip get schema + state.get(item.id) + ? item + : fetch(item.schema) + .then((res) => res.text()) + .then((content) => ({ + ...item, + content, + })) + .catch((e) => item), ), ).then((builtinPlugins: any) => { - builtinPlugins.forEach((item: any) => { - const plugin = state.create(item); - state.updatePlugin(plugin.id, (plugin) => { - const tool = FunctionToolService.add(plugin, true); - plugin.title = tool.api.definition.info.title; - plugin.version = tool.api.definition.info.version; - plugin.builtin = true; + builtinPlugins + .filter((item: any) => item?.content) + .forEach((item: any) => { + const plugin = state.create(item); + state.updatePlugin(plugin.id, (plugin) => { + const tool = FunctionToolService.add(plugin, true); + plugin.title = tool.api.definition.info.title; + plugin.version = tool.api.definition.info.version; + plugin.builtin = true; + }); }); - }); }); }); }, From 6c8143b7de54724ce8e7e3d1d40bd2052cce25e3 Mon Sep 17 00:00:00 2001 From: DDMeaqua Date: Tue, 24 Sep 2024 15:15:08 +0800 Subject: [PATCH 33/36] =?UTF-8?q?feat:=20=E5=85=A8=E5=B1=80=E8=AE=BE?= =?UTF-8?q?=E7=BD=AE=E6=98=AF=E5=90=A6=E5=90=AF=E7=94=A8artifacts?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/components/markdown.tsx | 5 ++++- app/components/mask.tsx | 32 +++++++++++++++++--------------- app/components/settings.tsx | 14 ++++++++++++++ app/store/config.ts | 2 ++ 4 files changed, 37 insertions(+), 16 deletions(-) diff --git a/app/components/markdown.tsx b/app/components/markdown.tsx index 4f1b0ed24ea..c0833caf72b 100644 --- a/app/components/markdown.tsx +++ b/app/components/markdown.tsx @@ -21,6 +21,7 @@ import { } from "./artifacts"; import { useChatStore } from "../store"; import { IconButton } from "./button"; +import { useAppConfig } from "../store/config"; export function Mermaid(props: { code: string }) { const ref = useRef(null); @@ -92,7 +93,9 @@ export function PreCode(props: { children: any }) { } }, 600); - const enableArtifacts = session.mask?.enableArtifacts !== false; + const config = useAppConfig(); + const enableArtifacts = + session.mask?.enableArtifacts !== false && 
config.enableArtifacts; //Wrap the paragraph for plain-text useEffect(() => { diff --git a/app/components/mask.tsx b/app/components/mask.tsx index e4dd90826c7..c60e7a528fe 100644 --- a/app/components/mask.tsx +++ b/app/components/mask.tsx @@ -166,21 +166,23 @@ export function MaskConfig(props: { > - - { - props.updateMask((mask) => { - mask.enableArtifacts = e.currentTarget.checked; - }); - }} - > - + {globalConfig.enableArtifacts && ( + + { + props.updateMask((mask) => { + mask.enableArtifacts = e.currentTarget.checked; + }); + }} + > + + )} {!props.shouldSyncFromGlobal ? ( + + + + updateConfig( + (config) => + (config.enableArtifacts = e.currentTarget.checked), + ) + } + > + diff --git a/app/store/config.ts b/app/store/config.ts index 615cc8e82a3..3dcd4d86b80 100644 --- a/app/store/config.ts +++ b/app/store/config.ts @@ -50,6 +50,8 @@ export const DEFAULT_CONFIG = { enableAutoGenerateTitle: true, sidebarWidth: DEFAULT_SIDEBAR_WIDTH, + enableArtifacts: true, // show artifacts config + disablePromptHint: false, dontShowMaskSplashScreen: false, // dont show splash screen when create chat From 269d064e0a7b7b3690cc9aa0f3204960f1bee912 Mon Sep 17 00:00:00 2001 From: DDMeaqua Date: Tue, 24 Sep 2024 15:21:27 +0800 Subject: [PATCH 34/36] fix: #5450 --- app/components/settings.tsx | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/app/components/settings.tsx b/app/components/settings.tsx index 8f90c4c36a4..fcb106385da 100644 --- a/app/components/settings.tsx +++ b/app/components/settings.tsx @@ -1466,9 +1466,12 @@ export function Settings() { > - + From 6c37d04591d0fdfef130425346f69bd5d7ce3843 Mon Sep 17 00:00:00 2001 From: lloydzhou Date: Tue, 24 Sep 2024 18:45:20 +0800 Subject: [PATCH 35/36] auto play video/audio --- app/components/markdown.tsx | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/app/components/markdown.tsx b/app/components/markdown.tsx index 4f1b0ed24ea..d6765f263be 100644 --- a/app/components/markdown.tsx +++ b/app/components/markdown.tsx @@ -279,6 +279,20 @@ function _MarkDownContent(props: { content: string }) { p: (pProps) =>

<p {...pProps} dir="auto" />,
       a: (aProps) => {
         const href = aProps.href || "";
+        if (/\.(aac|mp3|opus|wav)$/.test(href)) {
+          return (
+            <figure>
+              <audio controls src={href}></audio>
+            </figure>
+ ); + } + if (/\.(3gp|3g2|webm|ogv|mpeg|mp4|avi)$/.test(href)) { + return ( + + ); + } const isInternal = /^\/#/i.test(href); const target = isInternal ? "_self" : aProps.target ?? "_blank"; return ; From dbabb2c4030a96bb01aee5da35decef6f3328d6b Mon Sep 17 00:00:00 2001 From: lloydzhou Date: Tue, 24 Sep 2024 18:48:55 +0800 Subject: [PATCH 36/36] auto play video/audio --- app/components/markdown.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/components/markdown.tsx b/app/components/markdown.tsx index d6765f263be..6c73d9ade51 100644 --- a/app/components/markdown.tsx +++ b/app/components/markdown.tsx @@ -288,7 +288,7 @@ function _MarkDownContent(props: { content: string }) { } if (/\.(3gp|3g2|webm|ogv|mpeg|mp4|avi)$/.test(href)) { return ( -
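
Patches 35/36 and 36/36 teach the markdown link renderer to turn audio and video URLs into inline players. The final hunk of patch 36 is truncated above, so the sketch below is a reconstruction of the resulting `components` entry rather than the verbatim committed code; in particular the `<video>` attributes (`controls`, `width="99.9%"`) and the `markdownComponents` wrapper name are assumptions made for illustration.

import React from "react";

// Sketch of the <a> override: audio links render as an inline player,
// video links as a <video> element, everything else keeps normal link behaviour.
const markdownComponents = {
  a: (aProps: React.AnchorHTMLAttributes<HTMLAnchorElement>) => {
    const href = aProps.href || "";
    if (/\.(aac|mp3|opus|wav)$/.test(href)) {
      return (
        <figure>
          <audio controls src={href}></audio>
        </figure>
      );
    }
    if (/\.(3gp|3g2|webm|ogv|mpeg|mp4|avi)$/.test(href)) {
      // width value is an assumption, not taken from the truncated hunk
      return (
        <video controls width="99.9%">
          <source src={href} />
        </video>
      );
    }
    // internal hash links open in place; external links open in a new tab
    const isInternal = /^\/#/i.test(href);
    const target = isInternal ? "_self" : aProps.target ?? "_blank";
    return <a {...aProps} target={target} />;
  },
};

// usage: <ReactMarkdown components={markdownComponents}>...</ReactMarkdown>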
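
Patches 24/36 and 25/36 work around OpenAI-compatible backends that omit the `index` field on streamed `tool_calls` deltas: instead of trusting `tool_calls[0]?.index`, the client keeps a local counter that advances only when a delta carries a new `id`, and resets it to -1 before each round of tool execution. The following is a minimal sketch of that accumulation pattern, assuming deltas arrive in order and argument fragments always follow the call they belong to; the helper name and types are invented for the example and are not part of the codebase.

type ToolCallDelta = {
  id?: string;
  type?: string;
  function?: { name?: string; arguments?: string };
};

interface RunningToolCall {
  id: string;
  type?: string;
  function: { name?: string; arguments: string };
}

function accumulateToolCalls(deltas: ToolCallDelta[]): RunningToolCall[] {
  const runTools: RunningToolCall[] = [];
  let index = -1; // reset before every round, as the hotfix does
  for (const delta of deltas) {
    if (delta.id) {
      // a new tool call starts: advance the counter and open a new slot
      index += 1;
      runTools.push({
        id: delta.id,
        type: delta.type,
        function: {
          name: delta.function?.name,
          arguments: delta.function?.arguments ?? "",
        },
      });
    } else if (index >= 0 && delta.function?.arguments) {
      // continuation chunk: append the argument fragment to the current call
      runTools[index].function.arguments += delta.function.arguments;
    }
  }
  return runTools;
}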