diff --git a/app/client/api.ts b/app/client/api.ts index 4b39fbfaed2..9133e2a6431 100644 --- a/app/client/api.ts +++ b/app/client/api.ts @@ -14,9 +14,17 @@ export type MessageRole = (typeof ROLES)[number]; export const Models = ["gpt-3.5-turbo", "gpt-4"] as const; export type ChatModel = ModelType; +export interface MultimodalContent { + type: "text" | "image_url"; + text?: string; + image_url?: { + url: string; + }; +} + export interface RequestMessage { role: MessageRole; - content: string; + content: string | MultimodalContent[]; } export interface LLMConfig { diff --git a/app/client/platforms/google.ts b/app/client/platforms/google.ts index 6832400ca58..848e5cd3f0d 100644 --- a/app/client/platforms/google.ts +++ b/app/client/platforms/google.ts @@ -3,6 +3,12 @@ import { ChatOptions, getHeaders, LLMApi, LLMModel, LLMUsage } from "../api"; import { useAccessStore, useAppConfig, useChatStore } from "@/app/store"; import { getClientConfig } from "@/app/config/client"; import { DEFAULT_API_HOST } from "@/app/constant"; +import { + getMessageTextContent, + getMessageImages, + isVisionModel, +} from "@/app/utils"; + export class GeminiProApi implements LLMApi { extractMessage(res: any) { console.log("[Response] gemini-pro response: ", res); @@ -15,10 +21,33 @@ export class GeminiProApi implements LLMApi { } async chat(options: ChatOptions): Promise { // const apiClient = this; - const messages = options.messages.map((v) => ({ - role: v.role.replace("assistant", "model").replace("system", "user"), - parts: [{ text: v.content }], - })); + const visionModel = isVisionModel(options.config.model); + let multimodal = false; + const messages = options.messages.map((v) => { + let parts: any[] = [{ text: getMessageTextContent(v) }]; + if (visionModel) { + const images = getMessageImages(v); + if (images.length > 0) { + multimodal = true; + parts = parts.concat( + images.map((image) => { + const imageType = image.split(";")[0].split(":")[1]; + const imageData = image.split(",")[1]; + return { + inline_data: { + mime_type: imageType, + data: imageData, + }, + }; + }), + ); + } + } + return { + role: v.role.replace("assistant", "model").replace("system", "user"), + parts: parts, + }; + }); // google requires that role in neighboring messages must not be the same for (let i = 0; i < messages.length - 1; ) { @@ -33,7 +62,9 @@ export class GeminiProApi implements LLMApi { i++; } } - + // if (visionModel && messages.length > 1) { + // options.onError?.(new Error("Multiturn chat is not enabled for models/gemini-pro-vision")); + // } const modelConfig = { ...useAppConfig.getState().modelConfig, ...useChatStore.getState().currentSession().mask.modelConfig, @@ -80,13 +111,16 @@ export class GeminiProApi implements LLMApi { const controller = new AbortController(); options.onController?.(controller); try { - let chatPath = this.path(Google.ChatPath); + let googleChatPath = visionModel + ? Google.VisionChatPath + : Google.ChatPath; + let chatPath = this.path(googleChatPath); // let baseUrl = accessStore.googleUrl; if (!baseUrl) { baseUrl = isApp - ? DEFAULT_API_HOST + "/api/proxy/google/" + Google.ChatPath + ? 
DEFAULT_API_HOST + "/api/proxy/google/" + googleChatPath : chatPath; } @@ -152,6 +186,19 @@ export class GeminiProApi implements LLMApi { value, }): Promise { if (done) { + if (response.status !== 200) { + try { + let data = JSON.parse(ensureProperEnding(partialData)); + if (data && data[0].error) { + options.onError?.(new Error(data[0].error.message)); + } else { + options.onError?.(new Error("Request failed")); + } + } catch (_) { + options.onError?.(new Error("Request failed")); + } + } + console.log("Stream complete"); // options.onFinish(responseText + remainText); finished = true; diff --git a/app/client/platforms/openai.ts b/app/client/platforms/openai.ts index 3c3a5180198..919716bfb0a 100644 --- a/app/client/platforms/openai.ts +++ b/app/client/platforms/openai.ts @@ -9,7 +9,14 @@ import { } from "@/app/constant"; import { useAccessStore, useAppConfig, useChatStore } from "@/app/store"; -import { ChatOptions, getHeaders, LLMApi, LLMModel, LLMUsage } from "../api"; +import { + ChatOptions, + getHeaders, + LLMApi, + LLMModel, + LLMUsage, + MultimodalContent, +} from "../api"; import Locale from "../../locales"; import { EventStreamContentType, @@ -18,6 +25,11 @@ import { import { prettyObject } from "@/app/utils/format"; import { getClientConfig } from "@/app/config/client"; import { makeAzurePath } from "@/app/azure"; +import { + getMessageTextContent, + getMessageImages, + isVisionModel, +} from "@/app/utils"; export interface OpenAIListModelResponse { object: string; @@ -72,9 +84,10 @@ export class ChatGPTApi implements LLMApi { } async chat(options: ChatOptions) { + const visionModel = isVisionModel(options.config.model); const messages = options.messages.map((v) => ({ role: v.role, - content: v.content, + content: visionModel ? v.content : getMessageTextContent(v), })); const modelConfig = { diff --git a/app/components/chat.module.scss b/app/components/chat.module.scss index 16790ccb1db..e7619e92b89 100644 --- a/app/components/chat.module.scss +++ b/app/components/chat.module.scss @@ -1,5 +1,47 @@ @import "../styles/animation.scss"; +.attach-images { + position: absolute; + left: 30px; + bottom: 32px; + display: flex; +} + +.attach-image { + cursor: default; + width: 64px; + height: 64px; + border: rgba($color: #888, $alpha: 0.2) 1px solid; + border-radius: 5px; + margin-right: 10px; + background-size: cover; + background-position: center; + background-color: var(--white); + + .attach-image-mask { + width: 100%; + height: 100%; + opacity: 0; + transition: all ease 0.2s; + } + + .attach-image-mask:hover { + opacity: 1; + } + + .delete-image { + width: 24px; + height: 24px; + cursor: pointer; + display: flex; + align-items: center; + justify-content: center; + border-radius: 5px; + float: right; + background-color: var(--white); + } +} + .chat-input-actions { display: flex; flex-wrap: wrap; @@ -189,12 +231,10 @@ animation: slide-in ease 0.3s; - $linear: linear-gradient( - to right, - rgba(0, 0, 0, 0), - rgba(0, 0, 0, 1), - rgba(0, 0, 0, 0) - ); + $linear: linear-gradient(to right, + rgba(0, 0, 0, 0), + rgba(0, 0, 0, 1), + rgba(0, 0, 0, 0)); mask-image: $linear; @mixin show { @@ -327,7 +367,7 @@ } } -.chat-message-user > .chat-message-container { +.chat-message-user>.chat-message-container { align-items: flex-end; } @@ -349,6 +389,7 @@ padding: 7px; } } + /* Specific styles for iOS devices */ @media screen and (max-device-width: 812px) and (-webkit-min-device-pixel-ratio: 2) { @supports (-webkit-touch-callout: none) { @@ -381,6 +422,64 @@ transition: all ease 0.3s; } 
+.chat-message-item-image { + width: 100%; + margin-top: 10px; +} + +.chat-message-item-images { + width: 100%; + display: grid; + justify-content: left; + grid-gap: 10px; + grid-template-columns: repeat(var(--image-count), auto); + margin-top: 10px; +} + +.chat-message-item-image-multi { + object-fit: cover; + background-size: cover; + background-position: center; + background-repeat: no-repeat; +} + +.chat-message-item-image, +.chat-message-item-image-multi { + box-sizing: border-box; + border-radius: 10px; + border: rgba($color: #888, $alpha: 0.2) 1px solid; +} + + +@media only screen and (max-width: 600px) { + $calc-image-width: calc(100vw/3*2/var(--image-count)); + + .chat-message-item-image-multi { + width: $calc-image-width; + height: $calc-image-width; + } + + .chat-message-item-image { + max-width: calc(100vw/3*2); + } +} + +@media screen and (min-width: 600px) { + $max-image-width: calc(calc(1200px - var(--sidebar-width))/3*2/var(--image-count)); + $image-width: calc(calc(var(--window-width) - var(--sidebar-width))/3*2/var(--image-count)); + + .chat-message-item-image-multi { + width: $image-width; + height: $image-width; + max-width: $max-image-width; + max-height: $max-image-width; + } + + .chat-message-item-image { + max-width: calc(calc(1200px - var(--sidebar-width))/3*2); + } +} + .chat-message-action-date { font-size: 12px; opacity: 0.2; @@ -395,7 +494,7 @@ z-index: 1; } -.chat-message-user > .chat-message-container > .chat-message-item { +.chat-message-user>.chat-message-container>.chat-message-item { background-color: var(--second); &:hover { @@ -460,6 +559,7 @@ @include single-line(); } + .hint-content { font-size: 12px; @@ -474,15 +574,26 @@ } .chat-input-panel-inner { + cursor: text; display: flex; flex: 1; + border-radius: 10px; + border: var(--border-in-light); +} + +.chat-input-panel-inner-attach { + padding-bottom: 80px; +} + +.chat-input-panel-inner:has(.chat-input:focus) { + border: 1px solid var(--primary); } .chat-input { height: 100%; width: 100%; border-radius: 10px; - border: var(--border-in-light); + border: none; box-shadow: 0 -2px 5px rgba(0, 0, 0, 0.03); background-color: var(--white); color: var(--black); @@ -494,9 +605,7 @@ min-height: 68px; } -.chat-input:focus { - border: 1px solid var(--primary); -} +.chat-input:focus {} .chat-input-send { background-color: var(--primary); @@ -515,4 +624,4 @@ .chat-input-send { bottom: 30px; } -} +} \ No newline at end of file diff --git a/app/components/chat.tsx b/app/components/chat.tsx index 39abdd97b24..369d84c3326 100644 --- a/app/components/chat.tsx +++ b/app/components/chat.tsx @@ -15,6 +15,7 @@ import ExportIcon from "../icons/share.svg"; import ReturnIcon from "../icons/return.svg"; import CopyIcon from "../icons/copy.svg"; import LoadingIcon from "../icons/three-dots.svg"; +import LoadingButtonIcon from "../icons/loading.svg"; import PromptIcon from "../icons/prompt.svg"; import MaskIcon from "../icons/mask.svg"; import MaxIcon from "../icons/max.svg"; @@ -27,6 +28,7 @@ import PinIcon from "../icons/pin.svg"; import EditIcon from "../icons/rename.svg"; import ConfirmIcon from "../icons/confirm.svg"; import CancelIcon from "../icons/cancel.svg"; +import ImageIcon from "../icons/image.svg"; import LightIcon from "../icons/light.svg"; import DarkIcon from "../icons/dark.svg"; @@ -53,6 +55,10 @@ import { selectOrCopy, autoGrowTextArea, useMobileScreen, + getMessageTextContent, + getMessageImages, + isVisionModel, + compressImage, } from "../utils"; import dynamic from "next/dynamic"; @@ -89,6 +95,7 @@ import { 
prettyObject } from "../utils/format"; import { ExportMessageModal } from "./exporter"; import { getClientConfig } from "../config/client"; import { useAllModels } from "../utils/hooks"; +import { MultimodalContent } from "../client/api"; const Markdown = dynamic(async () => (await import("./markdown")).Markdown, { loading: () => , @@ -406,10 +413,14 @@ function useScrollToBottom() { } export function ChatActions(props: { + uploadImage: () => void; + setAttachImages: (images: string[]) => void; + setUploading: (uploading: boolean) => void; showPromptModal: () => void; scrollToBottom: () => void; showPromptHints: () => void; hitBottom: boolean; + uploading: boolean; }) { const config = useAppConfig(); const navigate = useNavigate(); @@ -437,8 +448,16 @@ export function ChatActions(props: { [allModels], ); const [showModelSelector, setShowModelSelector] = useState(false); + const [showUploadImage, setShowUploadImage] = useState(false); useEffect(() => { + const show = isVisionModel(currentModel); + setShowUploadImage(show); + if (!show) { + props.setAttachImages([]); + props.setUploading(false); + } + // if current model is not available // switch to first available model const isUnavaliableModel = !models.some((m) => m.name === currentModel); @@ -475,6 +494,13 @@ export function ChatActions(props: { /> )} + {showUploadImage && ( + : } + /> + )} void }) { ); } +export function DeleteImageButton(props: { deleteImage: () => void }) { + return ( +
+    <div className={styles["delete-image"]} onClick={props.deleteImage}>
+      <DeleteIcon />
+    </div>
+ ); +} + function _Chat() { type RenderMessage = ChatMessage & { preview?: boolean }; @@ -628,6 +662,8 @@ function _Chat() { const [hitBottom, setHitBottom] = useState(true); const isMobileScreen = useMobileScreen(); const navigate = useNavigate(); + const [attachImages, setAttachImages] = useState([]); + const [uploading, setUploading] = useState(false); // prompt hints const promptStore = usePromptStore(); @@ -705,7 +741,10 @@ function _Chat() { return; } setIsLoading(true); - chatStore.onUserInput(userInput).then(() => setIsLoading(false)); + chatStore + .onUserInput(userInput, attachImages) + .then(() => setIsLoading(false)); + setAttachImages([]); localStorage.setItem(LAST_INPUT_KEY, userInput); setUserInput(""); setPromptHints([]); @@ -783,9 +822,9 @@ function _Chat() { }; const onRightClick = (e: any, message: ChatMessage) => { // copy to clipboard - if (selectOrCopy(e.currentTarget, message.content)) { + if (selectOrCopy(e.currentTarget, getMessageTextContent(message))) { if (userInput.length === 0) { - setUserInput(message.content); + setUserInput(getMessageTextContent(message)); } e.preventDefault(); @@ -853,7 +892,9 @@ function _Chat() { // resend the message setIsLoading(true); - chatStore.onUserInput(userMessage.content).then(() => setIsLoading(false)); + const textContent = getMessageTextContent(userMessage); + const images = getMessageImages(userMessage); + chatStore.onUserInput(textContent, images).then(() => setIsLoading(false)); inputRef.current?.focus(); }; @@ -1048,6 +1089,51 @@ function _Chat() { // eslint-disable-next-line react-hooks/exhaustive-deps }, []); + async function uploadImage() { + const images: string[] = []; + images.push(...attachImages); + + images.push( + ...(await new Promise((res, rej) => { + const fileInput = document.createElement("input"); + fileInput.type = "file"; + fileInput.accept = + "image/png, image/jpeg, image/webp, image/heic, image/heif"; + fileInput.multiple = true; + fileInput.onchange = (event: any) => { + setUploading(true); + const files = event.target.files; + const imagesData: string[] = []; + for (let i = 0; i < files.length; i++) { + const file = event.target.files[i]; + compressImage(file, 256 * 1024) + .then((dataUrl) => { + imagesData.push(dataUrl); + if ( + imagesData.length === 3 || + imagesData.length === files.length + ) { + setUploading(false); + res(imagesData); + } + }) + .catch((e) => { + setUploading(false); + rej(e); + }); + } + }; + fileInput.click(); + })), + ); + + const imagesLength = images.length; + if (imagesLength > 3) { + images.splice(3, imagesLength - 3); + } + setAttachImages(images); + } + return (
@@ -1154,15 +1240,29 @@ function _Chat() { onClick={async () => { const newMessage = await showPrompt( Locale.Chat.Actions.Edit, - message.content, + getMessageTextContent(message), 10, ); + let newContent: string | MultimodalContent[] = + newMessage; + const images = getMessageImages(message); + if (images.length > 0) { + newContent = [{ type: "text", text: newMessage }]; + for (let i = 0; i < images.length; i++) { + newContent.push({ + type: "image_url", + image_url: { + url: images[i], + }, + }); + } + } chatStore.updateCurrentSession((session) => { const m = session.mask.context .concat(session.messages) .find((m) => m.id === message.id); if (m) { - m.content = newMessage; + m.content = newContent; } }); }} @@ -1217,7 +1317,11 @@ function _Chat() { } - onClick={() => copyToClipboard(message.content)} + onClick={() => + copyToClipboard( + getMessageTextContent(message), + ) + } /> )} @@ -1232,7 +1336,7 @@ function _Chat() { )}
onRightClick(e, message)} onDoubleClickCapture={() => { if (!isMobileScreen) return; - setUserInput(message.content); + setUserInput(getMessageTextContent(message)); }} fontSize={fontSize} parentRef={scrollRef} defaultShow={i >= messages.length - 6} /> + {getMessageImages(message).length == 1 && ( + + )} + {getMessageImages(message).length > 1 && ( +
+ {getMessageImages(message).map((image, index) => { + return ( + + ); + })} +
+ )}
@@ -1266,9 +1400,13 @@ function _Chat() { setShowPromptModal(true)} scrollToBottom={scrollToBottom} hitBottom={hitBottom} + uploading={uploading} showPromptHints={() => { // Click again to close if (promptHints.length > 0) { @@ -1281,8 +1419,16 @@ function _Chat() { onSearch(""); }} /> -
+
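
The hunks above import `getMessageTextContent`, `getMessageImages`, `isVisionModel`, and `compressImage` from `@/app/utils`, but the corresponding additions to that module are not part of this excerpt. The sketch below shows the contract the call sites rely on, written against the `RequestMessage` and `MultimodalContent` types added in `app/client/api.ts`; the function bodies, the import path, and the vision-model list are assumptions, not the PR's actual implementation.

```typescript
// Assumed location: app/utils.ts (the module the diff imports these helpers from).
import { RequestMessage } from "./client/api";

export function getMessageTextContent(message: RequestMessage): string {
  if (typeof message.content === "string") {
    return message.content;
  }
  // Concatenate every text part of a multimodal message.
  return message.content
    .filter((part) => part.type === "text")
    .map((part) => part.text ?? "")
    .join("");
}

export function getMessageImages(message: RequestMessage): string[] {
  if (typeof message.content === "string") {
    return [];
  }
  // Collect the URL (usually a data URL) of every attached image.
  return message.content
    .filter((part) => part.type === "image_url")
    .map((part) => part.image_url?.url ?? "")
    .filter((url) => url.length > 0);
}

export function isVisionModel(model: string): boolean {
  // The keyword list is an assumption; keep it in sync with the providers you enable.
  const visionKeywords = ["gpt-4-vision", "gemini-pro-vision"];
  return visionKeywords.some((keyword) => model.includes(keyword));
}
```

With `content` widened to `string | MultimodalContent[]`, every call site that previously assumed a plain string goes through `getMessageTextContent`, which is why the diff touches the copy, edit, right-click, and resend paths in `chat.tsx`. The `visionModel ? v.content : getMessageTextContent(v)` branch in `openai.ts` can forward the array unchanged because `MultimodalContent` mirrors the content-part shape accepted by OpenAI's vision-capable chat models, while non-vision models still receive plain text.

The upload path in `chat.tsx` also calls `compressImage(file, 256 * 1024)` before attaching a data URL. A canvas-based sketch of what such a helper could look like follows; the 256 KB budget comes from the diff, everything else here is an assumption.

```typescript
export function compressImage(file: Blob, maxSize: number): Promise<string> {
  return new Promise((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = (readerEvent) => {
      const image = new Image();
      image.onload = () => {
        const canvas = document.createElement("canvas");
        let width = image.width;
        let height = image.height;
        let quality = 0.9;
        let dataUrl = "";

        do {
          canvas.width = width;
          canvas.height = height;
          const ctx = canvas.getContext("2d");
          if (!ctx) {
            reject(new Error("Canvas 2D context is unavailable"));
            return;
          }
          ctx.clearRect(0, 0, canvas.width, canvas.height);
          ctx.drawImage(image, 0, 0, width, height);
          dataUrl = canvas.toDataURL("image/jpeg", quality);

          // First lower the JPEG quality, then shrink the dimensions,
          // until the encoded data URL fits the size budget.
          if (quality > 0.5) {
            quality -= 0.1;
          } else {
            width = Math.round(width * 0.9);
            height = Math.round(height * 0.9);
          }
        } while (dataUrl.length > maxSize && width > 16 && height > 16);

        resolve(dataUrl);
      };
      image.onerror = reject;
      image.src = readerEvent.target?.result as string;
    };
    reader.onerror = reject;
    reader.readAsDataURL(file);
  });
}
```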