Skip to content

Commit

Permalink
Add vision support (#4076)
Browse files Browse the repository at this point in the history
  • Loading branch information
TheRamU authored Feb 20, 2024
1 parent 05b6d98 commit e2da340
Show file tree
Hide file tree
Showing 16 changed files with 650 additions and 73 deletions.
10 changes: 9 additions & 1 deletion app/client/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,17 @@ export type MessageRole = (typeof ROLES)[number];
export const Models = ["gpt-3.5-turbo", "gpt-4"] as const;
export type ChatModel = ModelType;

/**
 * One part of a multimodal chat message.
 *
 * `type` names the kind of part. Both payload fields are optional at the
 * type level, so readers must check for their presence before use.
 * Presumably `text` is set for `"text"` parts and `image_url` for
 * `"image_url"` parts; confirm against the code that builds these parts.
 */
export interface MultimodalContent {
  /** Kind of content carried by this part. */
  type: "text" | "image_url";
  /** Text payload, when present. */
  text?: string;
  /** Image payload, when present; `url` is the image address. */
  image_url?: { url: string };
}

/**
 * A single chat message passed to an LLM client.
 *
 * `content` was widened from plain `string` to also accept a list of
 * multimodal parts. Only the widened declaration is kept: declaring the
 * same member twice with different types is a TypeScript error.
 */
export interface RequestMessage {
  /** Author of the message. */
  role: MessageRole;
  /** Plain text, or an ordered list of multimodal parts. */
  content: string | MultimodalContent[];
}

export interface LLMConfig {
Expand Down
61 changes: 54 additions & 7 deletions app/client/platforms/google.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@ import { ChatOptions, getHeaders, LLMApi, LLMModel, LLMUsage } from "../api";
import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";
import { getClientConfig } from "@/app/config/client";
import { DEFAULT_API_HOST } from "@/app/constant";
import {
getMessageTextContent,
getMessageImages,
isVisionModel,
} from "@/app/utils";

export class GeminiProApi implements LLMApi {
extractMessage(res: any) {
console.log("[Response] gemini-pro response: ", res);
Expand All @@ -15,10 +21,33 @@ export class GeminiProApi implements LLMApi {
}
async chat(options: ChatOptions): Promise<void> {
// const apiClient = this;
const messages = options.messages.map((v) => ({
role: v.role.replace("assistant", "model").replace("system", "user"),
parts: [{ text: v.content }],
}));
const visionModel = isVisionModel(options.config.model);
let multimodal = false;
const messages = options.messages.map((v) => {
let parts: any[] = [{ text: getMessageTextContent(v) }];
if (visionModel) {
const images = getMessageImages(v);
if (images.length > 0) {
multimodal = true;
parts = parts.concat(
images.map((image) => {
const imageType = image.split(";")[0].split(":")[1];
const imageData = image.split(",")[1];
return {
inline_data: {
mime_type: imageType,
data: imageData,
},
};
}),
);
}
}
return {
role: v.role.replace("assistant", "model").replace("system", "user"),
parts: parts,
};
});

// google requires that role in neighboring messages must not be the same
for (let i = 0; i < messages.length - 1; ) {
Expand All @@ -33,7 +62,9 @@ export class GeminiProApi implements LLMApi {
i++;
}
}

// if (visionModel && messages.length > 1) {
// options.onError?.(new Error("Multiturn chat is not enabled for models/gemini-pro-vision"));
// }
const modelConfig = {
...useAppConfig.getState().modelConfig,
...useChatStore.getState().currentSession().mask.modelConfig,
Expand Down Expand Up @@ -80,13 +111,16 @@ export class GeminiProApi implements LLMApi {
const controller = new AbortController();
options.onController?.(controller);
try {
let chatPath = this.path(Google.ChatPath);
let googleChatPath = visionModel
? Google.VisionChatPath
: Google.ChatPath;
let chatPath = this.path(googleChatPath);

// let baseUrl = accessStore.googleUrl;

if (!baseUrl) {
baseUrl = isApp
? DEFAULT_API_HOST + "/api/proxy/google/" + Google.ChatPath
? DEFAULT_API_HOST + "/api/proxy/google/" + googleChatPath
: chatPath;
}

Expand Down Expand Up @@ -152,6 +186,19 @@ export class GeminiProApi implements LLMApi {
value,
}): Promise<any> {
if (done) {
if (response.status !== 200) {
try {
let data = JSON.parse(ensureProperEnding(partialData));
if (data && data[0].error) {
options.onError?.(new Error(data[0].error.message));
} else {
options.onError?.(new Error("Request failed"));
}
} catch (_) {
options.onError?.(new Error("Request failed"));
}
}

console.log("Stream complete");
// options.onFinish(responseText + remainText);
finished = true;
Expand Down
17 changes: 15 additions & 2 deletions app/client/platforms/openai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,14 @@ import {
} from "@/app/constant";
import { useAccessStore, useAppConfig, useChatStore } from "@/app/store";

import { ChatOptions, getHeaders, LLMApi, LLMModel, LLMUsage } from "../api";
import {
ChatOptions,
getHeaders,
LLMApi,
LLMModel,
LLMUsage,
MultimodalContent,
} from "../api";
import Locale from "../../locales";
import {
EventStreamContentType,
Expand All @@ -18,6 +25,11 @@ import {
import { prettyObject } from "@/app/utils/format";
import { getClientConfig } from "@/app/config/client";
import { makeAzurePath } from "@/app/azure";
import {
getMessageTextContent,
getMessageImages,
isVisionModel,
} from "@/app/utils";

export interface OpenAIListModelResponse {
object: string;
Expand Down Expand Up @@ -72,9 +84,10 @@ export class ChatGPTApi implements LLMApi {
}

async chat(options: ChatOptions) {
const visionModel = isVisionModel(options.config.model);
const messages = options.messages.map((v) => ({
role: v.role,
content: v.content,
content: visionModel ? v.content : getMessageTextContent(v),
}));

const modelConfig = {
Expand Down
135 changes: 122 additions & 13 deletions app/components/chat.module.scss
Original file line number Diff line number Diff line change
@@ -1,5 +1,47 @@
@import "../styles/animation.scss";

// Flex row of attachment thumbnails, absolutely positioned 30px from the
// left and 32px from the bottom of its positioned ancestor.
.attach-images {
  display: flex;
  position: absolute;
  bottom: 32px;
  left: 30px;
}

// One 64x64 attachment thumbnail. The picture itself is expected to be
// supplied as a background image (cover-sized and centred here).
.attach-image {
  width: 64px;
  height: 64px;
  margin-right: 10px;
  border: 1px solid rgba($color: #888, $alpha: 0.2);
  border-radius: 5px;
  background-color: var(--white);
  background-position: center;
  background-size: cover;
  cursor: default;

  // Full-size overlay that stays transparent until the pointer is over it.
  .attach-image-mask {
    width: 100%;
    height: 100%;
    opacity: 0;
    transition: all ease 0.2s;

    &:hover {
      opacity: 1;
    }
  }

  // 24x24 delete control, floated to the right edge with its content
  // centred on both axes.
  .delete-image {
    display: flex;
    align-items: center;
    justify-content: center;
    float: right;
    width: 24px;
    height: 24px;
    border-radius: 5px;
    background-color: var(--white);
    cursor: pointer;
  }
}

.chat-input-actions {
display: flex;
flex-wrap: wrap;
Expand Down Expand Up @@ -189,12 +231,10 @@

animation: slide-in ease 0.3s;

$linear: linear-gradient(
to right,
rgba(0, 0, 0, 0),
rgba(0, 0, 0, 1),
rgba(0, 0, 0, 0)
);
$linear: linear-gradient(to right,
rgba(0, 0, 0, 0),
rgba(0, 0, 0, 1),
rgba(0, 0, 0, 0));
mask-image: $linear;

@mixin show {
Expand Down Expand Up @@ -327,7 +367,7 @@
}
}

.chat-message-user > .chat-message-container {
.chat-message-user>.chat-message-container {
align-items: flex-end;
}

Expand All @@ -349,6 +389,7 @@
padding: 7px;
}
}

/* Specific styles for iOS devices */
@media screen and (max-device-width: 812px) and (-webkit-min-device-pixel-ratio: 2) {
@supports (-webkit-touch-callout: none) {
Expand Down Expand Up @@ -381,6 +422,64 @@
transition: all ease 0.3s;
}

// Single image inside a chat message: spans the available width.
.chat-message-item-image {
  margin-top: 10px;
  width: 100%;
}

// Grid of several images; the column count comes from the --image-count
// custom property, which must be set on (or inherited by) this element.
.chat-message-item-images {
  display: grid;
  grid-template-columns: repeat(var(--image-count), auto);
  grid-gap: 10px;
  justify-content: left;
  margin-top: 10px;
  width: 100%;
}

// One tile of the multi-image grid, cropped to fill its box.
.chat-message-item-image-multi {
  object-fit: cover;
  background-repeat: no-repeat;
  background-position: center;
  background-size: cover;
}

// Frame shared by the single image and the grid tiles.
.chat-message-item-image,
.chat-message-item-image-multi {
  box-sizing: border-box;
  border: 1px solid rgba($color: #888, $alpha: 0.2);
  border-radius: 10px;
}


// Narrow viewports (up to 600px): image sizes are derived from two thirds
// of the viewport width.
@media only screen and (max-width: 600px) {
  // Side length of one square grid tile: two thirds of the viewport width
  // shared between --image-count tiles.
  $tile-size: calc(100vw / 3 * 2 / var(--image-count));

  .chat-message-item-image-multi {
    width: $tile-size;
    height: $tile-size;
  }

  .chat-message-item-image {
    max-width: calc(100vw / 3 * 2);
  }
}

// Wide viewports (600px and up): image sizes are derived from two thirds of
// the space left beside the sidebar, capped as if the window were 1200px.
@media screen and (min-width: 600px) {
  // Tile side computed from the --window-width custom property.
  $tile-size: calc((var(--window-width) - var(--sidebar-width)) / 3 * 2 / var(--image-count));
  // Upper bound for the tile side, computed from a fixed 1200px width.
  $tile-size-cap: calc((1200px - var(--sidebar-width)) / 3 * 2 / var(--image-count));

  .chat-message-item-image-multi {
    width: $tile-size;
    height: $tile-size;
    max-width: $tile-size-cap;
    max-height: $tile-size-cap;
  }

  .chat-message-item-image {
    max-width: calc((1200px - var(--sidebar-width)) / 3 * 2);
  }
}

.chat-message-action-date {
font-size: 12px;
opacity: 0.2;
Expand All @@ -395,7 +494,7 @@
z-index: 1;
}

.chat-message-user > .chat-message-container > .chat-message-item {
.chat-message-user>.chat-message-container>.chat-message-item {
background-color: var(--second);

&:hover {
Expand Down Expand Up @@ -460,6 +559,7 @@

@include single-line();
}

.hint-content {
font-size: 12px;

Expand All @@ -474,15 +574,26 @@
}

// Bordered flex wrapper around the chat input. The border is drawn here
// and switched to the primary colour while the contained .chat-input has
// focus.
.chat-input-panel-inner {
  display: flex;
  flex: 1;
  border: var(--border-in-light);
  border-radius: 10px;
  cursor: text;

  // Compiles to .chat-input-panel-inner-attach: adds 80px of bottom padding.
  &-attach {
    padding-bottom: 80px;
  }

  // Compiles to .chat-input-panel-inner:has(.chat-input:focus).
  &:has(.chat-input:focus) {
    border: 1px solid var(--primary);
  }
}

.chat-input {
height: 100%;
width: 100%;
border-radius: 10px;
border: var(--border-in-light);
border: none;
box-shadow: 0 -2px 5px rgba(0, 0, 0, 0.03);
background-color: var(--white);
color: var(--black);
Expand All @@ -494,9 +605,7 @@
min-height: 68px;
}

.chat-input:focus {
border: 1px solid var(--primary);
}
.chat-input:focus {}

.chat-input-send {
background-color: var(--primary);
Expand All @@ -515,4 +624,4 @@
.chat-input-send {
bottom: 30px;
}
}
}
Loading

0 comments on commit e2da340

Please sign in to comment.