Commit

token handling
yoziru committed Apr 3, 2024
1 parent fa28737 commit ef8c13b
Showing 10 changed files with 204 additions and 103 deletions.
1 change: 1 addition & 0 deletions package.json
@@ -18,6 +18,7 @@
"ai": "^3.0.15",
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.0",
"mistral-tokenizer-js": "^1.0.0",
"next": "14.1.4",
"next-themes": "^0.3.0",
"openai": "^4.30.0",
70 changes: 59 additions & 11 deletions src/app/api/chat/route.ts
@@ -4,11 +4,20 @@ import {
ReconnectInterval,
} from "eventsource-parser";
import { NextRequest, NextResponse } from "next/server";
import { ChatCompletionAssistantMessageParam, ChatCompletionCreateParamsStreaming, ChatCompletionMessage, ChatCompletionMessageParam, ChatCompletionSystemMessageParam, ChatCompletionUserMessageParam } from "openai/resources/index.mjs";

import {
ChatCompletionAssistantMessageParam,
ChatCompletionCreateParamsStreaming,
ChatCompletionMessageParam,
ChatCompletionSystemMessageParam,
ChatCompletionUserMessageParam,
} from "openai/resources/index.mjs";

import { encodeChat, tokenLimit } from "@/lib/token-counter";

const addSystemMessage = (messages: ChatCompletionMessageParam[], systemPrompt?: string) => {
const addSystemMessage = (
messages: ChatCompletionMessageParam[],
systemPrompt?: string
) => {
// early exit if system prompt is empty
if (!systemPrompt || systemPrompt === "") {
return messages;
@@ -22,7 +31,7 @@ const addSystemMessage = (messages: ChatCompletionMessageParam[], systemPrompt?:
{
content: systemPrompt,
role: "system",
} ,
},
];
} else if (messages.length === 0) {
// if there are no messages, add the system prompt as the first message
@@ -46,16 +55,55 @@ const addSystemMessage = (messages: ChatCompletionMessageParam[], systemPrompt?:
return messages;
};

const formatMessages = (messages: ChatCompletionMessageParam[]): ChatCompletionMessageParam[] => {
return messages.map((m) => {
const formatMessages = (
messages: ChatCompletionMessageParam[]
): ChatCompletionMessageParam[] => {
let mappedMessages: ChatCompletionMessageParam[] = [];
let messagesTokenCounts: number[] = [];
const responseTokens = 512;
const tokenLimitRemaining = tokenLimit - responseTokens;
let tokenCount = 0;

messages.forEach((m) => {
if (m.role === "system") {
return { role: "system", content: m.content } as ChatCompletionSystemMessageParam;
mappedMessages.push({
role: "system",
content: m.content,
} as ChatCompletionSystemMessageParam);
} else if (m.role === "user") {
return { role: "user", content: m.content } as ChatCompletionUserMessageParam;
mappedMessages.push({
role: "user",
content: m.content,
} as ChatCompletionUserMessageParam);
} else if (m.role === "assistant") {
mappedMessages.push({
role: "assistant",
content: m.content,
} as ChatCompletionAssistantMessageParam);
} else {
return { role: "assistant", content: m.content } as ChatCompletionAssistantMessageParam;
return;
}

// ignore typing
// tslint:disable-next-line
const messageTokens = encodeChat([m]);
messagesTokenCounts.push(messageTokens);
tokenCount += messageTokens;
});

if (tokenCount <= tokenLimitRemaining) {
return mappedMessages;
}

// remove the middle messages until the token count is below the limit
while (tokenCount > tokenLimitRemaining) {
const middleMessageIndex = Math.floor(messages.length / 2);
const middleMessageTokens = messagesTokenCounts[middleMessageIndex];
mappedMessages.splice(middleMessageIndex, 1);
messagesTokenCounts.splice(middleMessageIndex, 1);
tokenCount -= middleMessageTokens;
}
return mappedMessages;
};

export async function POST(req: NextRequest): Promise<NextResponse> {
@@ -121,8 +169,8 @@ const getOpenAIStream = async (
// type: "json_object",
// }
// top_p: 0.95,
}
const res = await fetch(apiUrl + "/v1/chat/completions",{
};
const res = await fetch(apiUrl + "/v1/chat/completions", {
headers: headers,
method: "POST",
body: JSON.stringify(chatOptions),
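
Note: route.ts above (and chat-topbar.tsx further down) imports `encodeChat` and `tokenLimit` from `@/lib/token-counter`, presumably one of the 10 changed files that is not expanded in this view. Below is a minimal sketch of what that helper could look like, assuming it wraps the newly added `mistral-tokenizer-js` dependency and mirrors the `.length - 1` adjustment used for the input counter in chat-bottombar.tsx; the 4096 budget, the `ChatLike` type, and the exact signature are assumptions, not taken from the commit.

```ts
// Hypothetical sketch of src/lib/token-counter.ts -- the real file is not
// expanded in this diff, so everything except the exported names is assumed.
import mistralTokenizer from "mistral-tokenizer-js";

// Assumed context budget shared by the API route and the UI.
export const tokenLimit = 4096;

// Accepts both the OpenAI-style message params used in route.ts and the
// `Message` objects from ai/react used in chat-topbar.tsx.
type ChatLike = { content?: unknown };

// Sum the Mistral-tokenizer length of each message's text content.
export const encodeChat = (messages: ChatLike[]): number => {
  return messages.reduce((total, message) => {
    const content = typeof message.content === "string" ? message.content : "";
    // encode() appears to prepend a BOS token, so drop one per message to
    // mirror the `.length - 1` used in chat-bottombar.tsx.
    return total + (mistralTokenizer.encode(content).length - 1);
  }, 0);
};
```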
89 changes: 52 additions & 37 deletions src/components/chat/chat-bottombar.tsx
@@ -4,8 +4,11 @@ import React from "react";

import { PaperPlaneIcon, StopIcon } from "@radix-ui/react-icons";
import { ChatRequestOptions } from "ai";
import mistralTokenizer from "mistral-tokenizer-js";
import TextareaAutosize from "react-textarea-autosize";


import { tokenLimit } from "@/lib/token-counter";
import { Button } from "../ui/button";

interface ChatBottombarProps {
@@ -37,45 +40,57 @@ export default function ChatBottombar({
handleSubmit(e as unknown as React.FormEvent<HTMLFormElement>);
}
};
const tokenCount = input ? mistralTokenizer.encode(input).length - 1 : 0;

return (
<div className="p-4 flex justify-between w-full items-center gap-2">
<div key="input" className="w-full relative mb-2 items-center">
<form
onSubmit={handleSubmit}
className="w-full items-center flex relative gap-2"
>
<TextareaAutosize
autoComplete="off"
value={input}
ref={inputRef}
onKeyDown={handleKeyPress}
onChange={handleInputChange}
name="message"
placeholder="Ask vLLM anything..."
className="border-input max-h-20 px-5 py-4 text-sm shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:cursor-not-allowed disabled:opacity-50 w-full border rounded-md flex items-center h-14 resize-none overflow-hidden dark:bg-card/35"
/>
{!isLoading ? (
<Button
className="shrink-0"
variant="secondary"
size="icon"
type="submit"
disabled={isLoading || !input.trim() || !hasSelectedModel}
>
<PaperPlaneIcon className=" w-6 h-6 text-muted-foreground" />
</Button>
) : (
<Button
className="shrink-0"
variant="secondary"
size="icon"
onClick={stop}
>
<StopIcon className="w-6 h-6 text-muted-foreground" />
</Button>
)}
</form>
<div>
<div className="text-xs mt-1 text-muted-foreground w-full text-center">
<span>Enter to send, Shift + Enter for new line</span>
</div>
<div className="p-2 pb-1 flex justify-between w-full items-center ">
<div key="input" className="w-full relative mb-1 items-center">
<form
onSubmit={handleSubmit}
className="w-full items-center flex relative gap-2"
>
<TextareaAutosize
autoComplete="off"
value={input}
ref={inputRef}
onKeyDown={handleKeyPress}
onChange={handleInputChange}
name="message"
placeholder="Ask vLLM anything..."
className="border-input max-h-48 px-4 py-4 text-sm shadow-sm placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:cursor-not-allowed disabled:opacity-50 w-full border rounded-md flex items-center h-14 resize-none overflow-hidden dark:bg-card/35
pr-20"
/>
<div className="text-xs text-muted-foreground w-16 absolute right-16">
{tokenCount > tokenLimit ? (
<span className="text-red-700">
{tokenCount} token{tokenCount == 1 ? "" : "s"}
</span>
) : (
<span>
{tokenCount} token{tokenCount == 1 ? "" : "s"}
</span>
)}
</div>
{!isLoading ? (
<Button
variant="secondary"
size="icon"
type="submit"
disabled={isLoading || !input.trim() || !hasSelectedModel}
>
<PaperPlaneIcon className="w-6 h-6 text-muted-foreground" />
</Button>
) : (
<Button variant="secondary" size="icon" onClick={stop}>
<StopIcon className="w-6 h-6 text-muted-foreground" />
</Button>
)}
</form>
</div>
</div>
</div>
);
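
The live counter added above is computed client-side from the tokenizer rather than asking the server. A standalone illustration of that calculation, assuming (as the `- 1` suggests) that `encode()` prepends a BOS token by default; the sample string is made up.

```ts
// Isolated version of the per-keystroke count in chat-bottombar.tsx.
import mistralTokenizer from "mistral-tokenizer-js";

const input = "Ask vLLM anything...";
// encode() returns token ids, apparently with a leading BOS token, so the UI
// subtracts one to count only the tokens contributed by the typed text.
const tokenCount = input ? mistralTokenizer.encode(input).length - 1 : 0;
console.log(`${tokenCount} token${tokenCount === 1 ? "" : "s"}`);
```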
6 changes: 1 addition & 5 deletions src/components/chat/chat-layout.tsx
@@ -2,11 +2,7 @@

import React, { useEffect, useState } from "react";

import {
ResizableHandle,
ResizablePanel,
ResizablePanelGroup,
} from "@/components/ui/resizable";
import { ResizablePanel, ResizablePanelGroup } from "@/components/ui/resizable";
import { cn } from "@/lib/utils";
import { Sidebar } from "../sidebar";
import Chat, { ChatProps, ChatTopbarProps } from "./chat";
40 changes: 35 additions & 5 deletions src/components/chat/chat-topbar.tsx
@@ -7,6 +7,7 @@ import {
CrossCircledIcon,
DotFilledIcon,
HamburgerMenuIcon,
InfoCircledIcon,
} from "@radix-ui/react-icons";
import { Message } from "ai/react";
import { toast } from "sonner";
@@ -18,6 +19,7 @@ import {
TooltipProvider,
TooltipTrigger,
} from "@/components/ui/tooltip";
import { encodeChat, tokenLimit } from "@/lib/token-counter";
import { basePath, useHasMounted } from "@/lib/utils";
import { Sidebar } from "../sidebar";
import { ChatOptions } from "./chat-options";
@@ -42,6 +44,7 @@ export default function ChatTopbar({
const hasMounted = useHasMounted();

const currentModel = chatOptions && chatOptions.selectedModel;
const [error, setError] = React.useState<string | undefined>(undefined);

const fetchData = async () => {
if (!hasMounted) {
@@ -59,7 +62,6 @@
if (!res.ok) {
const errorResponse = await res.json();
const errorMessage = `Connection to vLLM server failed: ${errorResponse.error} [${res.status} ${res.statusText}]`;
toast.error(errorMessage);
throw new Error(errorMessage);
}

@@ -70,7 +72,7 @@
setChatOptions({ ...chatOptions, selectedModel: modelNames[0] });
} catch (error) {
setChatOptions({ ...chatOptions, selectedModel: undefined });
console.log(error);
toast.error(error as string);
}
};

@@ -87,8 +89,10 @@
);
}

const chatTokens = messages.length > 0 ? encodeChat(messages) : 0;

return (
<div className="w-full flex px-4 py-6 items-center justify-between lg:justify-center">
<div className="w-full flex px-4 py-4 items-center justify-between lg:justify-center">
<Sheet>
<SheetTrigger>
<HamburgerMenuIcon className="lg:hidden w-5 h-5" />
@@ -105,8 +109,8 @@
</SheetContent>
</Sheet>

<div className="flex items-center gap-4">
<div className="w-full gap-1 flex justify-between items-center">
<div className="flex justify-center lg:justify-between gap-4 w-full">
<div className="gap-1 flex items-center">
{currentModel !== undefined && (
<>
{isLoading ? (
@@ -141,6 +145,32 @@
</>
)}
</div>
<div className="flex items-end gap-2">
{chatTokens > tokenLimit && (
<TooltipProvider>
<Tooltip>
<TooltipTrigger>
<span>
<InfoCircledIcon className="w-4 h-4 text-blue-500" />
</span>
</TooltipTrigger>
<TooltipContent
sideOffset={4}
className="bg-white dark:bg-gray-800 text-gray-900 dark:text-gray-100 rounded-sm text-xs"
>
<p className="text-gray-500">
Token limit exceeded. Truncating middle messages.
</p>
</TooltipContent>
</Tooltip>
</TooltipProvider>
)}
{messages.length > 0 && (
<span className="text-xs text-gray-500">
{chatTokens} / {tokenLimit} token{chatTokens > 1 ? "s" : ""}
</span>
)}
</div>
</div>
</div>
);
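
The tooltip added above ("Truncating middle messages") refers to the truncation done server-side in `formatMessages` in route.ts. A short worked example of that budget check, using invented per-message token counts; only `responseTokens = 512` and the splice-from-the-middle strategy come from the diff, while the 4096 limit matches the assumption in the earlier sketch.

```ts
// Worked example of the middle-message truncation in formatMessages (route.ts).
const tokenLimit = 4096;                                   // assumed budget
const responseTokens = 512;                                // reserved for the reply
const tokenLimitRemaining = tokenLimit - responseTokens;   // 3584

// Invented counts for [system, user, assistant, user, assistant, user]:
const messagesTokenCounts = [40, 1200, 900, 800, 700, 300];
const mappedMessages = ["system", "user-1", "assistant-1", "user-2", "assistant-2", "user-3"];
let tokenCount = messagesTokenCounts.reduce((a, b) => a + b, 0); // 3940 > 3584

// As in the diff, the index is derived from the original message count,
// so floor(6 / 2) = 3 targets the fourth message.
const middleMessageIndex = Math.floor(mappedMessages.length / 2);
while (tokenCount > tokenLimitRemaining) {
  tokenCount -= messagesTokenCounts[middleMessageIndex];
  mappedMessages.splice(middleMessageIndex, 1);
  messagesTokenCounts.splice(middleMessageIndex, 1);
}
// One pass removes "user-2" (800 tokens): 3940 -> 3140 <= 3584.
console.log(mappedMessages, tokenCount);
```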
37 changes: 21 additions & 16 deletions src/components/sidebar.tsx
@@ -5,12 +5,11 @@ import { Pencil2Icon } from "@radix-ui/react-icons";
import { Message } from "ai/react";
import Image from "next/image";

import { Button } from "@/components/ui/button";
import OllamaLogo from "../../public/ollama.png";
import { ChatOptions } from "./chat/chat-options";
import SidebarTabs from "./sidebar-tabs";
import { RedirectType, redirect } from "next/navigation";
import Link from "next/link";

interface SidebarProps {
isCollapsed: boolean;
onClick?: () => void;
@@ -126,23 +125,29 @@ export function Sidebar({
>
<div className="sticky left-0 right-0 top-0 z-20 p-1 rounded-sm m-2">
<Link
className="flex justify-between w-full h-10 text-sm font-medium items-center"
className="flex w-full h-10 text-sm font-medium items-center
border border-input bg-background hover:bg-accent hover:text-accent-foreground
px-2 py-2 rounded-sm"
href="/"
onClick={() => {setChatId("");}}
onClick={() => {
setChatId("");
}}
>
<div className="flex gap-3 items-center">
{!isCollapsed && !isMobile && (
<Image
src={OllamaLogo}
alt="AI"
width={14}
height={14}
className="dark:invert 2xl:block"
/>
)}
New chat
<div className="flex gap-3 p-2 items-center justify-between w-full">
<div className="flex align-start gap-2">
{!isCollapsed && !isMobile && (
<Image
src={OllamaLogo}
alt="AI"
width={14}
height={14}
className="dark:invert 2xl:block"
/>
)}
<span>New chat</span>
</div>
<Pencil2Icon className="w-4 h-4" />
</div>
<Pencil2Icon className="shrink-0 w-4 h-4" />
</Link>
</div>
<SidebarTabs
