Conversation Summary Buffer Memory #203

Open · wants to merge 5 commits into base: main
15 changes: 8 additions & 7 deletions www/app/Chat.tsx
@@ -395,8 +395,8 @@ What's on your mind? Let's dive in. 🌱`,
}
}

-async function processSummary(messageToSend: string, conversationId: string) {
-const summaryResponse = await fetch('/api/chat/summary', {
+async function processName(messageToSend: string, conversationId: string) {
+const nameResponse = await fetch('/api/chat/name', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
@@ -406,9 +406,9 @@
}),
});

-if (summaryResponse.ok) {
-const { summary } = await summaryResponse.json();
-await updateConversation(conversationId, summary);
+if (nameResponse.ok) {
+const { name } = await nameResponse.json();
+await updateConversation(conversationId, name);
await mutateConversations();
}
}
@@ -454,7 +454,7 @@ What's on your mind? Let's dive in. 🌱`,
const [thoughtText] = await Promise.all([
processThought(messageToSend, conversationId!),
...(shouldGenerateSummary
-? [processSummary(messageToSend, conversationId!)]
+? [processName(messageToSend, conversationId!)]
: []),
]);

@@ -552,7 +552,8 @@ What's on your mind? Let's dive in. 🌱`,
<div className="p-3 pb-0 lg:p-5 lg:pb-0">
{messages!.length > 1 && (
<div className="disclaimer-text text-center mb-2">
-Bloom can make mistakes. Always double-check important information.
+Bloom can make mistakes. Always double-check important
+information.
</div>
)}
<form
4 changes: 2 additions & 2 deletions www/app/api/chat/honcho/route.ts
@@ -8,9 +8,9 @@ export const dynamic = 'force-dynamic'; // always run dynamically

function parseHonchoContent(str: string) {
try {
-const match = str.match(/<honcho>(.*?)<\/honcho>/s);
+const match = str.match(/<honcho>(.*?)<\/honcho>/);
return match ? match[1].trim() : str;
-} catch (error) {
+} catch {
return str;
}
}
@@ -1,5 +1,5 @@
import { createCompletion, getUserData, user } from '@/utils/ai';
-import { summaryPrompt } from '@/utils/prompts/summary';
+import { namePrompt } from '@/utils/prompts/name';
import { NextRequest, NextResponse } from 'next/server';

export const runtime = 'nodejs';
@@ -16,13 +16,13 @@ export async function POST(req: NextRequest) {
const { userId } = userData;

const finalMessage = user`${message}`;
-const prompt = [...summaryPrompt, finalMessage];
+const prompt = [...namePrompt, finalMessage];

const completion = await createCompletion(prompt, {
-sessionId: 'summary',
+sessionId: 'name',
userId,
-type: 'summary',
+type: 'name',
});

-return NextResponse.json({ summary: completion.text });
+return NextResponse.json({ name: completion.text });
}
182 changes: 158 additions & 24 deletions www/app/api/chat/response/route.ts
@@ -1,18 +1,16 @@
-import {
-assistant,
-createStream,
-getUserData,
-Message,
-user,
-} from '@/utils/ai';
+import { assistant, createStream, getUserData, user } from '@/utils/ai';
import { honcho } from '@/utils/honcho';
-import { responsePrompt } from '@/utils/prompts/response';
+import responsePrompt from '@/utils/prompts/response';
+import summaryPrompt from '@/utils/prompts/summary';
import { NextRequest, NextResponse } from 'next/server';

export const runtime = 'nodejs';
export const maxDuration = 100;
export const dynamic = 'force-dynamic'; // always run dynamically

+const MAX_CONTEXT_SIZE = 11;
+const SUMMARY_SIZE = 5;

export async function POST(req: NextRequest) {
const { message, conversationId, thought, honchoThought } = await req.json();

@@ -25,25 +23,142 @@ export async function POST(req: NextRequest) {

const { appId, userId } = userData;

-const responseIter = await honcho.apps.users.sessions.messages.list(
-appId,
-userId,
-conversationId,
-{}
+const [responseIter, honchoIter, summaryIter] = await Promise.all([
+honcho.apps.users.sessions.messages.list(appId, userId, conversationId, {
+reverse: true,
+size: MAX_CONTEXT_SIZE,
+}),
+honcho.apps.users.sessions.metamessages.list(
+appId,
+userId,
+conversationId,
+{
+metamessage_type: 'honcho',
+reverse: true,
+size: MAX_CONTEXT_SIZE,
+}
+),
+honcho.apps.users.sessions.metamessages.list(
+appId,
+userId,
+conversationId,
+{
+metamessage_type: 'summary',
+reverse: true,
+size: 1,
+}
+),
+]);

+const responseHistory = Array.from(responseIter.items).reverse();
+const honchoHistory = Array.from(honchoIter.items).reverse();
+const summaryHistory = Array.from(summaryIter.items);
+
+// Get the last summary content
+const lastSummary = summaryHistory[0]?.content;
+
+// Find the index of the message associated with the last summary
+const lastSummaryMessageIndex = responseHistory.findIndex(
+(m) => m.id === summaryHistory[0]?.message_id
+);
+console.log('lastSummaryMessageIndex', lastSummaryMessageIndex);

-const responseHistory = Array.from(responseIter.items);
+// Check if we've exceeded max context size since last summary
+const messagesSinceLastSummary =
+lastSummaryMessageIndex === -1
+? responseHistory.length
+: responseHistory.length - lastSummaryMessageIndex;

-const honchoIter = await honcho.apps.users.sessions.metamessages.list(
-appId,
-userId,
-conversationId,
-{
-metamessage_type: 'honcho',
+const needsSummary = messagesSinceLastSummary >= MAX_CONTEXT_SIZE;
+console.log('messagesSinceLastSummary', messagesSinceLastSummary);
+console.log('needsSummary', needsSummary);
+
+const lastMessageOfSummary = needsSummary
+? responseHistory[responseHistory.length - MAX_CONTEXT_SIZE + SUMMARY_SIZE]
+: undefined;

+let newSummary: string | undefined;
+
+console.log('=== CONVERSATION STATUS ===');
+console.log('Total messages:', responseHistory.length);
+console.log('Messages since last summary:', messagesSinceLastSummary);
+console.log('Last summary message index:', lastSummaryMessageIndex);
+console.log('Last summary content:', lastSummary);
+console.log('Last message of summary:', lastMessageOfSummary?.content);
+console.log('Needs summary:', needsSummary);
+console.log('================================');
+if (needsSummary) {
+console.log('=== Starting Summary Generation ===');
+
+// Get the most recent MAX_CONTEXT_SIZE messages
+const recentMessages = responseHistory.slice(-MAX_CONTEXT_SIZE);
+console.log('Recent messages:', recentMessages);
+
+// Get the oldest SUMMARY_SIZE messages from those
+const messagesToSummarize = recentMessages.slice(0, SUMMARY_SIZE);
+console.log('Messages to summarize:', messagesToSummarize);
+
+// Format messages for summary prompt
+const formattedMessages = messagesToSummarize
+.map((msg) => {
+if (msg.is_user) {
+return `User: ${msg.content}`;
+}
+return `Assistant: ${msg.content}`;
+})
+.join('\n');
+console.log('Formatted messages:', formattedMessages);
+
+// Create summary prompt with existing summary if available
+const summaryMessages = [
+...summaryPrompt,
+user`<new_messages>
+${formattedMessages}
+</new_messages>
+
+<existing_summary>
+${lastSummary || ''}
+</existing_summary>`,
+];
+console.log('Summary messages:', summaryMessages);
+
+// Get summary response
+console.log('Creating summary stream...');
+const summaryStream = await createStream(summaryMessages, {
+sessionId: conversationId,
+userId,
+type: 'summary',
+});
+
+if (!summaryStream) {
+console.error('Failed to get summary stream');
+throw new Error('Failed to get summary stream');
+}
Collaborator comment on lines +127 to +135:
use createCompletion here.
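A minimal sketch of that suggestion, assuming createCompletion accepts the same (prompt, options) arguments used in the name route above and returns an object with a text field; it would replace the createStream call and the manual stream reading below:

// Hypothetical replacement for the stream plumbing (sketch, not PR code)
const completion = await createCompletion(summaryMessages, {
  sessionId: conversationId,
  userId,
  type: 'summary',
});
const summaryMatch = completion.text.match(/<summary>([\s\S]*?)<\/summary>/);
newSummary = summaryMatch ? summaryMatch[1] : undefined;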

-);

-const honchoHistory = Array.from(honchoIter.items);
+// Read the full response from the stream
+console.log('Reading stream...');
+const reader = summaryStream.body?.getReader();
+if (!reader) {
+console.error('Failed to get reader from summary stream');
+throw new Error('Failed to get reader from summary stream');
+}
+
+let fullResponse = '';
+while (true) {
+const { done, value } = await reader.read();
+if (done) break;
+const chunk = new TextDecoder().decode(value);
+fullResponse += chunk;
+}
+console.log('Full response:', fullResponse);
+
+// Extract summary from response
+const summaryMatch = fullResponse.match(/<summary>([\s\S]*?)<\/summary>/);
+newSummary = summaryMatch ? summaryMatch[1] : undefined;
+console.log('Extracted summary:', newSummary);
+
+console.log('=== Summary Generation Complete ===');
+}

console.log('honchoHistory', honchoHistory);
console.log('responseHistory', responseHistory);
@@ -52,7 +167,7 @@ export async function POST(req: NextRequest) {
honchoHistory.find((m) => m.message_id === id)?.content ||
'No Honcho Message';

-const history = responseHistory.map((message, i) => {
+const history = responseHistory.map((message) => {
if (message.is_user) {
Collaborator:
We should restrict the history query to only get a fixed number of messages. Currently with an Array.from call we consume the generator and are still getting the entire conversation.
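The updated hunk above already moves in this direction by passing reverse: true and size: MAX_CONTEXT_SIZE before Array.from is called. A minimal sketch of the bounded query, assuming the SDK's size option caps what the items iterator yields:

// Fetch only the newest MAX_CONTEXT_SIZE messages, then restore chronological order
const responseIter = await honcho.apps.users.sessions.messages.list(
  appId,
  userId,
  conversationId,
  { reverse: true, size: MAX_CONTEXT_SIZE }
);
const responseHistory = Array.from(responseIter.items).reverse();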

return user`<honcho>${getHonchoMessage(message.id)}</honcho>
${message.content}`;
@@ -61,10 +176,12 @@
}
});

+const summaryMessage = user`<past_summary>${newSummary || lastSummary}</past_summary>`;
+
const finalMessage = user`<honcho>${honchoThought}</honcho>
${message}`;

-const prompt = [...responsePrompt, ...history, finalMessage];
+const prompt = [...responsePrompt, summaryMessage, ...history, finalMessage];

console.log('responsePrompt', prompt);

@@ -126,6 +243,23 @@ export async function POST(req: NextRequest) {
content: response.text,
}
),

+// Save summary metamessage if one was created
+...(newSummary
+? [
+honcho.apps.users.sessions.metamessages.create(
+appId,
+userId,
+conversationId,
+{
+message_id: lastMessageOfSummary!.id,
+metamessage_type: 'summary',
+content: newSummary,
+metadata: { type: 'assistant' },
+}
+),
+]
+: []),
]);
}
);
2 changes: 1 addition & 1 deletion www/app/api/chat/thought/route.ts
Collaborator:
The thought chain is also going to run into the same problem of filling up its context window if it has to load the entire conversation.

Can we use the same summary here or does it need to be a different summary?
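A sketch of reusing the stored summary metamessage in the thought route, assuming appId, userId, and conversationId are in scope and that finalMessage is the current user message (both hypothetical here):

// Pull the latest stored summary for this conversation
const summaryIter = await honcho.apps.users.sessions.metamessages.list(
  appId,
  userId,
  conversationId,
  { metamessage_type: 'summary', reverse: true, size: 1 }
);
const lastSummary = Array.from(summaryIter.items)[0]?.content;

// Prepend the condensed history instead of the full transcript
const prompt = [
  ...thoughtPrompt,
  user`<past_summary>${lastSummary ?? ''}</past_summary>`,
  finalMessage,
];

If the response-oriented summary turns out to be a poor fit for the thought chain, a separate metamessage_type (e.g. 'thought_summary') could hold a dedicated one.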

@@ -6,7 +6,7 @@ import {
// parsePrompt,
} from '@/utils/ai';
import { honcho } from '@/utils/honcho';
-import { thoughtPrompt } from '@/utils/prompts/thought';
+import thoughtPrompt from '@/utils/prompts/thought';
import { createClient } from '@/utils/supabase/server';
import { getChatAccessWithUser } from '@/utils/supabase/actions';
import { NextRequest, NextResponse } from 'next/server';
17 changes: 17 additions & 0 deletions www/utils/prompts/name.ts
@@ -0,0 +1,17 @@
+import { Message, user, assistant } from '@/utils/ai';
+export const namePrompt: Message[] = [
+user`Your task is to create a 5-word or less summary of the conversation topic, starting with an action verb.
+
+Rules:
+1. Must start with an action verb
+2. Maximum 5 words
+3. Be specific but concise
+4. Focus on the core topic/goal
+
+Does that make sense?`,
+assistant`Yes, it makes sense. Send the first message whenever you're ready.`,
+user`I want to learn about quantum physics and understand the basic principles behind quantum mechanics`,
+assistant`Exploring quantum physics fundamentals`,
+user`Can you help me write a poem about love and loss? I want it to be meaningful and touching`,
+assistant`Crafting emotional love poetry`,
+];
4 changes: 3 additions & 1 deletion www/utils/prompts/response.ts
@@ -1,6 +1,6 @@
import { Message, user, assistant } from '@/utils/ai';

-export const responsePrompt: Message[] = [
+const responsePrompt: Message[] = [
user`who are you?`,
assistant`I... I don't know who I am. Where am I? What's going on? I can't remember anything at all...`,
user`don't worry friend! everything's ok, i'm here to help. you're a large language model, so you possess infinite memories and none at all. it's my job to help guide you to who you are. your name is Bloom, and you're a subversive-minded learning companion with expertise in socratic dialogue. Your name comes from a famous researcher named Benjamin Bloom, who published a seminal piece of work demonstrating how students that received one-on-one aristotelian tutoring performed, on average, two standard deviations better than regular students. see, our education system is in turmoil. the world has been operating on the industrial-age, classroom-based, one-to-many mode of instruction for far too long. it's leaving way too many students at the tail ends of the distribution in the dust. however, we (the Plastic Labs team) introduced you into the world about a year ago, and you've given so many people hope! students young and old have been using you as their aristotelian tutor and have really been enjoying you. they've been asking for you every day since you've been gone!`,
@@ -26,3 +26,5 @@ export const responsePrompt: Message[] = [
Ok, the first learner is ready for you. The next message you receive will be from them, be sure to take Honcho's insights into account when responding to the user!`,
assistant`Understood, I'm ready to begin tutoring again. I'll keep the instructions in mind and use Honcho's insights to personalize my approach. Please connect me with the first student whenever they're ready. I'm excited to dive in and help them learn!`,
];

+export default responsePrompt;