-
Notifications
You must be signed in to change notification settings - Fork 76
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation Summary Buffer Memory #203
Changes from 5 commits
1a1eaf0
f42cae4
664b1dc
27557b7
6930bd9
88ed33b
162ef0e
dca2386
b8628bb
0712ce4
b9c0b0a
d072013
42ec019
e47d7f1
75c85ad
f3080d8
dabcfc3
15acb39
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,16 @@ | ||
import { | ||
assistant, | ||
createStream, | ||
getUserData, | ||
Message, | ||
user, | ||
} from '@/utils/ai'; | ||
import { assistant, createStream, getUserData, user } from '@/utils/ai'; | ||
import { honcho } from '@/utils/honcho'; | ||
import { responsePrompt } from '@/utils/prompts/response'; | ||
import responsePrompt from '@/utils/prompts/response'; | ||
import summaryPrompt from '@/utils/prompts/summary'; | ||
import { NextRequest, NextResponse } from 'next/server'; | ||
|
||
export const runtime = 'nodejs'; | ||
export const maxDuration = 100; | ||
export const dynamic = 'force-dynamic'; // always run dynamically | ||
|
||
const MAX_CONTEXT_SIZE = 11; | ||
const SUMMARY_SIZE = 5; | ||
|
||
export async function POST(req: NextRequest) { | ||
const { message, conversationId, thought, honchoThought } = await req.json(); | ||
|
||
|
@@ -25,25 +23,142 @@ export async function POST(req: NextRequest) { | |
|
||
const { appId, userId } = userData; | ||
|
||
const responseIter = await honcho.apps.users.sessions.messages.list( | ||
appId, | ||
userId, | ||
conversationId, | ||
{} | ||
const [responseIter, honchoIter, summaryIter] = await Promise.all([ | ||
honcho.apps.users.sessions.messages.list(appId, userId, conversationId, { | ||
reverse: true, | ||
size: MAX_CONTEXT_SIZE, | ||
}), | ||
honcho.apps.users.sessions.metamessages.list( | ||
appId, | ||
userId, | ||
conversationId, | ||
{ | ||
metamessage_type: 'honcho', | ||
reverse: true, | ||
size: MAX_CONTEXT_SIZE, | ||
} | ||
), | ||
honcho.apps.users.sessions.metamessages.list( | ||
appId, | ||
userId, | ||
conversationId, | ||
{ | ||
metamessage_type: 'summary', | ||
reverse: true, | ||
size: 1, | ||
} | ||
), | ||
]); | ||
|
||
const responseHistory = Array.from(responseIter.items).reverse(); | ||
const honchoHistory = Array.from(honchoIter.items).reverse(); | ||
const summaryHistory = Array.from(summaryIter.items); | ||
|
||
// Get the last summary content | ||
const lastSummary = summaryHistory[0]?.content; | ||
|
||
// Find the index of the message associated with the last summary | ||
const lastSummaryMessageIndex = responseHistory.findIndex( | ||
(m) => m.id === summaryHistory[0]?.message_id | ||
); | ||
console.log('lastSummaryMessageIndex', lastSummaryMessageIndex); | ||
|
||
const responseHistory = Array.from(responseIter.items); | ||
// Check if we've exceeded max context size since last summary | ||
const messagesSinceLastSummary = | ||
lastSummaryMessageIndex === -1 | ||
? responseHistory.length | ||
: responseHistory.length - lastSummaryMessageIndex; | ||
|
||
const honchoIter = await honcho.apps.users.sessions.metamessages.list( | ||
appId, | ||
userId, | ||
conversationId, | ||
{ | ||
metamessage_type: 'honcho', | ||
const needsSummary = messagesSinceLastSummary >= MAX_CONTEXT_SIZE; | ||
console.log('messagesSinceLastSummary', messagesSinceLastSummary); | ||
console.log('needsSummary', needsSummary); | ||
|
||
const lastMessageOfSummary = needsSummary | ||
? responseHistory[responseHistory.length - MAX_CONTEXT_SIZE + SUMMARY_SIZE] | ||
: undefined; | ||
|
||
let newSummary: string | undefined; | ||
|
||
console.log('=== CONVERSATION STATUS ==='); | ||
console.log('Total messages:', responseHistory.length); | ||
console.log('Messages since last summary:', messagesSinceLastSummary); | ||
console.log('Last summary message index:', lastSummaryMessageIndex); | ||
console.log('Last summary content:', lastSummary); | ||
console.log('Last message of summary:', lastMessageOfSummary?.content); | ||
console.log('Needs summary:', needsSummary); | ||
console.log('================================'); | ||
if (needsSummary) { | ||
console.log('=== Starting Summary Generation ==='); | ||
|
||
// Get the most recent MAX_CONTEXT_SIZE messages | ||
const recentMessages = responseHistory.slice(-MAX_CONTEXT_SIZE); | ||
console.log('Recent messages:', recentMessages); | ||
|
||
// Get the oldest SUMMARY_SIZE messages from those | ||
const messagesToSummarize = recentMessages.slice(0, SUMMARY_SIZE); | ||
console.log('Messages to summarize:', messagesToSummarize); | ||
|
||
// Format messages for summary prompt | ||
const formattedMessages = messagesToSummarize | ||
.map((msg) => { | ||
if (msg.is_user) { | ||
return `User: ${msg.content}`; | ||
} | ||
return `Assistant: ${msg.content}`; | ||
}) | ||
.join('\n'); | ||
console.log('Formatted messages:', formattedMessages); | ||
|
||
// Create summary prompt with existing summary if available | ||
const summaryMessages = [ | ||
...summaryPrompt, | ||
user`<new_messages> | ||
${formattedMessages} | ||
</new_messages> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. After doing some testing, I noticed that this is quite a slow process to run synchronously while the user is chatting with Bloom. It took upwards of 2 minutes for the entire flow to complete. Instead of making it a synchronous part of the process, we should maybe pre-emptively fire off a background job that creates the summary to be used in the next conversation turn. That way the current response can continue without delay, but a summary will still be available. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What would be the best way of deploying "background jobs" on Vercel? A separate route that we send a request to but never await? Also, two minutes seems way too long for a single prompt call; I've surely messed something up here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Wait, I'm not seeing 2-minute response times on my end. Can you reproduce? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The timeout really depends on our inference provider and the load they are under. Sometimes DeepInfra is hella slow and sometimes it's fine. For background jobs, I'd say a Vercel edge function could work, or maybe just kicking off a promise that we don't await. |
||
|
||
<existing_summary> | ||
${lastSummary || ''} | ||
</existing_summary>`, | ||
]; | ||
console.log('Summary messages:', summaryMessages); | ||
|
||
// Get summary response | ||
console.log('Creating summary stream...'); | ||
const summaryStream = await createStream(summaryMessages, { | ||
sessionId: conversationId, | ||
userId, | ||
type: 'summary', | ||
}); | ||
|
||
if (!summaryStream) { | ||
console.error('Failed to get summary stream'); | ||
throw new Error('Failed to get summary stream'); | ||
} | ||
VVoruganti marked this conversation as resolved.
Show resolved
Hide resolved
|
||
); | ||
|
||
const honchoHistory = Array.from(honchoIter.items); | ||
// Read the full response from the stream | ||
console.log('Reading stream...'); | ||
const reader = summaryStream.body?.getReader(); | ||
if (!reader) { | ||
console.error('Failed to get reader from summary stream'); | ||
throw new Error('Failed to get reader from summary stream'); | ||
} | ||
|
||
let fullResponse = ''; | ||
while (true) { | ||
const { done, value } = await reader.read(); | ||
if (done) break; | ||
const chunk = new TextDecoder().decode(value); | ||
fullResponse += chunk; | ||
} | ||
console.log('Full response:', fullResponse); | ||
|
||
// Extract summary from response | ||
const summaryMatch = fullResponse.match(/<summary>([\s\S]*?)<\/summary/); | ||
newSummary = summaryMatch ? summaryMatch[1] : undefined; | ||
console.log('Extracted summary:', newSummary); | ||
|
||
console.log('=== Summary Generation Complete ==='); | ||
} | ||
|
||
console.log('honchoHistory', honchoHistory); | ||
console.log('responseHistory', responseHistory); | ||
|
@@ -52,7 +167,7 @@ export async function POST(req: NextRequest) { | |
honchoHistory.find((m) => m.message_id === id)?.content || | ||
'No Honcho Message'; | ||
|
||
const history = responseHistory.map((message, i) => { | ||
const history = responseHistory.map((message) => { | ||
if (message.is_user) { | ||
VVoruganti marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return user`<honcho>${getHonchoMessage(message.id)}</honcho> | ||
${message.content}`; | ||
|
@@ -61,10 +176,12 @@ export async function POST(req: NextRequest) { | |
} | ||
}); | ||
|
||
const summaryMessage = user`<past_summary>${newSummary || lastSummary}</past_summary>`; | ||
|
||
const finalMessage = user`<honcho>${honchoThought}</honcho> | ||
${message}`; | ||
|
||
const prompt = [...responsePrompt, ...history, finalMessage]; | ||
const prompt = [...responsePrompt, summaryMessage, ...history, finalMessage]; | ||
|
||
console.log('responsePrompt', prompt); | ||
|
||
|
@@ -126,6 +243,23 @@ export async function POST(req: NextRequest) { | |
content: response.text, | ||
} | ||
), | ||
|
||
// Save summary metamessage if one was created | ||
...(newSummary | ||
? [ | ||
honcho.apps.users.sessions.metamessages.create( | ||
appId, | ||
userId, | ||
conversationId, | ||
{ | ||
message_id: lastMessageOfSummary!.id, | ||
metamessage_type: 'summary', | ||
content: newSummary, | ||
metadata: { type: 'assistant' }, | ||
} | ||
), | ||
] | ||
: []), | ||
]); | ||
} | ||
); | ||
|
VVoruganti marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
import { Message, user, assistant } from '@/utils/ai'; | ||
export const namePrompt: Message[] = [ | ||
user`Your task is to create a 5-word or less summary of the conversation topic, starting with an action verb. | ||
|
||
Rules: | ||
1. Must start with an action verb | ||
2. Maximum 5 words | ||
3. Be specific but concise | ||
4. Focus on the core topic/goal | ||
|
||
Does that make sense?`, | ||
assistant`Yes, it makes sense. Send the first message whenever you're ready.`, | ||
user`I want to learn about quantum physics and understand the basic principles behind quantum mechanics`, | ||
assistant`Exploring quantum physics fundamentals`, | ||
user`Can you help me write a poem about love and loss? I want it to be meaningful and touching`, | ||
assistant`Crafting emotional love poetry`, | ||
]; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,62 @@ | ||
import { Message, user, assistant } from '@/utils/ai'; | ||
export const summaryPrompt: Message[] = [ | ||
user`Your task is to create a 5-word or less summary of the conversation topic, starting with an action verb. | ||
Rules: | ||
1. Must start with an action verb | ||
2. Maximum 5 words | ||
3. Be specific but concise | ||
4. Focus on the core topic/goal | ||
Does that make sense?`, | ||
assistant`Yes, it makes sense. Send the first message whenever you're ready.`, | ||
user`I want to learn about quantum physics and understand the basic principles behind quantum mechanics`, | ||
assistant`Exploring quantum physics fundamentals`, | ||
user`Can you help me write a poem about love and loss? I want it to be meaningful and touching`, | ||
assistant`Crafting emotional love poetry`, | ||
import { user, assistant, Message } from '@/utils/ai'; | ||
|
||
const MAXIMUM_SUMMARY_SIZE: string = '6 sentences'; | ||
|
||
const summaryPrompt: Message[] = [ | ||
user`You are an AI assistant tasked with creating or updating conversation history summaries. Your goal is to produce concise, information-dense summaries that capture key points while adhering to a specified size limit. | ||
The size limit for the summary is: | ||
<size_limit> | ||
${MAXIMUM_SUMMARY_SIZE} | ||
</size_limit> | ||
For each summarization task, you will receive the following inputs: | ||
1. New messages to be summarized: | ||
<new_messages> | ||
{NEW_MESSAGES} | ||
</new_messages> | ||
2. An existing summary (if available): | ||
<existing_summary> | ||
{EXISTING_SUMMARY} | ||
</existing_summary> | ||
Instructions: | ||
1. Review the existing summary (if provided) and the new messages. | ||
2. Analyze the conversation inside <analysis> tags: | ||
a. Summarize the existing summary (if any) | ||
b. List key points from new messages | ||
c. Identify overlaps between existing summary and new messages, and highlight new information | ||
d. Prioritize information based on importance and relevance | ||
e. Plan how to express key points concisely | ||
It's OK for this section to be quite long. | ||
3. Create or update the summary based on your analysis: | ||
- Ensure a coherent and chronological flow of information. | ||
- Use concise language and avoid redundancy. | ||
- Combine related points where possible to save space. | ||
- Only mention participant names if crucial for context or decisions. | ||
- Use clear abbreviations for common terms if needed to save space. | ||
4. Check the summary length against the maximum output size. If it exceeds the limit, prioritize critical information and remove less essential details. | ||
5. Present your final summary within <summary> tags. Do not include any explanations or meta-commentary outside these tags. | ||
Example output structure: | ||
<analysis> | ||
[Your detailed analysis of the conversation, including steps a through e as outlined above] | ||
</analysis> | ||
<summary> | ||
[Your concise, information-dense summary of the conversation, adhering to the size limit] | ||
</summary> | ||
Remember, your goal is to create a dense, informative summary that captures the key points of the conversation within the specified size constraint.`, | ||
assistant`Got it. I'm ready for any summarization tasks you have for me!`, | ||
]; | ||
|
||
export default summaryPrompt; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
💡 Verification agent
🧩 Analysis chain
Verify regex pattern change impact.
Removing the 's' flag means the dot won't match newlines. This could cause issues with multi-line honcho content.
🏁 Script executed:
Length of output: 3333
Multi-line Content Handling in Honcho Regex
The regex in `www/app/api/chat/honcho/route.ts` now uses `/\<honcho\>(.*?)\<\/honcho\>/` without the dot-all flag, so the dot won’t match newline characters. Our repository search confirms that honcho tags often enclose multi-line content (e.g., in `www/app/api/chat/response/route.ts` and `www/utils/prompts/response.ts`). Please verify whether multi-line honcho content should be supported. If so, consider updating the regex (for example, use `/\<honcho\>([\s\S]*?)\<\/honcho\>/` or re-enable the `s` flag) so that it captures newlines correctly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This seems reasonable in case the output has `\n` characters for whatever reason.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@VVoruganti Great! Here's the updated regex pattern that will handle newlines:
✏️ Learnings added