-
Notifications
You must be signed in to change notification settings - Fork 74
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation Summary Buffer Memory #203
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,16 @@ | ||
import { | ||
assistant, | ||
createStream, | ||
getUserData, | ||
Message, | ||
user, | ||
} from '@/utils/ai'; | ||
import { assistant, createStream, getUserData, user } from '@/utils/ai'; | ||
import { honcho } from '@/utils/honcho'; | ||
import { responsePrompt } from '@/utils/prompts/response'; | ||
import responsePrompt from '@/utils/prompts/response'; | ||
import summaryPrompt from '@/utils/prompts/summary'; | ||
import { NextRequest, NextResponse } from 'next/server'; | ||
|
||
export const runtime = 'nodejs'; | ||
export const maxDuration = 100; | ||
export const dynamic = 'force-dynamic'; // always run dynamically | ||
|
||
const MAX_CONTEXT_SIZE = 11; | ||
const SUMMARY_SIZE = 5; | ||
|
||
export async function POST(req: NextRequest) { | ||
const { message, conversationId, thought, honchoThought } = await req.json(); | ||
|
||
|
@@ -25,25 +23,142 @@ export async function POST(req: NextRequest) { | |
|
||
const { appId, userId } = userData; | ||
|
||
const responseIter = await honcho.apps.users.sessions.messages.list( | ||
appId, | ||
userId, | ||
conversationId, | ||
{} | ||
const [responseIter, honchoIter, summaryIter] = await Promise.all([ | ||
honcho.apps.users.sessions.messages.list(appId, userId, conversationId, { | ||
reverse: true, | ||
size: MAX_CONTEXT_SIZE, | ||
}), | ||
honcho.apps.users.sessions.metamessages.list( | ||
appId, | ||
userId, | ||
conversationId, | ||
{ | ||
metamessage_type: 'honcho', | ||
reverse: true, | ||
size: MAX_CONTEXT_SIZE, | ||
} | ||
), | ||
honcho.apps.users.sessions.metamessages.list( | ||
appId, | ||
userId, | ||
conversationId, | ||
{ | ||
metamessage_type: 'summary', | ||
reverse: true, | ||
size: 1, | ||
} | ||
), | ||
]); | ||
|
||
const responseHistory = Array.from(responseIter.items).reverse(); | ||
const honchoHistory = Array.from(honchoIter.items).reverse(); | ||
const summaryHistory = Array.from(summaryIter.items); | ||
|
||
// Get the last summary content | ||
const lastSummary = summaryHistory[0]?.content; | ||
|
||
// Find the index of the message associated with the last summary | ||
const lastSummaryMessageIndex = responseHistory.findIndex( | ||
(m) => m.id === summaryHistory[0]?.message_id | ||
); | ||
console.log('lastSummaryMessageIndex', lastSummaryMessageIndex); | ||
|
||
const responseHistory = Array.from(responseIter.items); | ||
// Check if we've exceeded max context size since last summary | ||
const messagesSinceLastSummary = | ||
lastSummaryMessageIndex === -1 | ||
? responseHistory.length | ||
: responseHistory.length - lastSummaryMessageIndex; | ||
|
||
const honchoIter = await honcho.apps.users.sessions.metamessages.list( | ||
appId, | ||
userId, | ||
conversationId, | ||
{ | ||
metamessage_type: 'honcho', | ||
const needsSummary = messagesSinceLastSummary >= MAX_CONTEXT_SIZE; | ||
console.log('messagesSinceLastSummary', messagesSinceLastSummary); | ||
console.log('needsSummary', needsSummary); | ||
|
||
const lastMessageOfSummary = needsSummary | ||
? responseHistory[responseHistory.length - MAX_CONTEXT_SIZE + SUMMARY_SIZE] | ||
: undefined; | ||
|
||
let newSummary: string | undefined; | ||
|
||
console.log('=== CONVERSATION STATUS ==='); | ||
console.log('Total messages:', responseHistory.length); | ||
console.log('Messages since last summary:', messagesSinceLastSummary); | ||
console.log('Last summary message index:', lastSummaryMessageIndex); | ||
console.log('Last summary content:', lastSummary); | ||
console.log('Last message of summary:', lastMessageOfSummary?.content); | ||
console.log('Needs summary:', needsSummary); | ||
console.log('================================'); | ||
if (needsSummary) { | ||
console.log('=== Starting Summary Generation ==='); | ||
|
||
// Get the most recent MAX_CONTEXT_SIZE messages | ||
const recentMessages = responseHistory.slice(-MAX_CONTEXT_SIZE); | ||
console.log('Recent messages:', recentMessages); | ||
|
||
// Get the oldest SUMMARY_SIZE messages from those | ||
const messagesToSummarize = recentMessages.slice(0, SUMMARY_SIZE); | ||
console.log('Messages to summarize:', messagesToSummarize); | ||
|
||
// Format messages for summary prompt | ||
const formattedMessages = messagesToSummarize | ||
.map((msg) => { | ||
if (msg.is_user) { | ||
return `User: ${msg.content}`; | ||
} | ||
return `Assistant: ${msg.content}`; | ||
}) | ||
.join('\n'); | ||
console.log('Formatted messages:', formattedMessages); | ||
|
||
// Create summary prompt with existing summary if available | ||
const summaryMessages = [ | ||
...summaryPrompt, | ||
user`<new_messages> | ||
${formattedMessages} | ||
</new_messages> | ||
|
||
<existing_summary> | ||
${lastSummary || ''} | ||
</existing_summary>`, | ||
]; | ||
console.log('Summary messages:', summaryMessages); | ||
|
||
// Get summary response | ||
console.log('Creating summary stream...'); | ||
const summaryStream = await createStream(summaryMessages, { | ||
sessionId: conversationId, | ||
userId, | ||
type: 'summary', | ||
}); | ||
|
||
if (!summaryStream) { | ||
console.error('Failed to get summary stream'); | ||
throw new Error('Failed to get summary stream'); | ||
} | ||
); | ||
|
||
const honchoHistory = Array.from(honchoIter.items); | ||
// Read the full response from the stream | ||
console.log('Reading stream...'); | ||
const reader = summaryStream.body?.getReader(); | ||
if (!reader) { | ||
console.error('Failed to get reader from summary stream'); | ||
throw new Error('Failed to get reader from summary stream'); | ||
} | ||
|
||
let fullResponse = ''; | ||
while (true) { | ||
const { done, value } = await reader.read(); | ||
if (done) break; | ||
const chunk = new TextDecoder().decode(value); | ||
fullResponse += chunk; | ||
} | ||
console.log('Full response:', fullResponse); | ||
|
||
// Extract summary from response | ||
const summaryMatch = fullResponse.match(/<summary>([\s\S]*?)<\/summary/); | ||
newSummary = summaryMatch ? summaryMatch[1] : undefined; | ||
console.log('Extracted summary:', newSummary); | ||
|
||
console.log('=== Summary Generation Complete ==='); | ||
} | ||
|
||
console.log('honchoHistory', honchoHistory); | ||
console.log('responseHistory', responseHistory); | ||
|
@@ -52,7 +167,7 @@ export async function POST(req: NextRequest) { | |
honchoHistory.find((m) => m.message_id === id)?.content || | ||
'No Honcho Message'; | ||
|
||
const history = responseHistory.map((message, i) => { | ||
const history = responseHistory.map((message) => { | ||
if (message.is_user) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We should restrict the history query to only get a fixed number of messages. Currently with an |
||
return user`<honcho>${getHonchoMessage(message.id)}</honcho> | ||
${message.content}`; | ||
|
@@ -61,10 +176,12 @@ export async function POST(req: NextRequest) { | |
} | ||
}); | ||
|
||
const summaryMessage = user`<past_summary>${newSummary || lastSummary}</past_summary>`; | ||
|
||
const finalMessage = user`<honcho>${honchoThought}</honcho> | ||
${message}`; | ||
|
||
const prompt = [...responsePrompt, ...history, finalMessage]; | ||
const prompt = [...responsePrompt, summaryMessage, ...history, finalMessage]; | ||
|
||
console.log('responsePrompt', prompt); | ||
|
||
|
@@ -126,6 +243,23 @@ export async function POST(req: NextRequest) { | |
content: response.text, | ||
} | ||
), | ||
|
||
// Save summary metamessage if one was created | ||
...(newSummary | ||
? [ | ||
honcho.apps.users.sessions.metamessages.create( | ||
appId, | ||
userId, | ||
conversationId, | ||
{ | ||
message_id: lastMessageOfSummary!.id, | ||
metamessage_type: 'summary', | ||
content: newSummary, | ||
metadata: { type: 'assistant' }, | ||
} | ||
), | ||
] | ||
: []), | ||
]); | ||
} | ||
); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The thought chain is also going to run into the same problem of filling up its context window if it has to load the entire conversation. Can we use the same summary here or does it need to be a different summary? |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
import { Message, user, assistant } from '@/utils/ai'; | ||
/**
 * Few-shot prompt for naming a conversation.
 *
 * The opening user turn asks for a summary of the conversation topic that is
 * five words or fewer and starts with an action verb, and lists four rules.
 * The assistant turn acknowledges the task; the remaining user/assistant
 * pairs are worked examples of the expected output format.
 *
 * NOTE(review): presumably the caller appends the real first message as a
 * final user turn before sending — confirm against the call site.
 */
export const namePrompt: Message[] = [ | ||
user`Your task is to create a 5-word or less summary of the conversation topic, starting with an action verb. | ||
|
||
Rules: | ||
1. Must start with an action verb | ||
2. Maximum 5 words | ||
3. Be specific but concise | ||
4. Focus on the core topic/goal | ||
|
||
Does that make sense?`, | ||
assistant`Yes, it makes sense. Send the first message whenever you're ready.`, | ||
user`I want to learn about quantum physics and understand the basic principles behind quantum mechanics`, | ||
assistant`Exploring quantum physics fundamentals`, | ||
user`Can you help me write a poem about love and loss? I want it to be meaningful and touching`, | ||
assistant`Crafting emotional love poetry`, | ||
]; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use `createCompletion` here.