From 0dd7ea457547b16e566e656e9fe18bcd84cf55b0 Mon Sep 17 00:00:00 2001 From: Michael Sloan Date: Thu, 9 Jan 2025 18:26:21 -0700 Subject: [PATCH] assistant2: Background load of context + prep for refresh + efficiency (#22935) * Now loads context on background threads. - For file and directory context, buffer ropes can be shared between threads as they are immutable. This allows for traversal and accumulation of buffer text on a background thread. - For url context, the request, parsing, and rendering is now done on a background thread. * Prepares for support of buffer reload by individually storing the text of directory buffers. * Avoids some string copying / redundant strings. - When attaching message context, no longer builds a string for each context type. - For directory context, does not build a `SharedString` for the full text, instead has a slice of `SharedString` chunks which are then directly appended to the message context. - Building a fenced codeblock for a buffer now computes a precise capacity in advance. Release Notes: - N/A --- crates/assistant2/src/context.rs | 104 +++++----- .../context_picker/fetch_context_picker.rs | 14 +- .../src/context_picker/file_context_picker.rs | 2 +- crates/assistant2/src/context_store.rs | 192 +++++++++++++----- crates/assistant2/src/context_strip.rs | 42 +++- 5 files changed, 244 insertions(+), 110 deletions(-) diff --git a/crates/assistant2/src/context.rs b/crates/assistant2/src/context.rs index ab0def6bc4876..3ef3e649bdd6f 100644 --- a/crates/assistant2/src/context.rs +++ b/crates/assistant2/src/context.rs @@ -2,7 +2,6 @@ use std::path::Path; use std::rc::Rc; use std::sync::Arc; -use collections::BTreeMap; use gpui::{AppContext, Model, SharedString}; use language::Buffer; use language_model::{LanguageModelRequestMessage, MessageContent}; @@ -29,8 +28,8 @@ pub struct ContextSnapshot { pub parent: Option, pub tooltip: Option, pub kind: ContextKind, - /// Text to send to the model. This is not refreshed by `snapshot`. - pub text: SharedString, + /// Concatenating these strings yields text to send to the model. Not refreshed by `snapshot`. + pub text: Box<[SharedString]>, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -60,27 +59,18 @@ impl Context { } } -// TODO: Model holds onto the buffer even if the file is deleted and closed. Should remove -// the context from the message editor in this case. - #[derive(Debug)] pub struct FileContext { pub id: ContextId, - pub buffer: Model, - #[allow(unused)] - pub version: clock::Global, - pub text: SharedString, + pub buffer: ContextBuffer, } #[derive(Debug)] pub struct DirectoryContext { #[allow(unused)] pub path: Rc, - // TODO: The choice to make this a BTreeMap was a result of use in a version of - // ContextStore::will_include_buffer before I realized that the path logic should be used there - // too. #[allow(unused)] - pub buffers: BTreeMap, clock::Global)>, + pub buffers: Vec, pub snapshot: ContextSnapshot, } @@ -101,6 +91,19 @@ pub struct ThreadContext { pub text: SharedString, } +// TODO: Model holds onto the buffer even if the file is deleted and closed. Should remove +// the context from the message editor in this case. + +#[derive(Debug)] +pub struct ContextBuffer { + #[allow(unused)] + pub id: BufferId, + pub buffer: Model, + #[allow(unused)] + pub version: clock::Global, + pub text: SharedString, +} + impl Context { pub fn snapshot(&self, cx: &AppContext) -> Option { match &self { @@ -114,7 +117,7 @@ impl Context { impl FileContext { pub fn path(&self, cx: &AppContext) -> Option> { - let buffer = self.buffer.read(cx); + let buffer = self.buffer.buffer.read(cx); if let Some(file) = buffer.file() { Some(file.path().clone()) } else { @@ -141,7 +144,7 @@ impl FileContext { parent, tooltip: Some(full_path), kind: ContextKind::File, - text: self.text.clone(), + text: Box::new([self.buffer.text.clone()]), }) } } @@ -160,7 +163,7 @@ impl FetchedUrlContext { parent: None, tooltip: None, kind: ContextKind::FetchedUrl, - text: self.text.clone(), + text: Box::new([self.text.clone()]), } } } @@ -174,7 +177,7 @@ impl ThreadContext { parent: None, tooltip: None, kind: ContextKind::Thread, - text: self.text.clone(), + text: Box::new([self.text.clone()]), } } } @@ -183,55 +186,64 @@ pub fn attach_context_to_message( message: &mut LanguageModelRequestMessage, contexts: impl Iterator, ) { - let mut file_context = String::new(); - let mut directory_context = String::new(); - let mut fetch_context = String::new(); - let mut thread_context = String::new(); + let mut file_context = Vec::new(); + let mut directory_context = Vec::new(); + let mut fetch_context = Vec::new(); + let mut thread_context = Vec::new(); for context in contexts { match context.kind { - ContextKind::File => { - file_context.push_str(&context.text); - file_context.push('\n'); - } - ContextKind::Directory => { - directory_context.push_str(&context.text); - directory_context.push('\n'); - } - ContextKind::FetchedUrl => { - fetch_context.push_str(&context.name); - fetch_context.push('\n'); - fetch_context.push_str(&context.text); - fetch_context.push('\n'); - } - ContextKind::Thread { .. } => { - thread_context.push_str(&context.name); - thread_context.push('\n'); - thread_context.push_str(&context.text); - thread_context.push('\n'); - } + ContextKind::File => file_context.push(context), + ContextKind::Directory => directory_context.push(context), + ContextKind::FetchedUrl => fetch_context.push(context), + ContextKind::Thread => thread_context.push(context), } } let mut context_text = String::new(); + if !file_context.is_empty() { context_text.push_str("The following files are available:\n"); - context_text.push_str(&file_context); + for context in file_context { + for chunk in context.text { + context_text.push_str(&chunk); + } + context_text.push('\n'); + } } if !directory_context.is_empty() { context_text.push_str("The following directories are available:\n"); - context_text.push_str(&directory_context); + for context in directory_context { + for chunk in context.text { + context_text.push_str(&chunk); + } + context_text.push('\n'); + } } if !fetch_context.is_empty() { context_text.push_str("The following fetched results are available\n"); - context_text.push_str(&fetch_context); + for context in fetch_context { + context_text.push_str(&context.name); + context_text.push('\n'); + for chunk in context.text { + context_text.push_str(&chunk); + } + context_text.push('\n'); + } } if !thread_context.is_empty() { context_text.push_str("The following previous conversation threads are available\n"); - context_text.push_str(&thread_context); + for context in thread_context { + context_text.push_str(&context.name); + context_text.push('\n'); + for chunk in context.text { + context_text.push_str(&chunk); + } + context_text.push('\n'); + } } if !context_text.is_empty() { diff --git a/crates/assistant2/src/context_picker/fetch_context_picker.rs b/crates/assistant2/src/context_picker/fetch_context_picker.rs index 6704150096547..3119f30fbe948 100644 --- a/crates/assistant2/src/context_picker/fetch_context_picker.rs +++ b/crates/assistant2/src/context_picker/fetch_context_picker.rs @@ -81,13 +81,12 @@ impl FetchContextPickerDelegate { } } - async fn build_message(http_client: Arc, url: &str) -> Result { - let prefixed_url = if !url.starts_with("https://") && !url.starts_with("http://") { - Some(format!("https://{url}")) + async fn build_message(http_client: Arc, url: String) -> Result { + let url = if !url.starts_with("https://") && !url.starts_with("http://") { + format!("https://{url}") } else { - None + url }; - let url = prefixed_url.as_deref().unwrap_or(url); let mut response = http_client.get(&url, AsyncBody::default(), true).await?; @@ -196,7 +195,10 @@ impl PickerDelegate for FetchContextPickerDelegate { let url = self.url.clone(); let confirm_behavior = self.confirm_behavior; cx.spawn(|this, mut cx| async move { - let text = Self::build_message(http_client, &url).await?; + let text = cx + .background_executor() + .spawn(Self::build_message(http_client, url.clone())) + .await?; this.update(&mut cx, |this, cx| { this.delegate diff --git a/crates/assistant2/src/context_picker/file_context_picker.rs b/crates/assistant2/src/context_picker/file_context_picker.rs index 288483bf2f1c6..8db3b46e0d630 100644 --- a/crates/assistant2/src/context_picker/file_context_picker.rs +++ b/crates/assistant2/src/context_picker/file_context_picker.rs @@ -201,7 +201,7 @@ impl PickerDelegate for FileContextPickerDelegate { let Some(task) = self .context_store .update(cx, |context_store, cx| { - context_store.add_file(project_path, cx) + context_store.add_file_from_path(project_path, cx) }) .ok() else { diff --git a/crates/assistant2/src/context_store.rs b/crates/assistant2/src/context_store.rs index 84b4d0b97bf0b..61e1ac280c6d1 100644 --- a/crates/assistant2/src/context_store.rs +++ b/crates/assistant2/src/context_store.rs @@ -1,18 +1,18 @@ -use std::fmt::Write as _; use std::path::{Path, PathBuf}; use std::sync::Arc; use anyhow::{anyhow, bail, Result}; use collections::{BTreeMap, HashMap}; -use gpui::{AppContext, Model, ModelContext, SharedString, Task, WeakView}; +use gpui::{AppContext, AsyncAppContext, Model, ModelContext, SharedString, Task, WeakView}; use language::Buffer; use project::{ProjectPath, Worktree}; +use rope::Rope; use text::BufferId; use workspace::Workspace; use crate::context::{ - Context, ContextId, ContextKind, ContextSnapshot, DirectoryContext, FetchedUrlContext, - FileContext, ThreadContext, + Context, ContextBuffer, ContextId, ContextKind, ContextSnapshot, DirectoryContext, + FetchedUrlContext, FileContext, ThreadContext, }; use crate::thread::{Thread, ThreadId}; @@ -61,12 +61,13 @@ impl ContextStore { self.fetched_urls.clear(); } - pub fn add_file( + pub fn add_file_from_path( &mut self, project_path: ProjectPath, cx: &mut ModelContext, ) -> Task> { let workspace = self.workspace.clone(); + let Some(project) = workspace .upgrade() .map(|workspace| workspace.read(cx).project().clone()) @@ -79,8 +80,8 @@ impl ContextStore { project.open_buffer(project_path.clone(), cx) })?; - let buffer = open_buffer_task.await?; - let buffer_id = buffer.update(&mut cx, |buffer, _cx| buffer.remote_id())?; + let buffer_model = open_buffer_task.await?; + let buffer_id = this.update(&mut cx, |_, cx| buffer_model.read(cx).remote_id())?; let already_included = this.update(&mut cx, |this, _cx| { match this.will_include_buffer(buffer_id, &project_path.path) { @@ -97,30 +98,61 @@ impl ContextStore { return anyhow::Ok(()); } - this.update(&mut cx, |this, cx| { - this.insert_file(buffer, cx); + let (buffer_info, text_task) = this.update(&mut cx, |_, cx| { + let buffer = buffer_model.read(cx); + collect_buffer_info_and_text( + project_path.path.clone(), + buffer_model, + buffer, + &cx.to_async(), + ) + })?; + + let text = text_task.await; + + this.update(&mut cx, |this, _cx| { + this.insert_file(make_context_buffer(buffer_info, text)); })?; anyhow::Ok(()) }) } - pub fn insert_file(&mut self, buffer_model: Model, cx: &AppContext) { - let buffer = buffer_model.read(cx); - let Some(file) = buffer.file() else { - return; - }; + pub fn add_file_from_buffer( + &mut self, + buffer_model: Model, + cx: &mut ModelContext, + ) -> Task> { + cx.spawn(|this, mut cx| async move { + let (buffer_info, text_task) = this.update(&mut cx, |_, cx| { + let buffer = buffer_model.read(cx); + let Some(file) = buffer.file() else { + return Err(anyhow!("Buffer has no path.")); + }; + Ok(collect_buffer_info_and_text( + file.path().clone(), + buffer_model, + buffer, + &cx.to_async(), + )) + })??; - let mut text = String::new(); - push_fenced_codeblock(file.path(), buffer.text(), &mut text); + let text = text_task.await; + this.update(&mut cx, |this, _cx| { + this.insert_file(make_context_buffer(buffer_info, text)) + })?; + + anyhow::Ok(()) + }) + } + + pub fn insert_file(&mut self, context_buffer: ContextBuffer) { let id = self.next_context_id.post_inc(); - self.files.insert(buffer.remote_id(), id); + self.files.insert(context_buffer.id, id); self.context.push(Context::File(FileContext { id, - buffer: buffer_model, - version: buffer.version.clone(), - text: text.into(), + buffer: context_buffer, })); } @@ -162,7 +194,7 @@ impl ContextStore { let open_buffer_tasks = project.update(&mut cx, |project, cx| { files - .into_iter() + .iter() .map(|file_path| { project.open_buffer( ProjectPath { @@ -177,29 +209,36 @@ impl ContextStore { let buffers = futures::future::join_all(open_buffer_tasks).await; - this.update(&mut cx, |this, cx| { - let mut text = String::new(); - let mut directory_buffers = BTreeMap::new(); - for buffer_model in buffers { + let mut buffer_infos = Vec::new(); + let mut text_tasks = Vec::new(); + this.update(&mut cx, |_, cx| { + for (path, buffer_model) in files.into_iter().zip(buffers) { let buffer_model = buffer_model?; let buffer = buffer_model.read(cx); - let path = buffer.file().map_or(&project_path.path, |file| file.path()); - push_fenced_codeblock(&path, buffer.text(), &mut text); - directory_buffers - .insert(buffer.remote_id(), (buffer_model, buffer.version.clone())); + let (buffer_info, text_task) = + collect_buffer_info_and_text(path, buffer_model, buffer, &cx.to_async()); + buffer_infos.push(buffer_info); + text_tasks.push(text_task); } + anyhow::Ok(()) + })??; - if directory_buffers.is_empty() { - bail!( - "could not read any text files from {}", - &project_path.path.display() - ); - } + let buffer_texts = futures::future::join_all(text_tasks).await; + let directory_buffers = buffer_infos + .into_iter() + .zip(buffer_texts.iter()) + .map(|(info, text)| make_context_buffer(info, text.clone())) + .collect::>(); - this.insert_directory(&project_path.path, directory_buffers, text); + if directory_buffers.is_empty() { + bail!("No text files found in {}", &project_path.path.display()); + } - anyhow::Ok(()) - })??; + // TODO: include directory path in text? + + this.update(&mut cx, |this, _| { + this.insert_directory(&project_path.path, directory_buffers, buffer_texts.into()); + })?; anyhow::Ok(()) }) @@ -208,8 +247,8 @@ impl ContextStore { pub fn insert_directory( &mut self, path: &Path, - buffers: BTreeMap, clock::Global)>, - text: impl Into, + buffers: Vec, + text: Box<[SharedString]>, ) { let id = self.next_context_id.post_inc(); self.directories.insert(path.to_path_buf(), id); @@ -235,7 +274,7 @@ impl ContextStore { parent, tooltip: Some(full_path), kind: ContextKind::Directory, - text: text.into(), + text, }, })); } @@ -357,25 +396,80 @@ pub enum FileInclusion { InDirectory(PathBuf), } -pub(crate) fn push_fenced_codeblock(path: &Path, content: String, buffer: &mut String) { - buffer.reserve(content.len() + 64); - - write!(buffer, "```").unwrap(); +// ContextBuffer without text. +struct BufferInfo { + buffer_model: Model, + id: BufferId, + version: clock::Global, +} - if let Some(extension) = path.extension().and_then(|ext| ext.to_str()) { - write!(buffer, "{} ", extension).unwrap(); +fn make_context_buffer(info: BufferInfo, text: SharedString) -> ContextBuffer { + ContextBuffer { + id: info.id, + buffer: info.buffer_model, + version: info.version, + text, } +} - write!(buffer, "{}", path.display()).unwrap(); +fn collect_buffer_info_and_text( + path: Arc, + buffer_model: Model, + buffer: &Buffer, + cx: &AsyncAppContext, +) -> (BufferInfo, Task) { + let buffer_info = BufferInfo { + id: buffer.remote_id(), + buffer_model, + version: buffer.version(), + }; + // Important to collect version at the same time as content so that staleness logic is correct. + let content = buffer.as_rope().clone(); + let text_task = cx + .background_executor() + .spawn(async move { to_fenced_codeblock(&path, content) }); + (buffer_info, text_task) +} + +fn to_fenced_codeblock(path: &Path, content: Rope) -> SharedString { + let path_extension = path.extension().and_then(|ext| ext.to_str()); + let path_string = path.to_string_lossy(); + let capacity = 3 + + path_extension.map_or(0, |extension| extension.len() + 1) + + path_string.len() + + 1 + + content.len() + + 5; + let mut buffer = String::with_capacity(capacity); + + buffer.push_str("```"); + + if let Some(extension) = path_extension { + buffer.push_str(extension); + buffer.push(' '); + } + buffer.push_str(&path_string); buffer.push('\n'); - buffer.push_str(&content); + for chunk in content.chunks() { + buffer.push_str(&chunk); + } if !buffer.ends_with('\n') { buffer.push('\n'); } buffer.push_str("```\n"); + + if buffer.len() > capacity { + log::error!( + "to_fenced_codeblock calculated capacity {} but length was {}", + capacity, + buffer.len() + ); + } + + buffer.into() } fn collect_files_in_path(worktree: &Worktree, path: &Path) -> Vec> { diff --git a/crates/assistant2/src/context_strip.rs b/crates/assistant2/src/context_strip.rs index f62d7991697da..b2dd67846b3f3 100644 --- a/crates/assistant2/src/context_strip.rs +++ b/crates/assistant2/src/context_strip.rs @@ -1,10 +1,11 @@ use std::rc::Rc; +use anyhow::Result; use collections::HashSet; use editor::Editor; use gpui::{ - AppContext, DismissEvent, EventEmitter, FocusHandle, Model, Subscription, View, WeakModel, - WeakView, + DismissEvent, EventEmitter, FocusHandle, Model, ModelContext, Subscription, Task, View, + WeakModel, WeakView, }; use itertools::Itertools; use language::Buffer; @@ -230,12 +231,32 @@ impl Render for ContextStrip { suggested.kind(), { let context_store = self.context_store.clone(); - Rc::new(cx.listener(move |_this, _event, cx| { - context_store.update(cx, |context_store, cx| { - suggested.accept(context_store, cx); + Rc::new(cx.listener(move |this, _event, cx| { + let task = context_store.update(cx, |context_store, cx| { + suggested.accept(context_store, cx) }); - cx.notify(); + let workspace = this.workspace.clone(); + cx.spawn(|this, mut cx| async move { + match task.await { + Ok(()) => { + if let Some(this) = this.upgrade() { + this.update(&mut cx, |_, cx| cx.notify())?; + } + } + Err(err) => { + let Some(workspace) = workspace.upgrade() else { + return anyhow::Ok(()); + }; + + workspace.update(&mut cx, |workspace, cx| { + workspace.show_error(&err, cx); + })?; + } + } + anyhow::Ok(()) + }) + .detach_and_log_err(cx); })) }, )) @@ -299,11 +320,15 @@ impl SuggestedContext { } } - pub fn accept(&self, context_store: &mut ContextStore, cx: &mut AppContext) { + pub fn accept( + &self, + context_store: &mut ContextStore, + cx: &mut ModelContext, + ) -> Task> { match self { Self::File { buffer, name: _ } => { if let Some(buffer) = buffer.upgrade() { - context_store.insert_file(buffer, cx); + return context_store.add_file_from_buffer(buffer, cx); }; } Self::Thread { thread, name: _ } => { @@ -312,6 +337,7 @@ impl SuggestedContext { }; } } + Task::ready(Ok(())) } pub fn kind(&self) -> ContextKind {