From 5c98789204d1254596186c42a896f23e338f47b0 Mon Sep 17 00:00:00 2001 From: Christophe Date: Fri, 20 Sep 2024 12:18:44 -0400 Subject: [PATCH 1/2] feat: Utility methods to simplify in-memory index creation --- rig-core/examples/vector_search.rs | 8 ++------ rig-core/src/vector_store/in_memory_store.rs | 14 ++++++++++++-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/rig-core/examples/vector_search.rs b/rig-core/examples/vector_search.rs index f9ac6cf0..b664c6cd 100644 --- a/rig-core/examples/vector_search.rs +++ b/rig-core/examples/vector_search.rs @@ -3,7 +3,7 @@ use std::env; use rig::{ embeddings::EmbeddingsBuilder, providers::openai::Client, - vector_store::{in_memory_store::InMemoryVectorStore, VectorStore, VectorStoreIndex}, + vector_store::{in_memory_store::InMemoryVectorIndex, VectorStoreIndex}, }; #[tokio::main] @@ -14,8 +14,6 @@ async fn main() -> Result<(), anyhow::Error> { let model = openai_client.embedding_model("text-embedding-ada-002"); - let mut vector_store = InMemoryVectorStore::default(); - let embeddings = EmbeddingsBuilder::new(model.clone()) .simple_document("doc0", "Definition of a *flurbo*: A flurbo is a green alien that lives on cold planets") .simple_document("doc1", "Definition of a *glarb-glarb*: A glarb-glarb is a ancient tool used by the ancestors of the inhabitants of planet Jiro to farm the land.") @@ -23,9 +21,7 @@ async fn main() -> Result<(), anyhow::Error> { .build() .await?; - vector_store.add_documents(embeddings).await?; - - let index = vector_store.index(model); + let index = InMemoryVectorIndex::from_embeddings(model, embeddings).await?; let results = index .top_n_from_query("What is a linglingdong?", 1) diff --git a/rig-core/src/vector_store/in_memory_store.rs b/rig-core/src/vector_store/in_memory_store.rs index 494b339b..87e82d9e 100644 --- a/rig-core/src/vector_store/in_memory_store.rs +++ b/rig-core/src/vector_store/in_memory_store.rs @@ -97,6 +97,15 @@ impl InMemoryVectorStore { pub fn is_empty(&self) 
-> bool { self.embeddings.is_empty() } + + /// Utility method to create an InMemoryVectorStore from a list of embeddings. + pub async fn from_embeddings( + embeddings: Vec, + ) -> Result { + let mut store = InMemoryVectorStore::default(); + store.add_documents(embeddings).await?; + Ok(store) + } } pub struct InMemoryVectorIndex { @@ -151,12 +160,13 @@ impl InMemoryVectorIndex { Ok(store.index(query_model)) } + /// Utility method to create an InMemoryVectorIndex from a list of embeddings + /// and an embedding model. pub async fn from_embeddings( query_model: M, embeddings: Vec, ) -> Result { - let mut store = InMemoryVectorStore::default(); - store.add_documents(embeddings).await?; + let store = InMemoryVectorStore::from_embeddings(embeddings).await?; Ok(store.index(query_model)) } } From 601d69b70a01657836b00bdca00366f95e7a1cbc Mon Sep 17 00:00:00 2001 From: Christophe Date: Fri, 20 Sep 2024 12:23:19 -0400 Subject: [PATCH 2/2] feat: Add more utility methods --- rig-core/src/vector_store/in_memory_store.rs | 28 +++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/rig-core/src/vector_store/in_memory_store.rs b/rig-core/src/vector_store/in_memory_store.rs index 87e82d9e..02c19cf8 100644 --- a/rig-core/src/vector_store/in_memory_store.rs +++ b/rig-core/src/vector_store/in_memory_store.rs @@ -102,10 +102,36 @@ impl InMemoryVectorStore { pub async fn from_embeddings( embeddings: Vec, ) -> Result { - let mut store = InMemoryVectorStore::default(); + let mut store = Self::default(); store.add_documents(embeddings).await?; Ok(store) } + + /// Create an InMemoryVectorStore from a list of documents. + /// The documents are serialized to JSON and embedded using the provided embedding model. + /// The resulting embeddings are stored in an InMemoryVectorStore created by the method. 
+ pub async fn from_documents( + embedding_model: M, + documents: &[(String, T)], + ) -> Result { + let embeddings = documents + .iter() + .fold( + EmbeddingsBuilder::new(embedding_model), + |builder, (id, doc)| { + builder.json_document( + id, + serde_json::to_value(doc).expect("Document should be serializable"), + vec![serde_json::to_string(doc).expect("Document should be serializable")], + ) + }, + ) + .build() + .await?; + + let store = Self::from_embeddings(embeddings).await?; + Ok(store) + } } pub struct InMemoryVectorIndex {