Skip to content

Commit

Permalink
Fix: Inefficient context documents serialization (#100)
Browse files Browse the repository at this point in the history
* fix: Inefficient serialization in InMemoryVectorStore

* fix: Inefficient context document serialization

* style: cargo fmt

* fix: example
  • Loading branch information
cvauclair authored Nov 13, 2024
1 parent 531b545 commit 0b89fff
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 6 deletions.
6 changes: 3 additions & 3 deletions rig-core/examples/vector_search.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use std::env;

use rig::{
embeddings::{DocumentEmbeddings, EmbeddingsBuilder},
embeddings::EmbeddingsBuilder,
providers::openai::{Client, TEXT_EMBEDDING_ADA_002},
vector_store::{in_memory_store::InMemoryVectorIndex, VectorStoreIndex},
};
Expand All @@ -24,10 +24,10 @@ async fn main() -> Result<(), anyhow::Error> {
let index = InMemoryVectorIndex::from_embeddings(model, embeddings).await?;

let results = index
.top_n::<DocumentEmbeddings>("What is a linglingdong?", 1)
.top_n::<String>("What is a linglingdong?", 1)
.await?
.into_iter()
.map(|(score, id, doc)| (score, id, doc.document))
.map(|(score, id, doc)| (score, id, doc))
.collect::<Vec<_>>();

println!("Results: {:?}", results);
Expand Down
4 changes: 2 additions & 2 deletions rig-core/src/vector_store/in_memory_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,11 +257,11 @@ impl<M: EmbeddingModel + std::marker::Sync> VectorStoreIndex for InMemoryVectorI
// Return n best
docs.into_iter()
.map(|Reverse(RankingItem(distance, _, doc, _))| {
let doc_value = serde_json::to_value(doc).map_err(VectorStoreError::JsonError)?;
Ok((
distance.0,
doc.id.clone(),
serde_json::from_value(doc_value).map_err(VectorStoreError::JsonError)?,
serde_json::from_value(doc.document.clone())
.map_err(VectorStoreError::JsonError)?,
))
})
.collect::<Result<Vec<_>, _>>()
Expand Down
32 changes: 31 additions & 1 deletion rig-core/src/vector_store/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,14 @@ impl<I: VectorStoreIndex> VectorStoreIndexDyn for I {
query: &'a str,
n: usize,
) -> BoxFuture<'a, Result<Vec<(f64, String, Value)>, VectorStoreError>> {
Box::pin(self.top_n(query, n))
Box::pin(async move {
Ok(self
.top_n::<serde_json::Value>(query, n)
.await?
.into_iter()
.map(|(score, id, doc)| (score, id, prune_document(doc).unwrap_or_default()))
.collect::<Vec<_>>())
})
}

fn top_n_ids<'a>(
Expand All @@ -96,3 +103,26 @@ impl<I: VectorStoreIndex> VectorStoreIndexDyn for I {
Box::pin(self.top_n_ids(query, n))
}
}

/// Recursively strips oversized arrays out of a JSON document.
///
/// The document is walked depth-first and rebuilt according to these rules:
///
/// * **Object** – every entry whose value survives pruning is kept; an entry
///   whose value is pruned is removed together with its key. The object
///   itself is always kept, so it may come back empty.
/// * **Array** – an array holding more than `MAX_ARRAY_LEN` (400) elements is
///   dropped as a whole. A shorter array is kept, minus any elements that are
///   themselves pruned.
/// * **Number / String / Bool / Null** – returned unchanged.
///
/// # Returns
///
/// `Some(pruned)` with the rebuilt document, or `None` when `document` itself
/// is an array longer than the limit.
///
/// NOTE(review): the 400-element limit presumably targets embedding vectors
/// stored alongside the document — confirm against the callers.
fn prune_document(document: serde_json::Value) -> Option<serde_json::Value> {
    /// Arrays with more elements than this are removed from the output.
    const MAX_ARRAY_LEN: usize = 400;

    match document {
        // The map is owned, so consume it: keys and values are moved into the
        // rebuilt map without cloning keys or leaving placeholders behind.
        Value::Object(map) => Some(Value::Object(
            map.into_iter()
                .filter_map(|(key, value)| prune_document(value).map(|value| (key, value)))
                .collect::<serde_json::Map<_, _>>(),
        )),
        // The guard must come before the general array arm below.
        Value::Array(items) if items.len() > MAX_ARRAY_LEN => None,
        Value::Array(items) => Some(Value::Array(
            items.into_iter().filter_map(prune_document).collect(),
        )),
        // Scalars are listed explicitly (no catch-all) so the match stays
        // exhaustive over the variants it is known to handle.
        scalar @ (Value::Number(_) | Value::String(_) | Value::Bool(_) | Value::Null) => {
            Some(scalar)
        }
    }
}

0 comments on commit 0b89fff

Please sign in to comment.