From 4a6a87d10f4578f17dbe3f786d06677ef746fe39 Mon Sep 17 00:00:00 2001 From: Garance Date: Wed, 2 Oct 2024 12:23:56 -0400 Subject: [PATCH] docs: add doc strings --- rig-lancedb/src/lib.rs | 9 +++++++-- rig-lancedb/src/utils/deserializer.rs | 3 +++ rig-lancedb/src/utils/mod.rs | 6 +----- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/rig-lancedb/src/lib.rs b/rig-lancedb/src/lib.rs index 2b4e596d..5da12e38 100644 --- a/rig-lancedb/src/lib.rs +++ b/rig-lancedb/src/lib.rs @@ -22,15 +22,19 @@ fn serde_to_rig_error(e: serde_json::Error) -> VectorStoreError { } pub struct LanceDbVectorStore { - /// Defines which model is used to generate embeddings for the vector store + /// Defines which model is used to generate embeddings for the vector store. model: M, + /// LanceDB table containing embeddings. table: lancedb::Table, + /// Column name in `table` that contains the id of a record. id_field: String, /// Vector search params that are used during vector search operations. search_params: SearchParams, } impl LanceDbVectorStore { + /// Apply the search_params to the vector query. + /// This is a helper function used by the methods `top_n` and `top_n_ids` of the `VectorStoreIndex` trait. fn build_query(&self, mut query: VectorQuery) -> VectorQuery { let SearchParams { distance_type, @@ -136,7 +140,8 @@ impl LanceDbVectorStore { }) } - /// Define index on document table `id` field for search optimization. + /// Define an index on the specified fields of the lanceDB table for search optimization. + /// Note: it is required to add an index on the column containing the embeddings when performing an ANN type vector search. 
pub async fn create_index( &self, index: Index, diff --git a/rig-lancedb/src/utils/deserializer.rs b/rig-lancedb/src/utils/deserializer.rs index 3a686d28..88f0e8de 100644 --- a/rig-lancedb/src/utils/deserializer.rs +++ b/rig-lancedb/src/utils/deserializer.rs @@ -25,6 +25,8 @@ fn arrow_to_rig_error(e: ArrowError) -> VectorStoreError { VectorStoreError::DatastoreError(Box::new(e)) } +/// Trait used to deserialize data returned from LanceDB queries into a serde_json::Value vector. +/// Data returned by LanceDB is a vector of `RecordBatch` items. pub trait RecordBatchDeserializer { fn deserialize(&self) -> Result, VectorStoreError>; } @@ -43,6 +45,7 @@ impl RecordBatchDeserializer for Vec { impl RecordBatchDeserializer for RecordBatch { fn deserialize(&self) -> Result, VectorStoreError> { + /// Recursive function that matches all possible data types stored in LanceDB and converts them to serde_json::Value. fn type_matcher(column: &Arc) -> Result, VectorStoreError> { match column.data_type() { DataType::Null => Ok(vec![serde_json::Value::Null]), diff --git a/rig-lancedb/src/utils/mod.rs b/rig-lancedb/src/utils/mod.rs index e8db4559..46aeab31 100644 --- a/rig-lancedb/src/utils/mod.rs +++ b/rig-lancedb/src/utils/mod.rs @@ -7,16 +7,12 @@ use rig::vector_store::VectorStoreError; use crate::lancedb_to_rig_error; -/// Trait that facilitates the conversion of columnar data returned by a lanceDb query to the desired struct. +/// Trait that facilitates the conversion of columnar data returned by a lanceDb query to serde_json::Value. /// Used whenever a lanceDb table is queried. -/// First, execute the query and get the result as a list of RecordBatches (columnar data). -/// Then, convert the record batches to the desired type using the try_from trait. pub trait Query { async fn execute_query(&self) -> Result, VectorStoreError>; } -/// Same as the above trait but for the VectorQuery type. -/// Used whenever a lanceDb table vector search is executed. 
impl Query for lancedb::query::VectorQuery { async fn execute_query(&self) -> Result, VectorStoreError> { let record_batches = self