Skip to content

Commit

Permalink
docs: add doc strings
Browse files Browse the repository at this point in the history
  • Loading branch information
marieaurore123 committed Oct 2, 2024
1 parent 0050925 commit 4a6a87d
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 7 deletions.
9 changes: 7 additions & 2 deletions rig-lancedb/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,19 @@ fn serde_to_rig_error(e: serde_json::Error) -> VectorStoreError {
}

/// Vector store backed by a LanceDB table, generic over the embedding model `M`.
pub struct LanceDbVectorStore<M: EmbeddingModel> {
/// Defines which model is used to generate embeddings for the vector store.
model: M,
/// LanceDB table containing embeddings.
table: lancedb::Table,
/// Column name in `table` that contains the id of a record.
id_field: String,
/// Vector search params that are used during vector search operations.
search_params: SearchParams,
}

impl<M: EmbeddingModel> LanceDbVectorStore<M> {
/// Apply the search_params to the vector query.
/// This is a helper function used by the methods `top_n` and `top_n_ids` of the `VectorStoreIndex` trait.
fn build_query(&self, mut query: VectorQuery) -> VectorQuery {
let SearchParams {
distance_type,
Expand Down Expand Up @@ -136,7 +140,8 @@ impl<M: EmbeddingModel> LanceDbVectorStore<M> {
})
}

/// Define index on document table `id` field for search optimization.
/// Define an index on the specified fields of the LanceDB table for search optimization.
/// Note: it is required to add an index on the column containing the embeddings when performing an ANN type vector search.
pub async fn create_index(
&self,
index: Index,
Expand Down
3 changes: 3 additions & 0 deletions rig-lancedb/src/utils/deserializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ fn arrow_to_rig_error(e: ArrowError) -> VectorStoreError {
VectorStoreError::DatastoreError(Box::new(e))
}

/// Trait used to deserialize data returned from LanceDB queries into a vector of `serde_json::Value`.
/// Data returned by LanceDB is a vector of `RecordBatch` items.
pub trait RecordBatchDeserializer {
/// Deserializes `self` into a vector of `serde_json::Value`.
///
/// # Errors
/// Returns a `VectorStoreError` if the conversion fails.
fn deserialize(&self) -> Result<Vec<serde_json::Value>, VectorStoreError>;
}
Expand All @@ -43,6 +45,7 @@ impl RecordBatchDeserializer for Vec<RecordBatch> {

impl RecordBatchDeserializer for RecordBatch {
fn deserialize(&self) -> Result<Vec<serde_json::Value>, VectorStoreError> {
/// Recursive function that matches all possible data types stored in LanceDB and converts them to `serde_json::Value`.
fn type_matcher(column: &Arc<dyn Array>) -> Result<Vec<Value>, VectorStoreError> {
match column.data_type() {
DataType::Null => Ok(vec![serde_json::Value::Null]),
Expand Down
6 changes: 1 addition & 5 deletions rig-lancedb/src/utils/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,12 @@ use rig::vector_store::VectorStoreError;

use crate::lancedb_to_rig_error;

/// Trait that facilitates the conversion of columnar data returned by a LanceDB query to `serde_json::Value`.
/// Used whenever a LanceDB table is queried.
/// First, the query is executed and the result is obtained as a list of record batches (columnar data).
/// Then, the record batches are converted to `serde_json::Value`.
pub trait Query {
/// Executes the query and returns the result as a vector of `serde_json::Value`.
///
/// # Errors
/// Returns a `VectorStoreError` on failure.
async fn execute_query(&self) -> Result<Vec<serde_json::Value>, VectorStoreError>;
}

/// Implementation of the `Query` trait for the `VectorQuery` type.
/// Used whenever a LanceDB table vector search is executed.
impl Query for lancedb::query::VectorQuery {
async fn execute_query(&self) -> Result<Vec<serde_json::Value>, VectorStoreError> {
let record_batches = self
Expand Down

0 comments on commit 4a6a87d

Please sign in to comment.