From 5ed3fcc4013d506893f5daa3c77cb38ec9f528a3 Mon Sep 17 00:00:00 2001 From: Joshua Mo Date: Wed, 9 Apr 2025 19:17:41 +0100 Subject: [PATCH] refactor(lancedb): docs, make examples idempotent refactor: clippy --- rig-lancedb/README.md | 3 ++ .../examples/vector_search_local_ann.rs | 30 +++++++++----- .../examples/vector_search_local_enn.rs | 14 +++++-- rig-lancedb/examples/vector_search_s3_ann.rs | 40 ++++++++++++------- 4 files changed, 59 insertions(+), 28 deletions(-) diff --git a/rig-lancedb/README.md b/rig-lancedb/README.md index fd7be50..1c5e3ec 100644 --- a/rig-lancedb/README.md +++ b/rig-lancedb/README.md @@ -17,6 +17,9 @@ ## Rig-Lancedb This companion crate implements a Rig vector store based on Lancedb. +## Pre-requisites +If you are using `rig-lancedb` locally, you must ensure you have `protoc` (the [Protobuf Compiler](https://protobuf.dev/installation/)) installed. + ## Usage Add the companion crate to your `Cargo.toml`, along with the rig-core crate: diff --git a/rig-lancedb/examples/vector_search_local_ann.rs b/rig-lancedb/examples/vector_search_local_ann.rs index a4415ba..b7885e0 100644 --- a/rig-lancedb/examples/vector_search_local_ann.rs +++ b/rig-lancedb/examples/vector_search_local_ann.rs @@ -38,8 +38,15 @@ async fn main() -> Result<(), anyhow::Error> { .build() .await?; - let table = db - .create_table( + let table = if db + .table_names() + .execute() + .await? + .contains(&"definitions".to_string()) + { + db.open_table("definitions").execute().await? + } else { + db.create_table( "definitions", RecordBatchIterator::new( vec![as_record_batch(embeddings, model.ndims())], @@ -47,16 +54,19 @@ async fn main() -> Result<(), anyhow::Error> { ), ) .execute() - .await?; + .await? 
+ }; // See [LanceDB indexing](https://lancedb.github.io/lancedb/concepts/index_ivfpq/#product-quantization) for more information - table - .create_index( - &["embedding"], - lancedb::index::Index::IvfPq(IvfPqIndexBuilder::default()), - ) - .execute() - .await?; + if table.index_stats("embedding").await?.is_none() { + table + .create_index( + &["embedding"], + lancedb::index::Index::IvfPq(IvfPqIndexBuilder::default()), + ) + .execute() + .await?; + } // Define search_params params that will be used by the vector store to perform the vector search. let search_params = SearchParams::default(); diff --git a/rig-lancedb/examples/vector_search_local_enn.rs b/rig-lancedb/examples/vector_search_local_enn.rs index 5011238..a5dfd64 100644 --- a/rig-lancedb/examples/vector_search_local_enn.rs +++ b/rig-lancedb/examples/vector_search_local_enn.rs @@ -32,8 +32,15 @@ async fn main() -> Result<(), anyhow::Error> { // Initialize LanceDB locally. let db = lancedb::connect("data/lancedb-store").execute().await?; - let table = db - .create_table( + let table = if db + .table_names() + .execute() + .await? + .contains(&"definitions".to_string()) + { + db.open_table("definitions").execute().await? + } else { + db.create_table( "definitions", RecordBatchIterator::new( vec![as_record_batch(embeddings, model.ndims())], @@ -41,7 +48,8 @@ async fn main() -> Result<(), anyhow::Error> { ), ) .execute() - .await?; + .await? + }; let vector_store = LanceDbVectorIndex::new(table, model, "id", search_params).await?; diff --git a/rig-lancedb/examples/vector_search_s3_ann.rs b/rig-lancedb/examples/vector_search_s3_ann.rs index 61267e8..8f70d9f 100644 --- a/rig-lancedb/examples/vector_search_s3_ann.rs +++ b/rig-lancedb/examples/vector_search_s3_ann.rs @@ -44,8 +44,15 @@ async fn main() -> Result<(), anyhow::Error> { .build() .await?; - let table = db - .create_table( + let table = if db + .table_names() + .execute() + .await? 
+ .contains(&"definitions".to_string()) + { + db.open_table("definitions").execute().await? + } else { + db.create_table( "definitions", RecordBatchIterator::new( vec![as_record_batch(embeddings, model.ndims())], @@ -53,21 +60,24 @@ async fn main() -> Result<(), anyhow::Error> { ), ) .execute() - .await?; + .await? + }; // See [LanceDB indexing](https://lancedb.github.io/lancedb/concepts/index_ivfpq/#product-quantization) for more information - table - .create_index( - &["embedding"], - lancedb::index::Index::IvfPq( - IvfPqIndexBuilder::default() - // This overrides the default distance type of L2. - // Needs to be the same distance type as the one used in search params. - .distance_type(DistanceType::Cosine), - ), - ) - .execute() - .await?; + if table.index_stats("embedding").await?.is_none() { + table + .create_index( + &["embedding"], + lancedb::index::Index::IvfPq( + IvfPqIndexBuilder::default() + // This overrides the default distance type of L2. + // Needs to be the same distance type as the one used in search params. + .distance_type(DistanceType::Cosine), + ), + ) + .execute() + .await?; + } // Define search_params params that will be used by the vector store to perform the vector search. let search_params = SearchParams::default().distance_type(DistanceType::Cosine);