From ff4618abe3f147c5354fccb982d5d300d90f2d6e Mon Sep 17 00:00:00 2001 From: Raphael Taylor-Davies Date: Tue, 9 Apr 2024 16:18:38 +0100 Subject: [PATCH] Add more docs --- object_store/src/lib.rs | 46 +++++++++++++++++++++++++++++++++++-- object_store/src/payload.rs | 14 +++++------ 2 files changed, 51 insertions(+), 9 deletions(-) diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs index 692160a03596..157852ff9a6e 100644 --- a/object_store/src/lib.rs +++ b/object_store/src/lib.rs @@ -245,7 +245,7 @@ //! # } //! ``` //! -//! # Put Object +//! # Put Object //! //! Use the [`ObjectStore::put`] method to atomically write data. //! @@ -266,7 +266,7 @@ //! # } //! ``` //! -//! # Multipart Upload +//! # Multipart Upload //! //! Use the [`ObjectStore::put_multipart`] method to atomically write a large amount of data //! @@ -319,6 +319,48 @@ //! # } //! ``` //! +//! # Vectored Write +//! +//! When writing data it is often the case that the size of the output is not known ahead of time. +//! +//! A common approach to handling this is to bump-allocate a `Vec`, whereby the underlying +//! allocation is repeatedly reallocated, each time doubling the capacity. The performance of +//! this is suboptimal as reallocating memory will often involve copying it to a new location. +//! +//! Fortunately, as [`PutPayload`] does not require memory regions to be contiguous, it is +//! possible to instead allocate memory in chunks and avoid bump allocating. [`PutPayloadMut`] +//! encapsulates this approach. +//! +//! ``` +//! # use object_store::local::LocalFileSystem; +//! # use object_store::{ObjectStore, PutPayloadMut}; +//! # use std::sync::Arc; +//! # use bytes::Bytes; +//! # use tokio::io::AsyncWriteExt; +//! # use object_store::path::Path; +//! # fn get_object_store() -> Arc<dyn ObjectStore> { +//! # Arc::new(LocalFileSystem::new()) +//! # } +//! # async fn multi_upload() { +//! # +//! let object_store: Arc<dyn ObjectStore> = get_object_store(); +//! let path = Path::from("data/large_file"); +//! 
let mut buffer = PutPayloadMut::new().with_block_size(8192); +//! for _ in 0..22 { +//! buffer.extend_from_slice(&[0; 1024]); +//! } +//! let payload = buffer.freeze(); +//! +//! // Payload consists of 3 separate 8KB allocations +//! assert_eq!(payload.as_ref().len(), 3); +//! assert_eq!(payload.as_ref()[0].len(), 8192); +//! assert_eq!(payload.as_ref()[1].len(), 8192); +//! assert_eq!(payload.as_ref()[2].len(), 6144); +//! +//! object_store.put(&path, payload).await.unwrap(); +//! # } +//! ``` +//! //! # Conditional Fetch //! //! More complex object retrieval can be supported by [`ObjectStore::get_opts`]. diff --git a/object_store/src/payload.rs b/object_store/src/payload.rs index c957666a79c1..4e5d36d40185 100644 --- a/object_store/src/payload.rs +++ b/object_store/src/payload.rs @@ -189,7 +189,7 @@ pub struct PutPayloadMut { len: usize, completed: Vec, in_progress: Vec, - min_alloc: usize, + block_size: usize, } impl Default for PutPayloadMut { @@ -199,7 +199,7 @@ impl Default for PutPayloadMut { completed: vec![], in_progress: vec![], - min_alloc: 8 * 1024, + block_size: 8 * 1024, } } } @@ -210,11 +210,11 @@ impl PutPayloadMut { Self::default() } - /// Override the minimum allocation size + /// Allocate data in chunks of `block_size` /// /// Defaults to 8KB - pub fn with_minimum_allocation_size(self, min_alloc: usize) -> Self { - Self { min_alloc, ..self } + pub fn with_block_size(self, block_size: usize) -> Self { + Self { block_size, ..self } } /// Write bytes into this [`PutPayloadMut`] @@ -224,7 +224,7 @@ impl PutPayloadMut { self.in_progress.extend_from_slice(&slice[..to_copy]); if self.in_progress.capacity() == self.in_progress.len() { - let new_cap = self.min_alloc.max(slice.len() - to_copy); + let new_cap = self.block_size.max(slice.len() - to_copy); let completed = std::mem::replace(&mut self.in_progress, Vec::with_capacity(new_cap)); if !completed.is_empty() { self.completed.push(completed.into()) @@ -277,7 +277,7 @@ mod test { #[test] fn 
test_put_payload() { - let mut chunk = PutPayloadMut::new().with_minimum_allocation_size(23); + let mut chunk = PutPayloadMut::new().with_block_size(23); chunk.extend_from_slice(&[1; 16]); chunk.extend_from_slice(&[2; 32]); chunk.extend_from_slice(&[2; 5]);