From ff4618abe3f147c5354fccb982d5d300d90f2d6e Mon Sep 17 00:00:00 2001
From: Raphael Taylor-Davies <r.taylordavies@googlemail.com>
Date: Tue, 9 Apr 2024 16:18:38 +0100
Subject: [PATCH] Add more docs

---
 object_store/src/lib.rs     | 46 +++++++++++++++++++++++++++++++++++--
 object_store/src/payload.rs | 14 +++++------
 2 files changed, 51 insertions(+), 9 deletions(-)
diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs
index 692160a03596..157852ff9a6e 100644
--- a/object_store/src/lib.rs
+++ b/object_store/src/lib.rs
@@ -245,7 +245,7 @@
 //! # }
 //! ```
 //!
-//! #  Put Object
+//! # Put Object
 //!
 //! Use the [`ObjectStore::put`] method to atomically write data.
 //!
@@ -266,7 +266,7 @@
 //! # }
 //! ```
 //!
-//! #  Multipart Upload
+//! # Multipart Upload
 //!
 //! Use the [`ObjectStore::put_multipart`] method to atomically write a large amount of data
 //!
@@ -319,6 +319,48 @@
 //! # }
 //! ```
 //!
+//! # Vectored Write
+//!
+//! When writing data it is often the case that the size of the output is not known ahead of time.
+//!
+//! A common approach to handling this is to append to a growable `Vec`, whereby the underlying
+//! allocation is repeatedly reallocated, each time doubling the capacity. The performance of
+//! this is suboptimal as reallocating memory will often involve copying it to a new location.
+//!
+//! Fortunately, as [`PutPayload`] does not require memory regions to be contiguous, it is
+//! possible to instead allocate memory in chunks and avoid reallocating. [`PutPayloadMut`]
+//! encapsulates this approach.
+//!
+//! ```
+//! # use object_store::local::LocalFileSystem;
+//! # use object_store::{ObjectStore, PutPayloadMut};
+//! # use std::sync::Arc;
+//! # use bytes::Bytes;
+//! # use tokio::io::AsyncWriteExt;
+//! # use object_store::path::Path;
+//! # fn get_object_store() -> Arc<dyn ObjectStore> {
+//! #   Arc::new(LocalFileSystem::new())
+//! # }
+//! # async fn multi_upload() {
+//! #
+//! let object_store: Arc<dyn ObjectStore> = get_object_store();
+//! let path = Path::from("data/large_file");
+//! let mut buffer = PutPayloadMut::new().with_block_size(8192);
+//! for _ in 0..22 {
+//!     buffer.extend_from_slice(&[0; 1024]);
+//! }
+//! let payload = buffer.freeze();
+//!
+//! // Payload consists of 3 separate 8KB allocations
+//! assert_eq!(payload.as_ref().len(), 3);
+//! assert_eq!(payload.as_ref()[0].len(), 8192);
+//! assert_eq!(payload.as_ref()[1].len(), 8192);
+//! assert_eq!(payload.as_ref()[2].len(), 6144);
+//!
+//! object_store.put(&path, payload).await.unwrap();
+//! # }
+//! ```
+//!
 //! # Conditional Fetch
 //!
 //! More complex object retrieval can be supported by [`ObjectStore::get_opts`].
diff --git a/object_store/src/payload.rs b/object_store/src/payload.rs
index c957666a79c1..4e5d36d40185 100644
--- a/object_store/src/payload.rs
+++ b/object_store/src/payload.rs
@@ -189,7 +189,7 @@ pub struct PutPayloadMut {
     len: usize,
     completed: Vec<Bytes>,
     in_progress: Vec<u8>,
-    min_alloc: usize,
+    block_size: usize,
 }
 
 impl Default for PutPayloadMut {
@@ -199,7 +199,7 @@ impl Default for PutPayloadMut {
             completed: vec![],
             in_progress: vec![],
 
-            min_alloc: 8 * 1024,
+            block_size: 8 * 1024,
         }
     }
 }
@@ -210,11 +210,11 @@ impl PutPayloadMut {
         Self::default()
     }
 
-    /// Override the minimum allocation size
+    /// Allocate data in chunks of at least `block_size` bytes
     ///
     /// Defaults to 8KB
-    pub fn with_minimum_allocation_size(self, min_alloc: usize) -> Self {
-        Self { min_alloc, ..self }
+    pub fn with_block_size(self, block_size: usize) -> Self {
+        Self { block_size, ..self }
     }
 
     /// Write bytes into this [`PutPayloadMut`]
@@ -224,7 +224,7 @@ impl PutPayloadMut {
 
         self.in_progress.extend_from_slice(&slice[..to_copy]);
         if self.in_progress.capacity() == self.in_progress.len() {
-            let new_cap = self.min_alloc.max(slice.len() - to_copy);
+            let new_cap = self.block_size.max(slice.len() - to_copy);
             let completed = std::mem::replace(&mut self.in_progress, Vec::with_capacity(new_cap));
             if !completed.is_empty() {
                 self.completed.push(completed.into())
@@ -277,7 +277,7 @@ mod test {
 
     #[test]
     fn test_put_payload() {
-        let mut chunk = PutPayloadMut::new().with_minimum_allocation_size(23);
+        let mut chunk = PutPayloadMut::new().with_block_size(23);
         chunk.extend_from_slice(&[1; 16]);
         chunk.extend_from_slice(&[2; 32]);
         chunk.extend_from_slice(&[2; 5]);