From fc245b55ba85d3535f0702706e886ce67e95f1d2 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 6 Jan 2025 16:03:51 -0500 Subject: [PATCH 1/9] Improve `Buffer` documentation, deprecate `Buffer::from_bytes`, add `From<Bytes>` and `From<bytes::Bytes>` impls (#6939) * Improve Bytes documentation * Improve Buffer documentation, add From<Bytes> and From<bytes::Bytes> impls * avoid linking to private docs * Deprecate `Buffer::from_bytes` * Apply suggestions from code review Co-authored-by: Jeffrey Vo --------- Co-authored-by: Jeffrey Vo --- arrow-buffer/src/buffer/immutable.rs | 118 ++++++++++++++---- arrow-buffer/src/buffer/mutable.rs | 2 +- arrow-buffer/src/bytes.rs | 8 +- arrow-flight/src/decode.rs | 2 +- arrow-flight/src/sql/client.rs | 2 +- .../src/arrow/array_reader/byte_view_array.rs | 10 +- 6 files changed, 104 insertions(+), 38 deletions(-) diff --git a/arrow-buffer/src/buffer/immutable.rs b/arrow-buffer/src/buffer/immutable.rs index cf1d6f366751..fd145ce2306e 100644 --- a/arrow-buffer/src/buffer/immutable.rs +++ b/arrow-buffer/src/buffer/immutable.rs @@ -28,8 +28,43 @@ use crate::{bit_util, bytes::Bytes, native::ArrowNativeType}; use super::ops::bitwise_unary_op_helper; use super::{MutableBuffer, ScalarBuffer}; -/// Buffer represents a contiguous memory region that can be shared with other buffers and across -/// thread boundaries. +/// A contiguous memory region that can be shared with other buffers and across +/// thread boundaries that stores Arrow data. +/// +/// `Buffer`s can be sliced and cloned without copying the underlying data and can +/// be created from memory allocated by non-Rust sources such as C/C++. +/// +/// # Example: Create a `Buffer` from a `Vec` (without copying) +/// ``` +/// # use arrow_buffer::Buffer; +/// let vec: Vec<u32> = vec![1, 2, 3]; +/// let buffer = Buffer::from(vec); +/// ``` +/// +/// # Example: Convert a `Buffer` to a `Vec` (without copying) +/// +/// Use [`Self::into_vec`] to convert a `Buffer` back into a `Vec` if there are +/// no other references and the types are aligned correctly. +/// ``` +/// # use arrow_buffer::Buffer; +/// # let vec: Vec<u32> = vec![1, 2, 3]; +/// # let buffer = Buffer::from(vec); +/// // convert the buffer back into a Vec of u32 +/// // note this will fail if the buffer is shared or not aligned correctly +/// let vec: Vec<u32> = buffer.into_vec().unwrap(); +/// ``` +/// +/// # Example: Create a `Buffer` from a [`bytes::Bytes`] (without copying) +/// +/// [`bytes::Bytes`] is a common type in the Rust ecosystem for shared memory +/// regions. You can create a buffer from a `Bytes` instance using the `From` +/// implementation, also without copying. +/// +/// ``` +/// # use arrow_buffer::Buffer; +/// let bytes = bytes::Bytes::from("hello"); +/// let buffer = Buffer::from(bytes); +/// ``` #[derive(Clone, Debug)] pub struct Buffer { /// the internal byte buffer. @@ -59,24 +94,15 @@ unsafe impl Send for Buffer where Bytes: Send {} unsafe impl Sync for Buffer where Bytes: Sync {} impl Buffer { - /// Auxiliary method to create a new Buffer + /// Create a new Buffer from a (internal) `Bytes` /// - /// This can be used with a [`bytes::Bytes`] via `into()`: + /// NOTE despite the same name, `Bytes` is an internal struct in arrow-rs + /// and is different than [`bytes::Bytes`]. /// - /// ``` - /// # use arrow_buffer::Buffer; - /// let bytes = bytes::Bytes::from_static(b"foo"); - /// let buffer = Buffer::from_bytes(bytes.into()); - /// ``` - #[inline] + /// See examples on [`Buffer`] for ways to create a buffer from a [`bytes::Bytes`].
+ #[deprecated(since = "54.1.0", note = "Use Buffer::from instead")] pub fn from_bytes(bytes: Bytes) -> Self { - let length = bytes.len(); - let ptr = bytes.as_ptr(); - Buffer { - data: Arc::new(bytes), - ptr, - length, - } + Self::from(bytes) } /// Returns the offset, in bytes, of `Self::ptr` to `Self::data` @@ -107,8 +133,11 @@ impl Buffer { buffer.into() } - /// Creates a buffer from an existing memory region. Ownership of the memory is tracked via reference counting - /// and the memory will be freed using the `drop` method of [crate::alloc::Allocation] when the reference count reaches zero. + /// Creates a buffer from an existing memory region. + /// + /// Ownership of the memory is tracked via reference counting + /// and the memory will be freed using the `drop` method of + /// [crate::alloc::Allocation] when the reference count reaches zero. /// /// # Arguments /// @@ -155,7 +184,7 @@ impl Buffer { self.data.capacity() } - /// Tried to shrink the capacity of the buffer as much as possible, freeing unused memory. + /// Tries to shrink the capacity of the buffer as much as possible, freeing unused memory. /// /// If the buffer is shared, this is a no-op. /// @@ -190,7 +219,7 @@ impl Buffer { } } - /// Returns whether the buffer is empty. + /// Returns true if the buffer is empty. #[inline] pub fn is_empty(&self) -> bool { self.length == 0 } @@ -206,7 +235,9 @@ impl Buffer { } /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`. - /// Doing so allows the same memory region to be shared between buffers. + /// + /// This function is `O(1)` and does not copy any data, allowing the + /// same memory region to be shared between buffers. /// /// # Panics /// @@ -240,7 +271,10 @@ impl Buffer { /// Returns a new [Buffer] that is a slice of this buffer starting at `offset`, /// with `length` bytes. - /// Doing so allows the same memory region to be shared between buffers. + /// + /// This function is `O(1)` and does not copy any data, allowing the same + /// memory region to be shared between buffers. + /// /// # Panics /// Panics iff `(offset + length)` is larger than the existing length. pub fn slice_with_length(&self, offset: usize, length: usize) -> Self { @@ -328,10 +362,16 @@ impl Buffer { }) } - /// Returns `Vec<T>` for mutating the buffer + /// Converts self into a `Vec`, if possible. + /// + /// This can be used to reuse / mutate the underlying data. /// - /// Returns `Err(self)` if this buffer does not have the same [`Layout`] as - /// the destination Vec or contains a non-zero offset + /// # Errors + /// + /// Returns `Err(self)` if + /// 1. this buffer does not have the same [`Layout`] as the destination Vec + /// 2. contains a non-zero offset + /// 3.
The buffer is shared pub fn into_vec(self) -> Result<Vec<T>, Self> { let layout = match self.data.deallocation() { Deallocation::Standard(l) => l, @@ -414,7 +454,29 @@ impl From> for Buffer { } } -/// Creating a `Buffer` instance by storing the boolean values into the buffer +/// Convert from internal `Bytes` (not [`bytes::Bytes`]) to `Buffer` +impl From<Bytes> for Buffer { + #[inline] + fn from(bytes: Bytes) -> Self { + let length = bytes.len(); + let ptr = bytes.as_ptr(); + Self { + data: Arc::new(bytes), + ptr, + length, + } + } +} + +/// Convert from [`bytes::Bytes`], not internal `Bytes` to `Buffer` +impl From<bytes::Bytes> for Buffer { + fn from(bytes: bytes::Bytes) -> Self { + let bytes: Bytes = bytes.into(); + Self::from(bytes) + } +} + +/// Create a `Buffer` instance by storing the boolean values into the buffer impl FromIterator<bool> for Buffer { fn from_iter<I>(iter: I) -> Self where @@ -447,7 +509,9 @@ impl From> for Buffer { impl Buffer { /// Creates a [`Buffer`] from an [`Iterator`] with a trusted (upper) length. + /// /// Prefer this to `collect` whenever possible, as it is ~60% faster. + /// /// # Example /// ``` /// # use arrow_buffer::buffer::Buffer; diff --git a/arrow-buffer/src/buffer/mutable.rs b/arrow-buffer/src/buffer/mutable.rs index c4315a1d64cd..5ad55e306e2a 100644 --- a/arrow-buffer/src/buffer/mutable.rs +++ b/arrow-buffer/src/buffer/mutable.rs @@ -328,7 +328,7 @@ impl MutableBuffer { pub(super) fn into_buffer(self) -> Buffer { let bytes = unsafe { Bytes::new(self.data, self.len, Deallocation::Standard(self.layout)) }; std::mem::forget(self); - Buffer::from_bytes(bytes) + Buffer::from(bytes) } /// View this buffer as a mutable slice of a specific type. diff --git a/arrow-buffer/src/bytes.rs b/arrow-buffer/src/bytes.rs index 77724137aef7..b811bd2c6b40 100644 --- a/arrow-buffer/src/bytes.rs +++ b/arrow-buffer/src/bytes.rs @@ -28,14 +28,18 @@ use crate::buffer::dangling_ptr; /// A continuous, fixed-size, immutable memory region that knows how to de-allocate itself. /// -/// This structs' API is inspired by the `bytes::Bytes`, but it is not limited to using rust's -/// global allocator nor u8 alignment. +/// Note that this structure is an internal implementation detail of the +/// arrow-rs crate. While it has the same name and similar API as +/// [`bytes::Bytes`] it is not limited to rust's global allocator nor u8 +/// alignment. It is possible to create a `Bytes` from `bytes::Bytes` using the +/// `From` implementation. /// /// In the most common case, this buffer is allocated using [`alloc`](std::alloc::alloc) /// with an alignment of [`ALIGNMENT`](crate::alloc::ALIGNMENT) /// /// When the region is allocated by a different allocator, [Deallocation::Custom], this calls the /// custom deallocator to deallocate the region when it is no longer needed.
+/// pub struct Bytes { /// The raw pointer to be beginning of the region ptr: NonNull, diff --git a/arrow-flight/src/decode.rs b/arrow-flight/src/decode.rs index 7bafc384306b..760fc926fca6 100644 --- a/arrow-flight/src/decode.rs +++ b/arrow-flight/src/decode.rs @@ -295,7 +295,7 @@ impl FlightDataDecoder { )); }; - let buffer = Buffer::from_bytes(data.data_body.into()); + let buffer = Buffer::from(data.data_body); let dictionary_batch = message.header_as_dictionary_batch().ok_or_else(|| { FlightError::protocol( "Could not get dictionary batch from DictionaryBatch message", diff --git a/arrow-flight/src/sql/client.rs b/arrow-flight/src/sql/client.rs index a6e228737b3f..6d3ac3dbe610 100644 --- a/arrow-flight/src/sql/client.rs +++ b/arrow-flight/src/sql/client.rs @@ -721,7 +721,7 @@ pub fn arrow_data_from_flight_data( let dictionaries_by_field = HashMap::new(); let record_batch = read_record_batch( - &Buffer::from_bytes(flight_data.data_body.into()), + &Buffer::from(flight_data.data_body), ipc_record_batch, arrow_schema_ref.clone(), &dictionaries_by_field, diff --git a/parquet/src/arrow/array_reader/byte_view_array.rs b/parquet/src/arrow/array_reader/byte_view_array.rs index 5845e2c08cec..92a8b0592d0d 100644 --- a/parquet/src/arrow/array_reader/byte_view_array.rs +++ b/parquet/src/arrow/array_reader/byte_view_array.rs @@ -316,9 +316,8 @@ impl ByteViewArrayDecoderPlain { } pub fn read(&mut self, output: &mut ViewBuffer, len: usize) -> Result { - // Here we convert `bytes::Bytes` into `arrow_buffer::Bytes`, which is zero copy - // Then we convert `arrow_buffer::Bytes` into `arrow_buffer:Buffer`, which is also zero copy - let buf = arrow_buffer::Buffer::from_bytes(self.buf.clone().into()); + // Zero copy convert `bytes::Bytes` into `arrow_buffer::Buffer` + let buf = arrow_buffer::Buffer::from(self.buf.clone()); let block_id = output.append_block(buf); let to_read = len.min(self.max_remaining_values); @@ -549,9 +548,8 @@ impl ByteViewArrayDecoderDeltaLength { let src_lengths = &self.lengths[self.length_offset..self.length_offset + to_read]; - // Here we convert `bytes::Bytes` into `arrow_buffer::Bytes`, which is zero copy - // Then we convert `arrow_buffer::Bytes` into `arrow_buffer:Buffer`, which is also zero copy - let bytes = arrow_buffer::Buffer::from_bytes(self.data.clone().into()); + // Zero copy convert `bytes::Bytes` into `arrow_buffer::Buffer` + let bytes = Buffer::from(self.data.clone()); let block_id = output.append_block(bytes); let mut current_offset = self.data_offset; From a160e94aa8f1845a264ef208a2ab0fb8d9137240 Mon Sep 17 00:00:00 2001 From: Jinpeng Date: Mon, 6 Jan 2025 16:09:05 -0500 Subject: [PATCH 2/9] Convert some panics that happen on invalid parquet files to error results (#6738) * Reduce panics * t pushmove integer logical type from format.rs to schema type.rs * remove some changes as per reviews * use wrapping_shl * fix typo in error message * return error for invalid decimal length --------- Co-authored-by: jp0317 Co-authored-by: Andrew Lamb --- parquet/src/errors.rs | 7 ++++ parquet/src/file/metadata/reader.rs | 26 ++++++------- parquet/src/file/serialized_reader.rs | 53 ++++++++++++++++++++++---- parquet/src/file/statistics.rs | 26 +++++++++++++ parquet/src/schema/types.rs | 25 +++++++++++- parquet/src/thrift.rs | 35 ++++++++++++++--- parquet/tests/arrow_reader/bad_data.rs | 2 +- 7 files changed, 146 insertions(+), 28 deletions(-) diff --git a/parquet/src/errors.rs b/parquet/src/errors.rs index 8dc97f4ca2e6..d749287bba62 100644 --- a/parquet/src/errors.rs +++ 
b/parquet/src/errors.rs @@ -17,6 +17,7 @@ //! Common Parquet errors and macros. +use core::num::TryFromIntError; use std::error::Error; use std::{cell, io, result, str}; @@ -81,6 +82,12 @@ impl Error for ParquetError { } } +impl From<TryFromIntError> for ParquetError { + fn from(e: TryFromIntError) -> ParquetError { + ParquetError::General(format!("Integer overflow: {e}")) + } +} + impl From<io::Error> for ParquetError { fn from(e: io::Error) -> ParquetError { ParquetError::External(Box::new(e)) diff --git a/parquet/src/file/metadata/reader.rs b/parquet/src/file/metadata/reader.rs index ec2cd1094d3a..c6715a33b5ae 100644 --- a/parquet/src/file/metadata/reader.rs +++ b/parquet/src/file/metadata/reader.rs @@ -627,7 +627,8 @@ impl ParquetMetaDataReader { for rg in t_file_metadata.row_groups { row_groups.push(RowGroupMetaData::from_thrift(schema_descr.clone(), rg)?); } - let column_orders = Self::parse_column_orders(t_file_metadata.column_orders, &schema_descr); + let column_orders = + Self::parse_column_orders(t_file_metadata.column_orders, &schema_descr)?; let file_metadata = FileMetaData::new( t_file_metadata.version, @@ -645,15 +646,13 @@ impl ParquetMetaDataReader { fn parse_column_orders( t_column_orders: Option<Vec<TColumnOrder>>, schema_descr: &SchemaDescriptor, - ) -> Option<Vec<ColumnOrder>> { + ) -> Result<Option<Vec<ColumnOrder>>> { match t_column_orders { Some(orders) => { // Should always be the case - assert_eq!( - orders.len(), - schema_descr.num_columns(), - "Column order length mismatch" - ); + if orders.len() != schema_descr.num_columns() { + return Err(general_err!("Column order length mismatch")); + }; let mut res = Vec::new(); for (i, column) in schema_descr.columns().iter().enumerate() { match orders[i] { @@ -667,9 +666,9 @@ impl ParquetMetaDataReader { } } } - Some(res) + Ok(Some(res)) } - None => None, + None => Ok(None), } } } @@ -741,7 +740,7 @@ mod tests { ]); assert_eq!( - ParquetMetaDataReader::parse_column_orders(t_column_orders, &schema_descr), + ParquetMetaDataReader::parse_column_orders(t_column_orders, &schema_descr).unwrap(), Some(vec![ ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED), ColumnOrder::TYPE_DEFINED_ORDER(SortOrder::SIGNED) ]) ); // Test when no column orders are defined.
assert_eq!( - ParquetMetaDataReader::parse_column_orders(None, &schema_descr), + ParquetMetaDataReader::parse_column_orders(None, &schema_descr).unwrap(), None ); } #[test] - #[should_panic(expected = "Column order length mismatch")] fn test_metadata_column_orders_len_mismatch() { let schema = SchemaType::group_type_builder("schema").build().unwrap(); let schema_descr = SchemaDescriptor::new(Arc::new(schema)); let t_column_orders = Some(vec![TColumnOrder::TYPEORDER(TypeDefinedOrder::new())]); - ParquetMetaDataReader::parse_column_orders(t_column_orders, &schema_descr); + let res = ParquetMetaDataReader::parse_column_orders(t_column_orders, &schema_descr); + assert!(res.is_err()); + assert!(format!("{:?}", res.unwrap_err()).contains("Column order length mismatch")); } #[test] diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index 06f3cf9fb23f..a942481f7e4d 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -435,7 +435,7 @@ pub(crate) fn decode_page( let is_sorted = dict_header.is_sorted.unwrap_or(false); Page::DictionaryPage { buf: buffer, - num_values: dict_header.num_values as u32, + num_values: dict_header.num_values.try_into()?, encoding: Encoding::try_from(dict_header.encoding)?, is_sorted, } @@ -446,7 +446,7 @@ pub(crate) fn decode_page( .ok_or_else(|| ParquetError::General("Missing V1 data page header".to_string()))?; Page::DataPage { buf: buffer, - num_values: header.num_values as u32, + num_values: header.num_values.try_into()?, encoding: Encoding::try_from(header.encoding)?, def_level_encoding: Encoding::try_from(header.definition_level_encoding)?, rep_level_encoding: Encoding::try_from(header.repetition_level_encoding)?, @@ -460,12 +460,12 @@ pub(crate) fn decode_page( let is_compressed = header.is_compressed.unwrap_or(true); Page::DataPageV2 { buf: buffer, - num_values: header.num_values as u32, + num_values: header.num_values.try_into()?, encoding: Encoding::try_from(header.encoding)?, - num_nulls: header.num_nulls as u32, - num_rows: header.num_rows as u32, - def_levels_byte_len: header.definition_levels_byte_length as u32, - rep_levels_byte_len: header.repetition_levels_byte_length as u32, + num_nulls: header.num_nulls.try_into()?, + num_rows: header.num_rows.try_into()?, + def_levels_byte_len: header.definition_levels_byte_length.try_into()?, + rep_levels_byte_len: header.repetition_levels_byte_length.try_into()?, is_compressed, statistics: statistics::from_thrift(physical_type, header.statistics)?, } @@ -578,6 +578,27 @@ impl Iterator for SerializedPageReader { } } +fn verify_page_header_len(header_len: usize, remaining_bytes: usize) -> Result<()> { + if header_len > remaining_bytes { + return Err(eof_err!("Invalid page header")); + } + Ok(()) +} + +fn verify_page_size( + compressed_size: i32, + uncompressed_size: i32, + remaining_bytes: usize, +) -> Result<()> { + // The page's compressed size should not exceed the remaining bytes that are + // available to read. The page's uncompressed size is the expected size + // after decompression, which can never be negative. 
+ if compressed_size < 0 || compressed_size as usize > remaining_bytes || uncompressed_size < 0 { + return Err(eof_err!("Invalid page header")); + } + Ok(()) +} + impl PageReader for SerializedPageReader { fn get_next_page(&mut self) -> Result> { loop { @@ -596,10 +617,16 @@ impl PageReader for SerializedPageReader { *header } else { let (header_len, header) = read_page_header_len(&mut read)?; + verify_page_header_len(header_len, *remaining)?; *offset += header_len; *remaining -= header_len; header }; + verify_page_size( + header.compressed_page_size, + header.uncompressed_page_size, + *remaining, + )?; let data_len = header.compressed_page_size as usize; *offset += data_len; *remaining -= data_len; @@ -683,6 +710,7 @@ impl PageReader for SerializedPageReader { } else { let mut read = self.reader.get_read(*offset as u64)?; let (header_len, header) = read_page_header_len(&mut read)?; + verify_page_header_len(header_len, *remaining_bytes)?; *offset += header_len; *remaining_bytes -= header_len; let page_meta = if let Ok(page_meta) = (&header).try_into() { @@ -733,12 +761,23 @@ impl PageReader for SerializedPageReader { next_page_header, } => { if let Some(buffered_header) = next_page_header.take() { + verify_page_size( + buffered_header.compressed_page_size, + buffered_header.uncompressed_page_size, + *remaining_bytes, + )?; // The next page header has already been peeked, so just advance the offset *offset += buffered_header.compressed_page_size as usize; *remaining_bytes -= buffered_header.compressed_page_size as usize; } else { let mut read = self.reader.get_read(*offset as u64)?; let (header_len, header) = read_page_header_len(&mut read)?; + verify_page_header_len(header_len, *remaining_bytes)?; + verify_page_size( + header.compressed_page_size, + header.uncompressed_page_size, + *remaining_bytes, + )?; let data_page_size = header.compressed_page_size as usize; *offset += header_len + data_page_size; *remaining_bytes -= header_len + data_page_size; diff --git a/parquet/src/file/statistics.rs b/parquet/src/file/statistics.rs index 2e05b83369cf..b7522a76f0fc 100644 --- a/parquet/src/file/statistics.rs +++ b/parquet/src/file/statistics.rs @@ -157,6 +157,32 @@ pub fn from_thrift( stats.max_value }; + fn check_len(min: &Option>, max: &Option>, len: usize) -> Result<()> { + if let Some(min) = min { + if min.len() < len { + return Err(ParquetError::General( + "Insufficient bytes to parse min statistic".to_string(), + )); + } + } + if let Some(max) = max { + if max.len() < len { + return Err(ParquetError::General( + "Insufficient bytes to parse max statistic".to_string(), + )); + } + } + Ok(()) + } + + match physical_type { + Type::BOOLEAN => check_len(&min, &max, 1), + Type::INT32 | Type::FLOAT => check_len(&min, &max, 4), + Type::INT64 | Type::DOUBLE => check_len(&min, &max, 8), + Type::INT96 => check_len(&min, &max, 12), + _ => Ok(()), + }?; + // Values are encoded using PLAIN encoding definition, except that // variable-length byte arrays do not include a length prefix. 
// diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs index d168e46de047..d9e9b22e809f 100644 --- a/parquet/src/schema/types.rs +++ b/parquet/src/schema/types.rs @@ -556,7 +556,11 @@ impl<'a> PrimitiveTypeBuilder<'a> { } } PhysicalType::FIXED_LEN_BYTE_ARRAY => { - let max_precision = (2f64.powi(8 * self.length - 1) - 1f64).log10().floor() as i32; + let length = self + .length + .checked_mul(8) + .ok_or(general_err!("Invalid length {} for Decimal", self.length))?; + let max_precision = (2f64.powi(length - 1) - 1f64).log10().floor() as i32; if self.precision > max_precision { return Err(general_err!( @@ -1171,9 +1175,25 @@ pub fn from_thrift(elements: &[SchemaElement]) -> Result { )); } + if !schema_nodes[0].is_group() { + return Err(general_err!("Expected root node to be a group type")); + } + Ok(schema_nodes.remove(0)) } +/// Checks if the logical type is valid. +fn check_logical_type(logical_type: &Option) -> Result<()> { + if let Some(LogicalType::Integer { bit_width, .. }) = *logical_type { + if bit_width != 8 && bit_width != 16 && bit_width != 32 && bit_width != 64 { + return Err(general_err!( + "Bit width must be 8, 16, 32, or 64 for Integer logical type" + )); + } + } + Ok(()) +} + /// Constructs a new Type from the `elements`, starting at index `index`. /// The first result is the starting index for the next Type after this one. If it is /// equal to `elements.len()`, then this Type is the last one. @@ -1198,6 +1218,9 @@ fn from_thrift_helper(elements: &[SchemaElement], index: usize) -> Result<(usize .logical_type .as_ref() .map(|value| LogicalType::from(value.clone())); + + check_logical_type(&logical_type)?; + let field_id = elements[index].field_id; match elements[index].num_children { // From parquet-format: diff --git a/parquet/src/thrift.rs b/parquet/src/thrift.rs index ceb6b1c29fe8..b216fec6f3e7 100644 --- a/parquet/src/thrift.rs +++ b/parquet/src/thrift.rs @@ -67,7 +67,7 @@ impl<'a> TCompactSliceInputProtocol<'a> { let mut shift = 0; loop { let byte = self.read_byte()?; - in_progress |= ((byte & 0x7F) as u64) << shift; + in_progress |= ((byte & 0x7F) as u64).wrapping_shl(shift); shift += 7; if byte & 0x80 == 0 { return Ok(in_progress); @@ -96,13 +96,22 @@ impl<'a> TCompactSliceInputProtocol<'a> { } } +macro_rules! 
thrift_unimplemented { + () => { + Err(thrift::Error::Protocol(thrift::ProtocolError { + kind: thrift::ProtocolErrorKind::NotImplemented, + message: "not implemented".to_string(), + })) + }; +} + impl TInputProtocol for TCompactSliceInputProtocol<'_> { fn read_message_begin(&mut self) -> thrift::Result { unimplemented!() } fn read_message_end(&mut self) -> thrift::Result<()> { - unimplemented!() + thrift_unimplemented!() } fn read_struct_begin(&mut self) -> thrift::Result> { @@ -147,7 +156,21 @@ impl TInputProtocol for TCompactSliceInputProtocol<'_> { ), _ => { if field_delta != 0 { - self.last_read_field_id += field_delta as i16; + self.last_read_field_id = self + .last_read_field_id + .checked_add(field_delta as i16) + .map_or_else( + || { + Err(thrift::Error::Protocol(thrift::ProtocolError { + kind: thrift::ProtocolErrorKind::InvalidData, + message: format!( + "cannot add {} to {}", + field_delta, self.last_read_field_id + ), + })) + }, + Ok, + )?; } else { self.last_read_field_id = self.read_i16()?; }; @@ -226,15 +249,15 @@ impl TInputProtocol for TCompactSliceInputProtocol<'_> { } fn read_set_begin(&mut self) -> thrift::Result { - unimplemented!() + thrift_unimplemented!() } fn read_set_end(&mut self) -> thrift::Result<()> { - unimplemented!() + thrift_unimplemented!() } fn read_map_begin(&mut self) -> thrift::Result { - unimplemented!() + thrift_unimplemented!() } fn read_map_end(&mut self) -> thrift::Result<()> { diff --git a/parquet/tests/arrow_reader/bad_data.rs b/parquet/tests/arrow_reader/bad_data.rs index 74342031432a..cfd61e82d32b 100644 --- a/parquet/tests/arrow_reader/bad_data.rs +++ b/parquet/tests/arrow_reader/bad_data.rs @@ -106,7 +106,7 @@ fn test_arrow_rs_gh_6229_dict_header() { let err = read_file("ARROW-RS-GH-6229-DICTHEADER.parquet").unwrap_err(); assert_eq!( err.to_string(), - "External: Parquet argument error: EOF: eof decoding byte array" + "External: Parquet argument error: Parquet error: Integer overflow: out of range integral type conversion attempted" ); } From 4f1f6e57c568fae8233ab9da7d7c7acdaea4112a Mon Sep 17 00:00:00 2001 From: June <61218022+itsjunetime@users.noreply.github.com> Date: Mon, 6 Jan 2025 14:13:56 -0700 Subject: [PATCH 3/9] Update MSRVs to be accurate (#6742) * Update most MSRVs * Make cargo-msrv verify every package in repo instead of just a select few and purposefully break arrow-flight msrv * Add test to ensure workspace rust version is being used at least somewhere * Fix exit1 => exit 1 * Make arrow-flight work, at the very least, with 'cargo metadata' * Fix arrow-flight/gen rust-version to make CI pass now * Get rid of pretty msrv logging as it can't all be displayed * Do '-mindepth 2' with find to prevent running cargo msrv on the workspace as a whole * Use correct MSRV for object_store * remove workspace msrv check * revert msrv * push object_store MSRV back down to 1.62.1 * Revert unrelated formatting changes * Fix object_store msrv --------- Co-authored-by: Andrew Lamb Co-authored-by: Jeffrey Vo --- .github/workflows/rust.yml | 28 +++++--------------- Cargo.toml | 2 +- arrow-flight/gen/Cargo.toml | 2 +- arrow-integration-testing/Cargo.toml | 2 +- arrow-pyarrow-integration-testing/Cargo.toml | 2 +- arrow-schema/Cargo.toml | 2 +- arrow/Cargo.toml | 2 +- parquet/Cargo.toml | 2 +- 8 files changed, 14 insertions(+), 28 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 044250b70435..ca0d2441ceae 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -123,23 +123,6 @@ jobs: uses: 
./.github/actions/setup-builder - name: Install cargo-msrv run: cargo install cargo-msrv - - name: Downgrade arrow dependencies - run: cargo update -p ahash --precise 0.8.7 - - name: Check arrow - working-directory: arrow - run: | - # run `cd arrow; cargo msrv verify` to see problematic dependencies - cargo msrv verify --output-format=json - - name: Check parquet - working-directory: parquet - run: | - # run `cd parquet; cargo msrv verify` to see problematic dependencies - cargo msrv verify --output-format=json - - name: Check arrow-flight - working-directory: arrow-flight - run: | - # run `cd arrow-flight; cargo msrv verify` to see problematic dependencies - cargo msrv verify --output-format=json - name: Downgrade object_store dependencies working-directory: object_store # Necessary because tokio 1.30.0 updates MSRV to 1.63 @@ -147,8 +130,11 @@ jobs: run: | cargo update -p tokio --precise 1.29.1 cargo update -p url --precise 2.5.0 - - name: Check object_store - working-directory: object_store + - name: Check all packages run: | - # run `cd object_store; cargo msrv verify` to see problematic dependencies - cargo msrv verify --output-format=json + # run `cargo msrv verify --manifest-path "path/to/Cargo.toml"` to see problematic dependencies + find . -mindepth 2 -name Cargo.toml | while read -r dir + do + echo "Checking package '$dir'" + cargo msrv verify --manifest-path "$dir" --output-format=json || exit 1 + done diff --git a/Cargo.toml b/Cargo.toml index 75ba410f12a6..39e3c0bca99a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -74,7 +74,7 @@ include = [ "Cargo.toml", ] edition = "2021" -rust-version = "1.62" +rust-version = "1.70" [workspace.dependencies] arrow = { version = "54.0.0", path = "./arrow", default-features = false } diff --git a/arrow-flight/gen/Cargo.toml b/arrow-flight/gen/Cargo.toml index 6358227a8912..e52efbf67e21 100644 --- a/arrow-flight/gen/Cargo.toml +++ b/arrow-flight/gen/Cargo.toml @@ -20,7 +20,7 @@ name = "gen" description = "Code generation for arrow-flight" version = "0.1.0" edition = { workspace = true } -rust-version = { workspace = true } +rust-version = "1.71.1" authors = { workspace = true } homepage = { workspace = true } repository = { workspace = true } diff --git a/arrow-integration-testing/Cargo.toml b/arrow-integration-testing/Cargo.toml index 8654b4b92734..26cb05fae1c2 100644 --- a/arrow-integration-testing/Cargo.toml +++ b/arrow-integration-testing/Cargo.toml @@ -25,7 +25,7 @@ authors = { workspace = true } license = { workspace = true } edition = { workspace = true } publish = false -rust-version = { workspace = true } +rust-version = "1.75.0" [lib] crate-type = ["lib", "cdylib"] diff --git a/arrow-pyarrow-integration-testing/Cargo.toml b/arrow-pyarrow-integration-testing/Cargo.toml index 03d08df30959..4ead95fcb912 100644 --- a/arrow-pyarrow-integration-testing/Cargo.toml +++ b/arrow-pyarrow-integration-testing/Cargo.toml @@ -25,7 +25,7 @@ authors = ["Apache Arrow "] license = "Apache-2.0" keywords = [ "arrow" ] edition = "2021" -rust-version = "1.62" +rust-version = "1.70" publish = false [lib] diff --git a/arrow-schema/Cargo.toml b/arrow-schema/Cargo.toml index 1e1f9fbde0e4..d1bcf046b7ca 100644 --- a/arrow-schema/Cargo.toml +++ b/arrow-schema/Cargo.toml @@ -26,7 +26,7 @@ license = { workspace = true } keywords = { workspace = true } include = { workspace = true } edition = { workspace = true } -rust-version = { workspace = true } +rust-version = "1.64" [lib] name = "arrow_schema" diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 
8860cd61c5b3..a1c9c0ab2113 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -31,7 +31,7 @@ include = [ "Cargo.toml", ] edition = { workspace = true } -rust-version = "1.70.0" +rust-version = { workspace = true } [lib] name = "arrow" diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index 19f890710778..e4085472ea20 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -26,7 +26,7 @@ authors = { workspace = true } keywords = ["arrow", "parquet", "hadoop"] readme = "README.md" edition = { workspace = true } -rust-version = "1.70.0" +rust-version = { workspace = true } [target.'cfg(target_arch = "wasm32")'.dependencies] ahash = { version = "0.8", default-features = false, features = ["compile-time-rng"] } From f18dadd7093cbed66ee42738d6564950168d3fe3 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 8 Jan 2025 09:02:23 -0500 Subject: [PATCH 4/9] Document the `ParquetRecordBatchStream` buffering (#6947) * Document the ParquetRecordBatchStream buffering * Update parquet/src/arrow/async_reader/mod.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --------- Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- parquet/src/arrow/async_reader/mod.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs index 4f3befe42662..5323251b07e7 100644 --- a/parquet/src/arrow/async_reader/mod.rs +++ b/parquet/src/arrow/async_reader/mod.rs @@ -611,11 +611,23 @@ impl<T> std::fmt::Debug for StreamState<T> { } } -/// An asynchronous [`Stream`](https://docs.rs/futures/latest/futures/stream/trait.Stream.html) of [`RecordBatch`] -/// for a parquet file that can be constructed using [`ParquetRecordBatchStreamBuilder`]. +/// An asynchronous [`Stream`] of [`RecordBatch`] constructed using [`ParquetRecordBatchStreamBuilder`] to read parquet files. /// /// `ParquetRecordBatchStream` also provides [`ParquetRecordBatchStream::next_row_group`] for fetching row groups, /// allowing users to decode record batches separately from I/O. +/// +/// # I/O Buffering +/// +/// `ParquetRecordBatchStream` buffers *all* data pages selected after predicates +/// (projection + filtering, etc) and decodes the rows from those buffered pages. +/// +/// For example, if all rows and columns are selected, the entire row group is +/// buffered in memory during decode.
This minimizes the number of IO operations +/// required, which is especially important for object stores, where IO operations +/// have latencies in the hundreds of milliseconds +/// +/// +/// [`Stream`]: https://docs.rs/futures/latest/futures/stream/trait.Stream.html pub struct ParquetRecordBatchStream { metadata: Arc, From 74499c0e7846cfbc498bf9fd7a2c1a4c8731c897 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Wed, 8 Jan 2025 06:06:13 -0800 Subject: [PATCH 5/9] Return `BoxStream` with `'static` lifetime from `ObjectStore::list` (#6619) Co-authored-by: Andrew Lamb --- object_store/src/aws/client.rs | 2 +- object_store/src/aws/mod.rs | 4 +-- object_store/src/azure/client.rs | 2 +- object_store/src/azure/mod.rs | 7 ++-- object_store/src/chunked.rs | 4 +-- object_store/src/client/list.rs | 19 +++++----- object_store/src/client/pagination.rs | 50 ++++++++++++++++----------- object_store/src/gcp/client.rs | 2 +- object_store/src/gcp/mod.rs | 4 +-- object_store/src/http/mod.rs | 15 ++++---- object_store/src/lib.rs | 8 ++--- object_store/src/limit.rs | 14 ++++---- object_store/src/local.rs | 2 +- object_store/src/memory.rs | 2 +- object_store/src/prefix.rs | 32 ++++++++++++++--- object_store/src/throttle.rs | 16 +++++---- object_store/tests/get_range_file.rs | 2 +- 17 files changed, 113 insertions(+), 72 deletions(-) diff --git a/object_store/src/aws/client.rs b/object_store/src/aws/client.rs index b81be0c0efad..246f2779dd07 100644 --- a/object_store/src/aws/client.rs +++ b/object_store/src/aws/client.rs @@ -855,7 +855,7 @@ impl GetClient for S3Client { } #[async_trait] -impl ListClient for S3Client { +impl ListClient for Arc { /// Make an S3 List request async fn list_request( &self, diff --git a/object_store/src/aws/mod.rs b/object_store/src/aws/mod.rs index 7f449c49963c..82ef909de984 100644 --- a/object_store/src/aws/mod.rs +++ b/object_store/src/aws/mod.rs @@ -273,7 +273,7 @@ impl ObjectStore for AmazonS3 { .boxed() } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { self.client.list(prefix) } @@ -281,7 +281,7 @@ impl ObjectStore for AmazonS3 { &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { if self.client.config.is_s3_express() { let offset = offset.clone(); // S3 Express does not support start-after diff --git a/object_store/src/azure/client.rs b/object_store/src/azure/client.rs index bd72d0c6aee1..fa5412c455fc 100644 --- a/object_store/src/azure/client.rs +++ b/object_store/src/azure/client.rs @@ -925,7 +925,7 @@ impl GetClient for AzureClient { } #[async_trait] -impl ListClient for AzureClient { +impl ListClient for Arc { /// Make an Azure List request async fn list_request( &self, diff --git a/object_store/src/azure/mod.rs b/object_store/src/azure/mod.rs index 81b6667bc058..ea4dd8f567a9 100644 --- a/object_store/src/azure/mod.rs +++ b/object_store/src/azure/mod.rs @@ -119,6 +119,9 @@ impl ObjectStore for MicrosoftAzure { self.client.delete_request(location, &()).await } + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { + self.client.list(prefix) + } fn delete_stream<'a>( &'a self, locations: BoxStream<'a, Result>, @@ -139,10 +142,6 @@ impl ObjectStore for MicrosoftAzure { .boxed() } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { - self.client.list(prefix) - } - async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result { self.client.list_with_delimiter(prefix).await } diff --git 
a/object_store/src/chunked.rs b/object_store/src/chunked.rs index 3f83c1336dc4..4998e9f2a04d 100644 --- a/object_store/src/chunked.rs +++ b/object_store/src/chunked.rs @@ -150,7 +150,7 @@ impl ObjectStore for ChunkedStore { self.inner.delete(location).await } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { self.inner.list(prefix) } @@ -158,7 +158,7 @@ impl ObjectStore for ChunkedStore { &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { self.inner.list_with_offset(prefix, offset) } diff --git a/object_store/src/client/list.rs b/object_store/src/client/list.rs index 4445d0d17533..fe9bfebf768d 100644 --- a/object_store/src/client/list.rs +++ b/object_store/src/client/list.rs @@ -44,37 +44,38 @@ pub(crate) trait ListClientExt { prefix: Option<&Path>, delimiter: bool, offset: Option<&Path>, - ) -> BoxStream<'_, Result>; + ) -> BoxStream<'static, Result>; - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result>; + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result>; #[allow(unused)] fn list_with_offset( &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result>; + ) -> BoxStream<'static, Result>; async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result; } #[async_trait] -impl ListClientExt for T { +impl ListClientExt for T { fn list_paginated( &self, prefix: Option<&Path>, delimiter: bool, offset: Option<&Path>, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { let offset = offset.map(|x| x.to_string()); let prefix = prefix .filter(|x| !x.as_ref().is_empty()) .map(|p| format!("{}{}", p.as_ref(), crate::path::DELIMITER)); stream_paginated( + self.clone(), (prefix, offset), - move |(prefix, offset), token| async move { - let (r, next_token) = self + move |client, (prefix, offset), token| async move { + let (r, next_token) = client .list_request( prefix.as_deref(), delimiter, @@ -88,7 +89,7 @@ impl ListClientExt for T { .boxed() } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { self.list_paginated(prefix, false, None) .map_ok(|r| futures::stream::iter(r.objects.into_iter().map(Ok))) .try_flatten() @@ -99,7 +100,7 @@ impl ListClientExt for T { &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { self.list_paginated(prefix, false, Some(offset)) .map_ok(|r| futures::stream::iter(r.objects.into_iter().map(Ok))) .try_flatten() diff --git a/object_store/src/client/pagination.rs b/object_store/src/client/pagination.rs index 77b2a3d8e2f2..d789c7431d8c 100644 --- a/object_store/src/client/pagination.rs +++ b/object_store/src/client/pagination.rs @@ -35,9 +35,14 @@ use std::future::Future; /// finish, otherwise it will continue to call `op(state, token)` with the values returned by the /// previous call to `op`, until a continuation token of `None` is returned /// -pub(crate) fn stream_paginated(state: S, op: F) -> impl Stream> +pub(crate) fn stream_paginated( + client: C, + state: S, + op: F, +) -> impl Stream> where - F: Fn(S, Option) -> Fut + Copy, + C: Clone, + F: Fn(C, S, Option) -> Fut + Copy, Fut: Future)>>, { enum PaginationState { @@ -46,27 +51,30 @@ where Done, } - futures::stream::unfold(PaginationState::Start(state), move |state| async move { - let (s, page_token) = match state { - PaginationState::Start(s) => (s, None), - 
PaginationState::HasMore(s, page_token) if !page_token.is_empty() => { - (s, Some(page_token)) - } - _ => { - return None; - } - }; + futures::stream::unfold(PaginationState::Start(state), move |state| { + let client = client.clone(); + async move { + let (s, page_token) = match state { + PaginationState::Start(s) => (s, None), + PaginationState::HasMore(s, page_token) if !page_token.is_empty() => { + (s, Some(page_token)) + } + _ => { + return None; + } + }; - let (resp, s, continuation) = match op(s, page_token).await { - Ok(resp) => resp, - Err(e) => return Some((Err(e), PaginationState::Done)), - }; + let (resp, s, continuation) = match op(client, s, page_token).await { + Ok(resp) => resp, + Err(e) => return Some((Err(e), PaginationState::Done)), + }; - let next_state = match continuation { - Some(token) => PaginationState::HasMore(s, token), - None => PaginationState::Done, - }; + let next_state = match continuation { + Some(token) => PaginationState::HasMore(s, token), + None => PaginationState::Done, + }; - Some((Ok(resp), next_state)) + Some((Ok(resp), next_state)) + } }) } diff --git a/object_store/src/gcp/client.rs b/object_store/src/gcp/client.rs index d6f89ca71740..8dd1c69802a8 100644 --- a/object_store/src/gcp/client.rs +++ b/object_store/src/gcp/client.rs @@ -633,7 +633,7 @@ impl GetClient for GoogleCloudStorageClient { } #[async_trait] -impl ListClient for GoogleCloudStorageClient { +impl ListClient for Arc { /// Perform a list request async fn list_request( &self, diff --git a/object_store/src/gcp/mod.rs b/object_store/src/gcp/mod.rs index 5199135ba6b0..a2f512415a8d 100644 --- a/object_store/src/gcp/mod.rs +++ b/object_store/src/gcp/mod.rs @@ -183,7 +183,7 @@ impl ObjectStore for GoogleCloudStorage { self.client.delete_request(location).await } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { self.client.list(prefix) } @@ -191,7 +191,7 @@ impl ObjectStore for GoogleCloudStorage { &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { self.client.list_with_offset(prefix, offset) } diff --git a/object_store/src/http/mod.rs b/object_store/src/http/mod.rs index 417f72856722..899740d36db9 100644 --- a/object_store/src/http/mod.rs +++ b/object_store/src/http/mod.rs @@ -31,6 +31,8 @@ //! [rfc2518]: https://datatracker.ietf.org/doc/html/rfc2518 //! 
[WebDAV]: https://en.wikipedia.org/wiki/WebDAV +use std::sync::Arc; + use async_trait::async_trait; use futures::stream::BoxStream; use futures::{StreamExt, TryStreamExt}; @@ -79,7 +81,7 @@ impl From for crate::Error { /// See [`crate::http`] for more information #[derive(Debug)] pub struct HttpStore { - client: Client, + client: Arc, } impl std::fmt::Display for HttpStore { @@ -130,19 +132,20 @@ impl ObjectStore for HttpStore { self.client.delete(location).await } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { let prefix_len = prefix.map(|p| p.as_ref().len()).unwrap_or_default(); let prefix = prefix.cloned(); + let client = Arc::clone(&self.client); futures::stream::once(async move { - let status = self.client.list(prefix.as_ref(), "infinity").await?; + let status = client.list(prefix.as_ref(), "infinity").await?; let iter = status .response .into_iter() .filter(|r| !r.is_dir()) - .map(|response| { + .map(move |response| { response.check_ok()?; - response.object_meta(self.client.base_url()) + response.object_meta(client.base_url()) }) // Filter out exact prefix matches .filter_ok(move |r| r.location.as_ref().len() > prefix_len); @@ -238,7 +241,7 @@ impl HttpBuilder { let parsed = Url::parse(&url).map_err(|source| Error::UnableToParseUrl { url, source })?; Ok(HttpStore { - client: Client::new(parsed, self.client_options, self.retry_config)?, + client: Arc::new(Client::new(parsed, self.client_options, self.retry_config)?), }) } } diff --git a/object_store/src/lib.rs b/object_store/src/lib.rs index 987ffacc6e49..53eda5a82fd5 100644 --- a/object_store/src/lib.rs +++ b/object_store/src/lib.rs @@ -722,7 +722,7 @@ pub trait ObjectStore: std::fmt::Display + Send + Sync + Debug + 'static { /// `foo/bar_baz/x`. List is recursive, i.e. `foo/bar/more/x` will be included. /// /// Note: the order of returned [`ObjectMeta`] is not guaranteed - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result>; + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result>; /// List all the objects with the given prefix and a location greater than `offset` /// @@ -734,7 +734,7 @@ pub trait ObjectStore: std::fmt::Display + Send + Sync + Debug + 'static { &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { let offset = offset.clone(); self.list(prefix) .try_filter(move |f| futures::future::ready(f.location > offset)) @@ -847,7 +847,7 @@ macro_rules! as_ref_impl { self.as_ref().delete_stream(locations) } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { self.as_ref().list(prefix) } @@ -855,7 +855,7 @@ macro_rules! 
as_ref_impl { &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { self.as_ref().list_with_offset(prefix, offset) } diff --git a/object_store/src/limit.rs b/object_store/src/limit.rs index 6a3c3b574e62..77f72a0e11a1 100644 --- a/object_store/src/limit.rs +++ b/object_store/src/limit.rs @@ -45,7 +45,7 @@ use tokio::sync::{OwnedSemaphorePermit, Semaphore}; /// #[derive(Debug)] pub struct LimitStore { - inner: T, + inner: Arc, max_requests: usize, semaphore: Arc, } @@ -56,7 +56,7 @@ impl LimitStore { /// `max_requests` pub fn new(inner: T, max_requests: usize) -> Self { Self { - inner, + inner: Arc::new(inner), max_requests, semaphore: Arc::new(Semaphore::new(max_requests)), } @@ -144,12 +144,13 @@ impl ObjectStore for LimitStore { self.inner.delete_stream(locations) } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { let prefix = prefix.cloned(); + let inner = Arc::clone(&self.inner); let fut = Arc::clone(&self.semaphore) .acquire_owned() .map(move |permit| { - let s = self.inner.list(prefix.as_ref()); + let s = inner.list(prefix.as_ref()); PermitWrapper::new(s, permit.unwrap()) }); fut.into_stream().flatten().boxed() @@ -159,13 +160,14 @@ impl ObjectStore for LimitStore { &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { let prefix = prefix.cloned(); let offset = offset.clone(); + let inner = Arc::clone(&self.inner); let fut = Arc::clone(&self.semaphore) .acquire_owned() .map(move |permit| { - let s = self.inner.list_with_offset(prefix.as_ref(), &offset); + let s = inner.list_with_offset(prefix.as_ref(), &offset); PermitWrapper::new(s, permit.unwrap()) }); fut.into_stream().flatten().boxed() diff --git a/object_store/src/local.rs b/object_store/src/local.rs index b193481ae7b8..364026459a03 100644 --- a/object_store/src/local.rs +++ b/object_store/src/local.rs @@ -488,7 +488,7 @@ impl ObjectStore for LocalFileSystem { .await } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { let config = Arc::clone(&self.config); let root_path = match prefix { diff --git a/object_store/src/memory.rs b/object_store/src/memory.rs index 3f3cff3390db..6402f924346f 100644 --- a/object_store/src/memory.rs +++ b/object_store/src/memory.rs @@ -297,7 +297,7 @@ impl ObjectStore for InMemory { Ok(()) } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { let root = Path::default(); let prefix = prefix.unwrap_or(&root); diff --git a/object_store/src/prefix.rs b/object_store/src/prefix.rs index 227887d78fd7..a0b67ca4b58e 100644 --- a/object_store/src/prefix.rs +++ b/object_store/src/prefix.rs @@ -74,6 +74,28 @@ impl PrefixStore { } } +// Note: This is a relative hack to move these two functions to pure functions so they don't rely +// on the `self` lifetime. Expected to be cleaned up before merge. 
+// +/// Strip the constant prefix from a given path +fn strip_prefix(prefix: &Path, path: Path) -> Path { + // Note cannot use match because of borrow checker + if let Some(suffix) = path.prefix_match(prefix) { + return suffix.collect(); + } + path +} + +/// Strip the constant prefix from a given ObjectMeta +fn strip_meta(prefix: &Path, meta: ObjectMeta) -> ObjectMeta { + ObjectMeta { + last_modified: meta.last_modified, + size: meta.size, + location: strip_prefix(prefix, meta.location), + e_tag: meta.e_tag, + version: None, + } +} #[async_trait::async_trait] impl ObjectStore for PrefixStore { async fn put(&self, location: &Path, payload: PutPayload) -> Result { @@ -136,21 +158,23 @@ impl ObjectStore for PrefixStore { self.inner.delete(&full_path).await } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { let prefix = self.full_path(prefix.unwrap_or(&Path::default())); let s = self.inner.list(Some(&prefix)); - s.map_ok(|meta| self.strip_meta(meta)).boxed() + let slf_prefix = self.prefix.clone(); + s.map_ok(move |meta| strip_meta(&slf_prefix, meta)).boxed() } fn list_with_offset( &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { let offset = self.full_path(offset); let prefix = self.full_path(prefix.unwrap_or(&Path::default())); let s = self.inner.list_with_offset(Some(&prefix), &offset); - s.map_ok(|meta| self.strip_meta(meta)).boxed() + let slf_prefix = self.prefix.clone(); + s.map_ok(move |meta| strip_meta(&slf_prefix, meta)).boxed() } async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result { diff --git a/object_store/src/throttle.rs b/object_store/src/throttle.rs index b9dff5c6d1d2..29cd32705ccc 100644 --- a/object_store/src/throttle.rs +++ b/object_store/src/throttle.rs @@ -237,11 +237,13 @@ impl ObjectStore for ThrottledStore { self.inner.delete(location).await } - fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, prefix: Option<&Path>) -> BoxStream<'static, Result> { let stream = self.inner.list(prefix); + let config = Arc::clone(&self.config); futures::stream::once(async move { - let wait_list_per_entry = self.config().wait_list_per_entry; - sleep(self.config().wait_list_per_call).await; + let config = *config.lock(); + let wait_list_per_entry = config.wait_list_per_entry; + sleep(config.wait_list_per_call).await; throttle_stream(stream, move |_| wait_list_per_entry) }) .flatten() @@ -252,11 +254,13 @@ impl ObjectStore for ThrottledStore { &self, prefix: Option<&Path>, offset: &Path, - ) -> BoxStream<'_, Result> { + ) -> BoxStream<'static, Result> { let stream = self.inner.list_with_offset(prefix, offset); + let config = Arc::clone(&self.config); futures::stream::once(async move { - let wait_list_per_entry = self.config().wait_list_per_entry; - sleep(self.config().wait_list_per_call).await; + let config = *config.lock(); + let wait_list_per_entry = config.wait_list_per_entry; + sleep(config.wait_list_per_call).await; throttle_stream(stream, move |_| wait_list_per_entry) }) .flatten() diff --git a/object_store/tests/get_range_file.rs b/object_store/tests/get_range_file.rs index c5550ac21728..e500fc8ac87d 100644 --- a/object_store/tests/get_range_file.rs +++ b/object_store/tests/get_range_file.rs @@ -62,7 +62,7 @@ impl ObjectStore for MyStore { todo!() } - fn list(&self, _: Option<&Path>) -> BoxStream<'_, Result> { + fn list(&self, _: Option<&Path>) -> BoxStream<'static, Result> { todo!() } 
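The patch above repeats one pattern across every store: the client is wrapped in an `Arc`, and each listing method clones that `Arc` into the stream it returns, so the stream owns its client rather than borrowing from `&self`. What follows is a minimal, self-contained sketch of that pattern, not code from the patch itself; `Client`, `Store`, and this `list` are illustrative names, not object_store APIs.

// Sketch of the Arc-clone pattern used throughout this patch to make
// returned streams `'static`. Illustrative types only.
use std::sync::Arc;

use futures::stream::{self, BoxStream, StreamExt};

struct Client {
    entries: Vec<String>,
}

struct Store {
    // Shared, reference-counted client, as the patch does for HttpStore,
    // LimitStore, ThrottledStore, etc.
    client: Arc<Client>,
}

impl Store {
    // `BoxStream<'static, _>`: the returned stream borrows nothing from `&self`.
    fn list(&self) -> BoxStream<'static, String> {
        let client = Arc::clone(&self.client); // owned by the stream closure
        stream::iter(0..client.entries.len())
            .map(move |i| client.entries[i].clone())
            .boxed()
    }
}

fn main() {
    futures::executor::block_on(async {
        let store = Store {
            client: Arc::new(Client {
                entries: vec!["a".to_string(), "b".to_string()],
            }),
        };
        let mut listing = store.list();
        drop(store); // fine: the stream owns its own `Arc<Client>`
        while let Some(entry) = listing.next().await {
            println!("{entry}");
        }
    });
}

Because the stream holds its own `Arc`, the caller can drop the store or move the stream to another task, which is exactly what the `'static` bound on the `BoxStream` returned by `ObjectStore::list` promises.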
From a47d9967be152b246e9fde1fe12ee512bcd5a856 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Wed, 8 Jan 2025 09:28:05 -0500 Subject: [PATCH 6/9] [Parquet] Reuse buffer in `ByteViewArrayDecoderPlain` (#6930) * reuse buffer in view array * Update parquet/src/arrow/array_reader/byte_view_array.rs Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> * use From instead --------- Co-authored-by: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com> --- .../src/arrow/array_reader/byte_view_array.rs | 38 ++++++++++++++++--- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/parquet/src/arrow/array_reader/byte_view_array.rs b/parquet/src/arrow/array_reader/byte_view_array.rs index 92a8b0592d0d..0e16642940d2 100644 --- a/parquet/src/arrow/array_reader/byte_view_array.rs +++ b/parquet/src/arrow/array_reader/byte_view_array.rs @@ -290,7 +290,7 @@ impl ByteViewArrayDecoder { /// Decoder from [`Encoding::PLAIN`] data to [`ViewBuffer`] pub struct ByteViewArrayDecoderPlain { - buf: Bytes, + buf: Buffer, offset: usize, validate_utf8: bool, @@ -308,7 +308,7 @@ impl ByteViewArrayDecoderPlain { validate_utf8: bool, ) -> Self { Self { - buf, + buf: Buffer::from(buf), offset: 0, max_remaining_values: num_values.unwrap_or(num_levels), validate_utf8, @@ -316,9 +316,15 @@ impl ByteViewArrayDecoderPlain { } pub fn read(&mut self, output: &mut ViewBuffer, len: usize) -> Result { - // Zero copy convert `bytes::Bytes` into `arrow_buffer::Buffer` - let buf = arrow_buffer::Buffer::from(self.buf.clone()); - let block_id = output.append_block(buf); + // avoid creating a new buffer if the last buffer is the same as the current buffer + // This is especially useful when row-level filtering is applied, where we call lots of small `read` over the same buffer. 
+ let block_id = { + if output.buffers.last().is_some_and(|x| x.ptr_eq(&self.buf)) { + output.buffers.len() as u32 - 1 + } else { + output.append_block(self.buf.clone()) + } + }; let to_read = len.min(self.max_remaining_values); @@ -690,12 +696,13 @@ mod tests { use crate::{ arrow::{ - array_reader::test_util::{byte_array_all_encodings, utf8_column}, + array_reader::test_util::{byte_array_all_encodings, encode_byte_array, utf8_column}, buffer::view_buffer::ViewBuffer, record_reader::buffer::ValuesBuffer, }, basic::Encoding, column::reader::decoder::ColumnValueDecoder, + data_type::ByteArray, }; use super::*; @@ -746,4 +753,23 @@ mod tests { ); } } + + #[test] + fn test_byte_view_array_plain_decoder_reuse_buffer() { + let byte_array = vec!["hello", "world", "large payload over 12 bytes", "b"]; + let byte_array: Vec = byte_array.into_iter().map(|x| x.into()).collect(); + let pages = encode_byte_array(Encoding::PLAIN, &byte_array); + + let column_desc = utf8_column(); + let mut decoder = ByteViewArrayColumnValueDecoder::new(&column_desc); + + let mut view_buffer = ViewBuffer::default(); + decoder.set_data(Encoding::PLAIN, pages, 4, None).unwrap(); + decoder.read(&mut view_buffer, 1).unwrap(); + decoder.read(&mut view_buffer, 1).unwrap(); + assert_eq!(view_buffer.buffers.len(), 1); + + decoder.read(&mut view_buffer, 1).unwrap(); + assert_eq!(view_buffer.buffers.len(), 1); + } } From 485dbb1e0f692c7bfa47376d477bb62e7d801a8c Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Wed, 8 Jan 2025 09:54:39 -0800 Subject: [PATCH 7/9] regenerate arrow-ipc/src/gen with patched flatbuffers (#6426) * regenerate arrow-ipc/src/gen with patched flatbuffers * use git repo instead of local path * add backticks * expand allowed overage to accommodate more alignment padding * re-enable nanoarrow integration test * add assertions that struct alignment is correct * remove struct alignment assertions * apply a patch to generated code rather than requiring patched flatc * point to google/flatbuffers with pub PushAlignment * add license header to gen.patch * use flatbuffers 24.12.23 * remove unnecessary gen.patch --- .github/workflows/integration.yml | 3 +- arrow-flight/src/encode.rs | 14 +- arrow-ipc/Cargo.toml | 2 +- arrow-ipc/regen.sh | 90 +++---- arrow-ipc/src/gen/File.rs | 26 +- arrow-ipc/src/gen/Message.rs | 66 ++--- arrow-ipc/src/gen/Schema.rs | 397 +++++++++++++++--------------- arrow-ipc/src/gen/SparseTensor.rs | 182 +++++++++++--- arrow-ipc/src/gen/Tensor.rs | 150 +++++++++-- arrow-ipc/src/lib.rs | 11 + 10 files changed, 609 insertions(+), 332 deletions(-) diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 9b23b1b5ad2e..a47195d1becf 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -65,8 +65,7 @@ jobs: ARROW_INTEGRATION_JAVA: ON ARROW_INTEGRATION_JS: ON ARCHERY_INTEGRATION_TARGET_IMPLEMENTATIONS: "rust" - # Disable nanoarrow integration, due to https://github.com/apache/arrow-rs/issues/5052 - ARCHERY_INTEGRATION_WITH_NANOARROW: "0" + ARCHERY_INTEGRATION_WITH_NANOARROW: "1" # https://github.com/apache/arrow/pull/38403/files#r1371281630 ARCHERY_INTEGRATION_WITH_RUST: "1" # These are necessary because the github runner overrides $HOME diff --git a/arrow-flight/src/encode.rs b/arrow-flight/src/encode.rs index 19fe42474405..57ac9f3173fe 100644 --- a/arrow-flight/src/encode.rs +++ b/arrow-flight/src/encode.rs @@ -1708,7 +1708,7 @@ mod tests { ]) .unwrap(); - verify_encoded_split(batch, 112).await; + verify_encoded_split(batch, 
120).await; } #[tokio::test] @@ -1719,7 +1719,7 @@ mod tests { // overage is much higher than ideal // https://github.com/apache/arrow-rs/issues/3478 - verify_encoded_split(batch, 4304).await; + verify_encoded_split(batch, 4312).await; } #[tokio::test] @@ -1755,7 +1755,7 @@ mod tests { // 5k over limit (which is 2x larger than limit of 5k) // overage is much higher than ideal // https://github.com/apache/arrow-rs/issues/3478 - verify_encoded_split(batch, 5800).await; + verify_encoded_split(batch, 5808).await; } #[tokio::test] @@ -1771,7 +1771,7 @@ mod tests { let batch = RecordBatch::try_from_iter(vec![("a1", Arc::new(array) as _)]).unwrap(); - verify_encoded_split(batch, 48).await; + verify_encoded_split(batch, 56).await; } #[tokio::test] @@ -1785,7 +1785,7 @@ mod tests { // overage is much higher than ideal // https://github.com/apache/arrow-rs/issues/3478 - verify_encoded_split(batch, 3328).await; + verify_encoded_split(batch, 3336).await; } #[tokio::test] @@ -1799,7 +1799,7 @@ mod tests { // overage is much higher than ideal // https://github.com/apache/arrow-rs/issues/3478 - verify_encoded_split(batch, 5280).await; + verify_encoded_split(batch, 5288).await; } #[tokio::test] @@ -1824,7 +1824,7 @@ mod tests { // overage is much higher than ideal // https://github.com/apache/arrow-rs/issues/3478 - verify_encoded_split(batch, 4128).await; + verify_encoded_split(batch, 4136).await; } /// Return size, in memory of flight data diff --git a/arrow-ipc/Cargo.toml b/arrow-ipc/Cargo.toml index cf91b3a3415f..4988eed4a5ed 100644 --- a/arrow-ipc/Cargo.toml +++ b/arrow-ipc/Cargo.toml @@ -38,7 +38,7 @@ arrow-array = { workspace = true } arrow-buffer = { workspace = true } arrow-data = { workspace = true } arrow-schema = { workspace = true } -flatbuffers = { version = "24.3.25", default-features = false } +flatbuffers = { version = "24.12.23", default-features = false } lz4_flex = { version = "0.11", default-features = false, features = ["std", "frame"], optional = true } zstd = { version = "0.13.0", default-features = false, optional = true } diff --git a/arrow-ipc/regen.sh b/arrow-ipc/regen.sh index 8d8862ccc7f4..b368bd1bc7cc 100755 --- a/arrow-ipc/regen.sh +++ b/arrow-ipc/regen.sh @@ -21,33 +21,36 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" # Change to the toplevel `arrow-rs` directory pushd $DIR/../ -echo "Build flatc from source ..." - -FB_URL="https://github.com/google/flatbuffers" -FB_DIR="arrow/.flatbuffers" -FLATC="$FB_DIR/bazel-bin/flatc" - -if [ -z $(which bazel) ]; then - echo "bazel is required to build flatc" - exit 1 -fi - -echo "Bazel version: $(bazel version | head -1 | awk -F':' '{print $2}')" - -if [ ! -e $FB_DIR ]; then - echo "git clone $FB_URL ..." - git clone -b master --no-tag --depth 1 $FB_URL $FB_DIR +if [ -z "$FLATC" ]; then + echo "Build flatc from source ..." + + FB_URL="https://github.com/google/flatbuffers" + FB_DIR="arrow/.flatbuffers" + FLATC="$FB_DIR/bazel-bin/flatc" + + if [ -z $(which bazel) ]; then + echo "bazel is required to build flatc" + exit 1 + fi + + echo "Bazel version: $(bazel version | head -1 | awk -F':' '{print $2}')" + + if [ ! -e $FB_DIR ]; then + echo "git clone $FB_URL ..." + git clone -b master --no-tag --depth 1 $FB_URL $FB_DIR + else + echo "git pull $FB_URL ..." + git -C $FB_DIR pull + fi + + pushd $FB_DIR + echo "run: bazel build :flatc ..." + bazel build :flatc + popd else - echo "git pull $FB_URL ..." - git -C $FB_DIR pull + echo "Using flatc $FLATC ..." fi -pushd $FB_DIR -echo "run: bazel build :flatc ..." 
-bazel build :flatc
-popd
-
-
 # Execute the code generation:
 $FLATC --filename-suffix "" --rust -o arrow-ipc/src/gen/ format/*.fbs
 
@@ -99,37 +102,38 @@ for f in `ls *.rs`; do
     fi
     echo "Modifying: $f"
-    sed -i '' '/extern crate flatbuffers;/d' $f
-    sed -i '' '/use self::flatbuffers::EndianScalar;/d' $f
-    sed -i '' '/\#\[allow(unused_imports, dead_code)\]/d' $f
-    sed -i '' '/pub mod org {/d' $f
-    sed -i '' '/pub mod apache {/d' $f
-    sed -i '' '/pub mod arrow {/d' $f
-    sed -i '' '/pub mod flatbuf {/d' $f
-    sed -i '' '/} \/\/ pub mod flatbuf/d' $f
-    sed -i '' '/} \/\/ pub mod arrow/d' $f
-    sed -i '' '/} \/\/ pub mod apache/d' $f
-    sed -i '' '/} \/\/ pub mod org/d' $f
-    sed -i '' '/use core::mem;/d' $f
-    sed -i '' '/use core::cmp::Ordering;/d' $f
-    sed -i '' '/use self::flatbuffers::{EndianScalar, Follow};/d' $f
+    sed --in-place='' '/extern crate flatbuffers;/d' $f
+    sed --in-place='' '/use self::flatbuffers::EndianScalar;/d' $f
+    sed --in-place='' '/\#\[allow(unused_imports, dead_code)\]/d' $f
+    sed --in-place='' '/pub mod org {/d' $f
+    sed --in-place='' '/pub mod apache {/d' $f
+    sed --in-place='' '/pub mod arrow {/d' $f
+    sed --in-place='' '/pub mod flatbuf {/d' $f
+    sed --in-place='' '/} \/\/ pub mod flatbuf/d' $f
+    sed --in-place='' '/} \/\/ pub mod arrow/d' $f
+    sed --in-place='' '/} \/\/ pub mod apache/d' $f
+    sed --in-place='' '/} \/\/ pub mod org/d' $f
+    sed --in-place='' '/use core::mem;/d' $f
+    sed --in-place='' '/use core::cmp::Ordering;/d' $f
+    sed --in-place='' '/use self::flatbuffers::{EndianScalar, Follow};/d' $f
 
     # required by flatc 1.12.0+
-    sed -i '' "/\#\!\[allow(unused_imports, dead_code)\]/d" $f
+    sed --in-place='' "/\#\!\[allow(unused_imports, dead_code)\]/d" $f
 
     for name in ${names[@]}; do
-        sed -i '' "/use crate::${name}::\*;/d" $f
-        sed -i '' "s/use self::flatbuffers::Verifiable;/use flatbuffers::Verifiable;/g" $f
+        sed --in-place='' "/use crate::${name}::\*;/d" $f
+        sed --in-place='' "s/use self::flatbuffers::Verifiable;/use flatbuffers::Verifiable;/g" $f
    done
 
     # Replace all occurrences of "type__" with "type_", "TYPE__" with "TYPE_".
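     # (flatc escapes the reserved field name `type` as `type_`, so derived names such as
     # the union discriminant come out doubled, e.g. `type__type` instead of `type_type`)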
- sed -i '' 's/type__/type_/g' $f - sed -i '' 's/TYPE__/TYPE_/g' $f + sed --in-place='' 's/type__/type_/g' $f + sed --in-place='' 's/TYPE__/TYPE_/g' $f # Some files need prefixes if [[ $f == "File.rs" ]]; then # Now prefix the file with the static contents echo -e "${PREFIX}" "${SCHEMA_IMPORT}" | cat - $f > temp && mv temp $f elif [[ $f == "Message.rs" ]]; then + sed --in-place='' 's/List/\`List\`/g' $f echo -e "${PREFIX}" "${SCHEMA_IMPORT}" "${SPARSE_TENSOR_IMPORT}" "${TENSOR_IMPORT}" | cat - $f > temp && mv temp $f elif [[ $f == "SparseTensor.rs" ]]; then echo -e "${PREFIX}" "${SCHEMA_IMPORT}" "${TENSOR_IMPORT}" | cat - $f > temp && mv temp $f diff --git a/arrow-ipc/src/gen/File.rs b/arrow-ipc/src/gen/File.rs index c0c2fb183237..427cf75de096 100644 --- a/arrow-ipc/src/gen/File.rs +++ b/arrow-ipc/src/gen/File.rs @@ -23,6 +23,8 @@ use flatbuffers::EndianScalar; use std::{cmp::Ordering, mem}; // automatically generated by the FlatBuffers compiler, do not modify +// @generated + // struct Block, aligned to 8 #[repr(transparent)] #[derive(Clone, Copy, PartialEq)] @@ -64,6 +66,10 @@ impl<'b> flatbuffers::Push for Block { let src = ::core::slice::from_raw_parts(self as *const Block as *const u8, Self::size()); dst.copy_from_slice(src); } + #[inline] + fn alignment() -> flatbuffers::PushAlignment { + flatbuffers::PushAlignment::new(8) + } } impl<'a> flatbuffers::Verifiable for Block { @@ -211,8 +217,8 @@ impl<'a> Footer<'a> { Footer { _tab: table } } #[allow(unused_mut)] - pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr>( - _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr>, + pub fn create<'bldr: 'args, 'args: 'mut_bldr, 'mut_bldr, A: flatbuffers::Allocator + 'bldr>( + _fbb: &'mut_bldr mut flatbuffers::FlatBufferBuilder<'bldr, A>, args: &'args FooterArgs<'args>, ) -> flatbuffers::WIPOffset> { let mut builder = FooterBuilder::new(_fbb); @@ -344,11 +350,11 @@ impl<'a> Default for FooterArgs<'a> { } } -pub struct FooterBuilder<'a: 'b, 'b> { - fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a>, +pub struct FooterBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { + fbb_: &'b mut flatbuffers::FlatBufferBuilder<'a, A>, start_: flatbuffers::WIPOffset, } -impl<'a: 'b, 'b> FooterBuilder<'a, 'b> { +impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> FooterBuilder<'a, 'b, A> { #[inline] pub fn add_version(&mut self, version: MetadataVersion) { self.fbb_ @@ -388,7 +394,7 @@ impl<'a: 'b, 'b> FooterBuilder<'a, 'b> { ); } #[inline] - pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a>) -> FooterBuilder<'a, 'b> { + pub fn new(_fbb: &'b mut flatbuffers::FlatBufferBuilder<'a, A>) -> FooterBuilder<'a, 'b, A> { let start = _fbb.start_table(); FooterBuilder { fbb_: _fbb, @@ -474,16 +480,16 @@ pub unsafe fn size_prefixed_root_as_footer_unchecked(buf: &[u8]) -> Footer { flatbuffers::size_prefixed_root_unchecked::