Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Array::shrink_to_fit(&self) -> ArrayRef #6787

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions arrow-array/src/array/boolean_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,13 @@ impl Array for BooleanArray {
self.values.is_empty()
}

/// Builds a new `BooleanArray` from clones of this array's value and null
/// buffers, each passed through its own `shrink_to_fit`, and returns it as an
/// [`ArrayRef`]. `self` is left untouched.
fn shrink_to_fit(&self) -> ArrayRef {
    let values = self.values.clone().shrink_to_fit();
    let nulls = self.nulls.clone().map(|null_buf| null_buf.shrink_to_fit());
    Arc::new(Self { values, nulls })
}

/// Reports the offset of this array by delegating to `self.values.offset()`.
fn offset(&self) -> usize {
self.values.offset()
}
Expand Down
9 changes: 9 additions & 0 deletions arrow-array/src/array/byte_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,15 @@ impl<T: ByteArrayType> Array for GenericByteArray<T> {
self.value_offsets.len() <= 1
}

/// Returns a new array of the same data type whose offsets, value data and
/// (if present) null buffer are clones of this array's buffers passed through
/// their own `shrink_to_fit`.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    let value_offsets = self.value_offsets.clone().shrink_to_fit();
    let value_data = self.value_data.clone().shrink_to_fit();
    let nulls = self.nulls.clone().map(|null_buf| null_buf.shrink_to_fit());
    Arc::new(Self {
        data_type,
        value_offsets,
        value_data,
        nulls,
    })
}

/// This implementation always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
48 changes: 31 additions & 17 deletions arrow-array/src/array/byte_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -430,31 +430,31 @@ impl<T: ByteViewType + ?Sized> GenericByteViewArray<T> {
///
/// Before GC:
/// ```text
/// ┌──────┐
/// │......│
/// │......│
/// ┌────────────────────┐ ┌ ─ ─ ─ ▶ │Data1 │ Large buffer
/// ┌──────┐
/// │......│
/// │......│
/// ┌────────────────────┐ ┌ ─ ─ ─ ▶ │Data1 │ Large buffer
/// │ View 1 │─ ─ ─ ─ │......│ with data that
/// ├────────────────────┤ │......│ is not referred
/// │ View 2 │─ ─ ─ ─ ─ ─ ─ ─▶ │Data2 │ to by View 1 or
/// └────────────────────┘ │......│ View 2
/// │......│
/// 2 views, refer to │......│
/// small portions of a └──────┘
/// large buffer
/// └────────────────────┘ │......│ View 2
/// │......│
/// 2 views, refer to │......│
/// small portions of a └──────┘
/// large buffer
/// ```
///
///
/// After GC:
///
/// ```text
/// ┌────────────────────┐ ┌─────┐ After gc, only
/// │ View 1 │─ ─ ─ ─ ─ ─ ─ ─▶ │Data1│ data that is
/// ├────────────────────┤ ┌ ─ ─ ─ ▶ │Data2│ pointed to by
/// │ View 2 │─ ─ ─ ─ └─────┘ the views is
/// └────────────────────┘ left
///
///
/// 2 views
/// │ View 1 │─ ─ ─ ─ ─ ─ ─ ─▶ │Data1│ data that is
/// ├────────────────────┤ ┌ ─ ─ ─ ▶ │Data2│ pointed to by
/// │ View 2 │─ ─ ─ ─ └─────┘ the views is
/// └────────────────────┘ left
///
///
/// 2 views
/// ```
/// This method will compact the data buffers by recreating the view array and only include the data
/// that is pointed to by the views.
Expand Down Expand Up @@ -575,6 +575,20 @@ impl<T: ByteViewType + ?Sized> Array for GenericByteViewArray<T> {
self.views.is_empty()
}

/// Returns a new view array built from clones of the views buffer, every data
/// buffer, and the optional null buffer, each passed through its own
/// `shrink_to_fit`. The data type and phantom marker are carried over as-is.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    let views = self.views.clone().shrink_to_fit();
    // Shrink each data buffer individually, preserving their order.
    let buffers = self
        .buffers
        .iter()
        .map(|data_buf| data_buf.clone().shrink_to_fit())
        .collect();
    let phantom = self.phantom;
    let nulls = self.nulls.clone().map(|null_buf| null_buf.shrink_to_fit());
    Arc::new(Self {
        data_type,
        views,
        buffers,
        phantom,
        nulls,
    })
}

/// This implementation always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
19 changes: 19 additions & 0 deletions arrow-array/src/array/dictionary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -720,6 +720,21 @@ impl<T: ArrowDictionaryKeyType> Array for DictionaryArray<T> {
self.keys.is_empty()
}

/// Returns a new dictionary array whose keys and values have each been passed
/// through their own `shrink_to_fit`. The data type and `is_ordered` flag are
/// carried over unchanged.
///
/// # Panics
///
/// Panics if shrinking the keys does not yield a `PrimitiveArray<T>`; that
/// would indicate a broken `shrink_to_fit` implementation on the keys array.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    // The trait method returns a type-erased `ArrayRef`, so the concrete key
    // array has to be recovered by downcasting.
    let shrunk_keys = self.keys.shrink_to_fit();
    let keys = shrunk_keys
        .as_any()
        .downcast_ref::<PrimitiveArray<T>>()
        .expect("shrink_to_fit on PrimitiveArray<T> must return a PrimitiveArray<T>")
        .clone();
    let values = self.values.shrink_to_fit();
    Arc::new(Self {
        data_type,
        keys,
        values,
        is_ordered: self.is_ordered,
    })
}

/// Reports the offset of this array by delegating to `self.keys.offset()`.
fn offset(&self) -> usize {
self.keys.offset()
}
Expand Down Expand Up @@ -874,6 +889,10 @@ impl<K: ArrowDictionaryKeyType, V: Sync> Array for TypedDictionaryArray<'_, K, V
self.dictionary.is_empty()
}

/// Delegates to the wrapped dictionary array instead of panicking.
///
/// Because the method returns a type-erased [`ArrayRef`], the result is
/// whatever the underlying dictionary's `shrink_to_fit` produces, not another
/// typed wrapper.
// NOTE(review): assumes `self.dictionary` implements `Array` (its `is_empty`
// and `offset` are already forwarded to in this impl) — confirm.
fn shrink_to_fit(&self) -> ArrayRef {
    self.dictionary.shrink_to_fit()
}

/// Reports the offset by delegating to the wrapped `self.dictionary`.
fn offset(&self) -> usize {
self.dictionary.offset()
}
Expand Down
10 changes: 10 additions & 0 deletions arrow-array/src/array/fixed_size_binary_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -603,6 +603,16 @@ impl Array for FixedSizeBinaryArray {
self.len == 0
}

/// Returns a new array with the same data type, length and value length,
/// whose value data and optional null buffer are clones passed through their
/// own `shrink_to_fit`.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    let value_data = self.value_data.clone().shrink_to_fit();
    let nulls = self.nulls.clone().map(|null_buf| null_buf.shrink_to_fit());
    Arc::new(Self {
        data_type,
        value_data,
        nulls,
        len: self.len,
        value_length: self.value_length,
    })
}

/// This implementation always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
10 changes: 10 additions & 0 deletions arrow-array/src/array/fixed_size_list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,16 @@ impl Array for FixedSizeListArray {
self.len == 0
}

/// Returns a new fixed-size list array with the same data type, value length
/// and length, whose child values and optional null buffer have been passed
/// through their own `shrink_to_fit`.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    let values = self.values.shrink_to_fit();
    let nulls = self.nulls.clone().map(|null_buf| null_buf.shrink_to_fit());
    Arc::new(Self {
        data_type,
        values,
        nulls,
        value_length: self.value_length,
        len: self.len,
    })
}

/// This implementation always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
9 changes: 9 additions & 0 deletions arrow-array/src/array/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,15 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericListArray<OffsetSize> {
self.value_offsets.len() <= 1
}

/// Returns a new list array of the same data type whose null buffer (if any),
/// child values and offsets have each been passed through their own
/// `shrink_to_fit`.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    let nulls = self.nulls.clone().map(|null_buf| null_buf.shrink_to_fit());
    let values = self.values.shrink_to_fit();
    let value_offsets = self.value_offsets.clone().shrink_to_fit();
    Arc::new(Self {
        data_type,
        nulls,
        values,
        value_offsets,
    })
}

/// This implementation always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
10 changes: 10 additions & 0 deletions arrow-array/src/array/list_view_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,16 @@ impl<OffsetSize: OffsetSizeTrait> Array for GenericListViewArray<OffsetSize> {
self.value_sizes.is_empty()
}

/// Returns a new list-view array of the same data type whose null buffer (if
/// any), child values, offsets and sizes have each been passed through their
/// own `shrink_to_fit`.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    let nulls = self.nulls.clone().map(|null_buf| null_buf.shrink_to_fit());
    let values = self.values.shrink_to_fit();
    let value_offsets = self.value_offsets.clone().shrink_to_fit();
    let value_sizes = self.value_sizes.clone().shrink_to_fit();
    Arc::new(Self {
        data_type,
        nulls,
        values,
        value_offsets,
        value_sizes,
    })
}

/// This implementation always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
16 changes: 16 additions & 0 deletions arrow-array/src/array/map_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,22 @@ impl Array for MapArray {
self.value_offsets.len() <= 1
}

/// Returns a new map array of the same data type whose null buffer (if any),
/// entries and offsets have each been passed through their own
/// `shrink_to_fit`.
///
/// # Panics
///
/// Panics if shrinking the entries does not yield a `StructArray`; that would
/// indicate a broken `shrink_to_fit` implementation on the entries array.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    let nulls = self.nulls.clone().map(|null_buf| null_buf.shrink_to_fit());
    // `shrink_to_fit` borrows the entries, so no up-front clone is needed.
    // It returns a type-erased `ArrayRef`, hence the downcast back to the
    // concrete struct array.
    let shrunk_entries = self.entries.shrink_to_fit();
    let entries = shrunk_entries
        .as_any()
        .downcast_ref::<StructArray>()
        .expect("shrink_to_fit on StructArray must return a StructArray")
        .clone();
    let value_offsets = self.value_offsets.clone().shrink_to_fit();
    Arc::new(Self {
        data_type,
        nulls,
        entries,
        value_offsets,
    })
}

/// This implementation always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
11 changes: 11 additions & 0 deletions arrow-array/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,10 @@ pub trait Array: std::fmt::Debug + Send + Sync {
/// ```
fn is_empty(&self) -> bool;

/// Frees up unused memory.
///
/// Takes `&self` and hands back the result as a new [`ArrayRef`]; the
/// receiver is not modified. The return value is `#[must_use]` because
/// discarding it means the call had no observable effect on the caller's data.
#[must_use]
fn shrink_to_fit(&self) -> ArrayRef;

/// Returns the offset into the underlying data used by this array(-slice).
/// Note that the underlying data can be shared by many arrays.
/// This defaults to `0`.
Expand Down Expand Up @@ -365,6 +369,10 @@ impl Array for ArrayRef {
self.as_ref().is_empty()
}

/// Forwards to the `shrink_to_fit` of the array behind this reference.
fn shrink_to_fit(&self) -> ArrayRef {
self.as_ref().shrink_to_fit()
}

/// Forwards to the `offset` of the array behind this reference.
fn offset(&self) -> usize {
self.as_ref().offset()
}
Expand Down Expand Up @@ -434,6 +442,9 @@ impl<T: Array> Array for &T {
/// Forwards to `T::is_empty` on the referenced array.
fn is_empty(&self) -> bool {
T::is_empty(self)
}
/// Forwards to `T::shrink_to_fit` on the referenced array.
///
/// The callee already returns an [`ArrayRef`], so it is returned directly;
/// wrapping it in another `Arc` would only add a second allocation and an
/// extra layer of indirection.
fn shrink_to_fit(&self) -> ArrayRef {
    T::shrink_to_fit(self)
}

fn offset(&self) -> usize {
T::offset(self)
Expand Down
4 changes: 4 additions & 0 deletions arrow-array/src/array/null_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,10 @@ impl Array for NullArray {
self.len == 0
}

/// Returns a fresh `NullArray` of the same length; the only state copied over
/// is `len`.
fn shrink_to_fit(&self) -> ArrayRef {
    let len = self.len;
    Arc::new(Self { len })
}

/// This implementation always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
8 changes: 8 additions & 0 deletions arrow-array/src/array/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1152,6 +1152,14 @@ impl<T: ArrowPrimitiveType> Array for PrimitiveArray<T> {
self.values.is_empty()
}

/// Returns a new primitive array of the same data type whose values and
/// optional null buffer are clones passed through their own `shrink_to_fit`.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    let values = self.values.clone().shrink_to_fit();
    let nulls = self.nulls.clone().map(|null_buf| null_buf.shrink_to_fit());
    Arc::new(Self {
        data_type,
        values,
        nulls,
    })
}

/// This implementation always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
12 changes: 12 additions & 0 deletions arrow-array/src/array/run_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,14 @@ impl<T: RunEndIndexType> Array for RunArray<T> {
self.run_ends.is_empty()
}

/// Returns a new run array of the same data type whose run ends and values
/// have each been passed through their own `shrink_to_fit`.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    let run_ends = self.run_ends.clone().shrink_to_fit();
    let values = self.values.shrink_to_fit();
    Arc::new(Self {
        data_type,
        run_ends,
        values,
    })
}

/// Reports the offset of this array by delegating to `self.run_ends.offset()`.
fn offset(&self) -> usize {
self.run_ends.offset()
}
Expand Down Expand Up @@ -584,6 +592,10 @@ impl<R: RunEndIndexType, V: Sync> Array for TypedRunArray<'_, R, V> {
self.run_array.is_empty()
}

/// Delegates to the wrapped run array instead of panicking.
///
/// Because the method returns a type-erased [`ArrayRef`], the result is
/// whatever the underlying run array's `shrink_to_fit` produces, not another
/// typed wrapper.
// NOTE(review): assumes `self.run_array` implements `Array` (its `is_empty`
// and `offset` are already forwarded to in this impl) — confirm.
fn shrink_to_fit(&self) -> ArrayRef {
    self.run_array.shrink_to_fit()
}

/// Reports the offset by delegating to the wrapped `self.run_array`.
fn offset(&self) -> usize {
self.run_array.offset()
}
Expand Down
9 changes: 9 additions & 0 deletions arrow-array/src/array/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,15 @@ impl Array for StructArray {
self.len == 0
}

/// Returns a new struct array with the same length and data type, whose
/// optional null buffer and every child field have been passed through their
/// own `shrink_to_fit`.
fn shrink_to_fit(&self) -> ArrayRef {
    let len = self.len;
    let data_type = self.data_type.clone();
    let nulls = self.nulls.clone().map(|null_buf| null_buf.shrink_to_fit());
    // Shrink each child column, preserving field order.
    let fields = self
        .fields
        .iter()
        .map(|column| column.shrink_to_fit())
        .collect();
    Arc::new(Self {
        len,
        data_type,
        nulls,
        fields,
    })
}

/// This implementation always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
13 changes: 13 additions & 0 deletions arrow-array/src/array/union_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,19 @@ impl Array for UnionArray {
self.type_ids.is_empty()
}

/// Returns a new union array of the same data type whose type ids, optional
/// offsets and every populated child slot have been passed through their own
/// `shrink_to_fit`. Empty (`None`) child slots stay empty.
fn shrink_to_fit(&self) -> ArrayRef {
    let data_type = self.data_type.clone();
    let type_ids = self.type_ids.clone().shrink_to_fit();
    let offsets = self.offsets.clone().map(|offset_buf| offset_buf.shrink_to_fit());
    let fields = self
        .fields
        .iter()
        .map(|slot| slot.as_ref().map(|child| child.shrink_to_fit()))
        .collect();
    Arc::new(Self {
        data_type,
        type_ids,
        offsets,
        fields,
    })
}

/// This implementation always reports an offset of `0`.
fn offset(&self) -> usize {
0
}
Expand Down
12 changes: 12 additions & 0 deletions arrow-buffer/src/buffer/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,18 @@ impl BooleanBuffer {
self.len == 0
}

/// Free up unused memory.
///
/// Consumes the buffer and returns one with the same `offset` and `len`
/// whose underlying byte buffer has been passed through its own
/// `shrink_to_fit`.
#[inline]
#[must_use]
pub fn shrink_to_fit(self) -> Self {
    let Self {
        buffer,
        offset,
        len,
    } = self;
    // TODO: we could shrink even more in the case where we are a small sub-slice of the full buffer
    let buffer = buffer.shrink_to_fit();
    Self {
        buffer,
        offset,
        len,
    }
}

/// Returns the boolean value at index `i`.
///
/// # Panics
Expand Down
Loading