Skip to content

Commit

Permalink
port feature drain_filter (#292) to v2
Browse files Browse the repository at this point in the history
  • Loading branch information
matzemathics committed Jan 14, 2024
1 parent a410c67 commit 0d244f4
Show file tree
Hide file tree
Showing 4 changed files with 265 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
target
Cargo.lock
/fuzz/hfuzz_target
/.vscode
2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ documentation = "https://docs.rs/smallvec/"
write = []
specialization = []
may_dangle = []
drain_filter = []
drain_keep_rest = ["drain_filter"]

[dependencies]
serde = { version = "1", optional = true, default-features = false }
Expand Down
237 changes: 237 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,178 @@ impl<'a, T: 'a, const N: usize> Drop for Drain<'a, T, N> {
}
}

#[cfg(feature = "drain_filter")]
/// An iterator which uses a closure to determine if an element should be removed.
///
/// Returned from [`SmallVec::drain_filter`][1].
///
/// [1]: struct.SmallVec.html#method.drain_filter
pub struct DrainFilter<'a, T, const N: usize, F>
where
F: FnMut(&mut T) -> bool,
{
vec: &'a mut SmallVec<T, N>,
/// The index of the item that will be inspected by the next call to `next`.
idx: usize,
/// The number of items that have been drained (removed) thus far.
del: usize,
/// The original length of `vec` prior to draining.
old_len: usize,
/// The filter test predicate.
pred: F,
/// A flag that indicates a panic has occurred in the filter test predicate.
/// This is used as a hint in the drop implementation to prevent consumption
/// of the remainder of the `DrainFilter`. Any unprocessed items will be
/// backshifted in the `vec`, but no further items will be dropped or
/// tested by the filter predicate.
panic_flag: bool,
}

#[cfg(feature = "drain_filter")]
impl<T, F, const N: usize> Iterator for DrainFilter<'_, T, N, F>
where
F: FnMut(&mut T) -> bool,
{
type Item = T;

fn next(&mut self) -> Option<T> {
unsafe {
while self.idx < self.old_len {
let i = self.idx;
let v = core::slice::from_raw_parts_mut(self.vec.as_mut_ptr(), self.old_len);
self.panic_flag = true;
let drained = (self.pred)(&mut v[i]);
self.panic_flag = false;
// Update the index *after* the predicate is called. If the index
// is updated prior and the predicate panics, the element at this
// index would be leaked.
self.idx += 1;
if drained {
self.del += 1;
return Some(core::ptr::read(&v[i]));
} else if self.del > 0 {
let del = self.del;
let src: *const Self::Item = &v[i];
let dst: *mut Self::Item = &mut v[i - del];
core::ptr::copy_nonoverlapping(src, dst, 1);
}
}
None
}
}

fn size_hint(&self) -> (usize, Option<usize>) {
(0, Some(self.old_len - self.idx))
}
}

#[cfg(feature = "drain_filter")]
impl<T, F, const N: usize> Drop for DrainFilter<'_, T, N, F>
where
F: FnMut(&mut T) -> bool,
{
fn drop(&mut self) {
struct BackshiftOnDrop<'a, 'b, T, const N: usize, F>
where
F: FnMut(&mut T) -> bool,
{
drain: &'b mut DrainFilter<'a, T, N, F>,
}

impl<'a, 'b, T, const N: usize, F> Drop for BackshiftOnDrop<'a, 'b, T, N, F>
where
F: FnMut(&mut T) -> bool,
{
fn drop(&mut self) {
unsafe {
if self.drain.idx < self.drain.old_len && self.drain.del > 0 {
// This is a pretty messed up state, and there isn't really an
// obviously right thing to do. We don't want to keep trying
// to execute `pred`, so we just backshift all the unprocessed
// elements and tell the vec that they still exist. The backshift
// is required to prevent a double-drop of the last successfully
// drained item prior to a panic in the predicate.
let ptr = self.drain.vec.as_mut_ptr();
let src = ptr.add(self.drain.idx);
let dst = src.sub(self.drain.del);
let tail_len = self.drain.old_len - self.drain.idx;
src.copy_to(dst, tail_len);
}
self.drain.vec.set_len(self.drain.old_len - self.drain.del);
}
}
}

let backshift = BackshiftOnDrop { drain: self };

// Attempt to consume any remaining elements if the filter predicate
// has not yet panicked. We'll backshift any remaining elements
// whether we've already panicked or if the consumption here panics.
if !backshift.drain.panic_flag {
backshift.drain.for_each(drop);
}
}
}

#[cfg(feature = "drain_keep_rest")]
impl<T, F, const N: usize> DrainFilter<'_, T, N, F>
where
F: FnMut(&mut T) -> bool,
{
/// Keep unyielded elements in the source `Vec`.
///
/// # Examples
///
/// ```
/// # use smallvec::{smallvec, SmallVec};
///
/// let mut vec: SmallVec<char, 2> = smallvec!['a', 'b', 'c'];
/// let mut drain = vec.drain_filter(|_| true);
///
/// assert_eq!(drain.next().unwrap(), 'a');
///
/// // This call keeps 'b' and 'c' in the vec.
/// drain.keep_rest();
///
/// // If we wouldn't call `keep_rest()`,
/// // `vec` would be empty.
/// assert_eq!(vec, SmallVec::<char, 2>::from_slice(&['b', 'c']));
/// ```
pub fn keep_rest(self) {
// At this moment layout looks like this:
//
// _____________________/-- old_len
// / \
// [kept] [yielded] [tail]
// \_______/ ^-- idx
// \-- del
//
// Normally `Drop` impl would drop [tail] (via .for_each(drop), ie still calling `pred`)
//
// 1. Move [tail] after [kept]
// 2. Update length of the original vec to `old_len - del`
// a. In case of ZST, this is the only thing we want to do
// 3. Do *not* drop self, as everything is put in a consistent state already, there is nothing to do
let mut this = ManuallyDrop::new(self);

unsafe {
// ZSTs have no identity, so we don't need to move them around.
let needs_move = core::mem::size_of::<T>() != 0;

if needs_move && this.idx < this.old_len && this.del > 0 {
let ptr = this.vec.as_mut_ptr();
let src = ptr.add(this.idx);
let dst = src.sub(this.del);
let tail_len = this.old_len - this.idx;
src.copy_to(dst, tail_len);
}

let new_len = this.old_len - this.del;
this.vec.set_len(new_len);
}
}
}

/// An iterator that consumes a `SmallVec` and yields its items by value.
///
/// Returned from [`SmallVec::into_iter`][1].
Expand Down Expand Up @@ -732,6 +904,71 @@ impl<T, const N: usize> SmallVec<T, N> {
}
}

#[cfg(feature = "drain_filter")]
/// Creates an iterator which uses a closure to determine if an element should be removed.
///
/// If the closure returns true, the element is removed and yielded. If the closure returns
/// false, the element will remain in the vector and will not be yielded by the iterator.
///
/// Using this method is equivalent to the following code:
/// ```
/// # use smallvec::SmallVec;
/// # let some_predicate = |x: &mut i32| { *x == 2 || *x == 3 || *x == 6 };
/// # let mut vec: SmallVec<i32, 8> = SmallVec::from_slice(&[1i32, 2, 3, 4, 5, 6]);
/// let mut i = 0;
/// while i < vec.len() {
/// if some_predicate(&mut vec[i]) {
/// let val = vec.remove(i);
/// // your code here
/// } else {
/// i += 1;
/// }
/// }
///
/// # assert_eq!(vec, SmallVec::<i32, 8>::from_slice(&[1i32, 4, 5]));
/// ```
/// ///
/// But `drain_filter` is easier to use. `drain_filter` is also more efficient,
/// because it can backshift the elements of the array in bulk.
///
/// Note that `drain_filter` also lets you mutate every element in the filter closure,
/// regardless of whether you choose to keep or remove it.
///
/// # Examples
///
/// Splitting an array into evens and odds, reusing the original allocation:
///
/// ```
/// # use smallvec::SmallVec;
/// let mut numbers: SmallVec<i32, 16> = SmallVec::from_slice(&[1i32, 2, 3, 4, 5, 6, 8, 9, 11, 13, 14, 15]);
///
/// let evens = numbers.drain_filter(|x| *x % 2 == 0).collect::<SmallVec<i32, 16>>();
/// let odds = numbers;
///
/// assert_eq!(evens, SmallVec::<i32, 16>::from_slice(&[2i32, 4, 6, 8, 14]));
/// assert_eq!(odds, SmallVec::<i32, 16>::from_slice(&[1i32, 3, 5, 9, 11, 13, 15]));
/// ```
pub fn drain_filter<F>(&mut self, filter: F) -> DrainFilter<'_, T, N, F>
where
F: FnMut(&mut T) -> bool,
{
let old_len = self.len();

// Guard against us getting leaked (leak amplification)
unsafe {
self.set_len(0);
}

DrainFilter {
vec: self,
idx: 0,
del: 0,
old_len,
pred: filter,
panic_flag: false,
}
}

#[inline]
pub fn push(&mut self, value: T) {
let len = self.len();
Expand Down
25 changes: 25 additions & 0 deletions src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1003,3 +1003,28 @@ fn test_clone_from() {
b.clone_from(&c);
assert_eq!(&*b, &[20, 21, 22]);
}

#[cfg(feature = "drain_filter")]
#[test]
fn drain_filter() {
let mut a: SmallVec<u8, 2> = smallvec![1u8, 2, 3, 4, 5, 6, 7, 8];

let b: SmallVec<u8, 2> = a.drain_filter(|x| *x % 3 == 0).collect();

assert_eq!(a, SmallVec::<u8, 2>::from_slice(&[1u8, 2, 4, 5, 7, 8]));
assert_eq!(b, SmallVec::<u8, 2>::from_slice(&[3u8, 6]));
}

#[cfg(feature = "drain_keep_rest")]
#[test]
fn drain_keep_rest() {
let mut a: SmallVec<i32, 3> = smallvec![1i32, 2, 3, 4, 5, 6, 7, 8];
let mut df = a.drain_filter(|x| *x % 2 == 0);

assert_eq!(df.next().unwrap(), 2);
assert_eq!(df.next().unwrap(), 4);

df.keep_rest();

assert_eq!(a, SmallVec::<i32, 3>::from_slice(&[1i32, 3, 5, 6, 7, 8]));
}

0 comments on commit 0d244f4

Please sign in to comment.