From 93ca6968e9dc1ecc943efb6a567f6c47a5da6f38 Mon Sep 17 00:00:00 2001 From: Maxim Sokolov Date: Wed, 12 Apr 2023 12:20:45 -0700 Subject: [PATCH 1/2] implement iterator which starts at arbitrary location --- src/map.rs | 156 ++++++++++++++++++++++++++++++++++++++++++++++++- src/raw/mod.rs | 22 ++++++- 2 files changed, 176 insertions(+), 2 deletions(-) diff --git a/src/map.rs b/src/map.rs index 6938801293..b6d042515c 100644 --- a/src/map.rs +++ b/src/map.rs @@ -1,4 +1,6 @@ -use crate::raw::{Allocator, Bucket, Global, RawDrain, RawIntoIter, RawIter, RawTable}; +use crate::raw::{ + Allocator, Bucket, Global, RawDrain, RawIntoIter, RawIter, RawIterRange, RawTable, +}; use crate::{Equivalent, TryReserveError}; use core::borrow::Borrow; use core::fmt::{self, Debug}; @@ -798,6 +800,55 @@ impl HashMap { } } + /// An iterator visiting all key-value pairs in arbitrary order starting at some arbitrary + /// location computed based on `hint`. Hint could be any `usize` integer. + /// + /// If all elements of the table are imagined on a ring then `hint` changes the + /// starting point on the ring but does not change relative positions of elements + /// on the ring itself. The relative positions on the ring are the same as for [`iter`] + /// if we allow the order to wrap around. + /// + /// The iterator element type is `(&'a K, &'a V)`. + /// + /// [`iter`]: struct.HashMap.html#method.iter + /// + /// # Examples + /// + /// ``` + /// use hashbrown::HashMap; + /// + /// let mut map = HashMap::new(); + /// map.insert("a", 1); + /// map.insert("b", 2); + /// map.insert("c", 3); + /// assert_eq!(map.len(), 3); + /// let mut vec: Vec<(&str, i32)> = Vec::new(); + /// + /// for (key, val) in map.iter_at(0x517cc1b727220a95_usize) { + /// println!("key: {} val: {}", key, val); + /// vec.push((*key, *val)); + /// } + /// + /// // The `Iter` iterator produces items in arbitrary order, so the + /// // items must be sorted to test them against a sorted array. 
+ /// vec.sort_unstable(); + /// assert_eq!(vec, [("a", 1), ("b", 2), ("c", 3)]); + /// + /// assert_eq!(map.len(), 3); + /// ``` + #[cfg_attr(feature = "inline-more", inline)] + pub fn iter_at(&self, hint: usize) -> IterHinted<'_, K, V> { + // Here we tie the lifetime of self to the iter. + unsafe { + IterHinted { + inner_head: self.table.iter_at(hint), + inner_tail: self.table.iter_at(0), + items: self.len(), + marker: PhantomData, + } + } + } + /// An iterator visiting all key-value pairs in arbitrary order, /// with mutable references to the values. /// The iterator element type is `(&'a K, &'a mut V)`. @@ -4821,6 +4872,75 @@ impl ExactSizeIterator for Iter<'_, K, V> { impl FusedIterator for Iter<'_, K, V> {} +/// An iterator over the entries of a `HashMap` in arbitrary order starting at some arbitrary +/// location computed based on `hint`. Hint could be any `usize` integer. +/// The iterator element type is `(&'a K, &'a V)`. +/// +/// This `struct` is created by the [`iter_at`] method on [`HashMap`]. See its +/// documentation for more. +/// +/// [`iter_at`]: struct.HashMap.html#method.iter_at +/// [`HashMap`]: struct.HashMap.html +/// +/// # Examples +/// +/// ``` +/// use hashbrown::HashMap; +/// +/// let map: HashMap<_, _> = [(1, "a"), (2, "b"), (3, "c")].into(); +/// +/// let mut iter = map.iter_at(0x517cc1b727220a95_usize); +/// let mut vec = vec![iter.next(), iter.next(), iter.next()]; +/// +/// // The `IterHinted` iterator produces items in arbitrary order, so the +/// // items must be sorted to test them against a sorted array. 
+/// vec.sort_unstable(); +/// assert_eq!(vec, [Some((&1, &"a")), Some((&2, &"b")), Some((&3, &"c"))]); +/// +/// // It is a fused iterator +/// assert_eq!(iter.next(), None); +/// assert_eq!(iter.next(), None); +/// ``` +pub struct IterHinted<'a, K, V> { + inner_head: RawIterRange<(K, V)>, + inner_tail: RawIterRange<(K, V)>, + items: usize, + marker: PhantomData<(&'a K, &'a V)>, +} + +impl<'a, K, V> Iterator for IterHinted<'a, K, V> { + type Item = (&'a K, &'a V); + + #[cfg_attr(feature = "inline-more", inline)] + fn next(&mut self) -> Option<(&'a K, &'a V)> { + if self.items == 0 { + return None; + } + loop { + // Avoid `Option::map` because it bloats LLVM IR. + match self.inner_head.next() { + Some(x) => { + self.items -= 1; + unsafe { + let r = x.as_ref(); + return Some((&r.0, &r.1)); + } + } + None => { + mem::swap(&mut self.inner_head, &mut self.inner_tail); + } + } + } + } + + #[cfg_attr(feature = "inline-more", inline)] + fn size_hint(&self) -> (usize, Option<usize>) { + (self.items, Some(self.items)) + } +} + +impl<K, V> FusedIterator for IterHinted<'_, K, V> {} + impl<'a, K, V> Iterator for IterMut<'a, K, V> { type Item = (&'a K, &'a mut V); @@ -8575,4 +8695,38 @@ mod test_map { ); let _map2 = map1.clone(); } + + #[test] + fn test_iter_at() { + #[cfg(miri)] + const N: usize = 32; + #[cfg(not(miri))] + const N: usize = 512; + for i in 0..N { + let mut h = HashMap::new(); + for j in 0..i { + h.insert(j, j); + } + let mut s = vec![0usize; i]; + for (k, _) in h.iter_at(0) { + s[*k] += 1; + } + for (idx, v) in s.iter().enumerate() { + assert_eq!(*v, 1, "i={} idx={} v={}", i, idx, *v); + } + #[cfg(miri)] + const K: usize = 1; + #[cfg(not(miri))] + const K: usize = 16; + for k in 0..K { + let hint = 0x517cc1b727220a95_usize.wrapping_mul(i).wrapping_add(k); + for (k, _) in h.iter_at(hint) { + s[*k] += 1; + } + for (idx, v) in s.iter().enumerate() { + assert_eq!(*v, 1 + 1 + k, "i={} hint={} idx={} v={}", i, hint, idx, *v); + } + } + } + } } diff --git a/src/raw/mod.rs 
b/src/raw/mod.rs index 94958a0924..601426735f 100644 --- a/src/raw/mod.rs +++ b/src/raw/mod.rs @@ -1409,6 +1409,26 @@ impl RawTable { } } + /// Returns an iterator over a subset of elements in the table starting + /// at an arbitrary location computed based on `hint`. If `hint` is zero it + /// is guaranteed to return all elements of the table in arbitrary order. + /// The subset of elements returned with non-zero `hint` is some tail of + /// the elements returned with zero `hint`. + /// + /// It is up to the caller to ensure that the `RawTable` outlives the `RawIterRange`. + /// Because we cannot make the `next` method unsafe on the `RawIterRange` + /// struct, we have to make the `iter_at` method unsafe. + #[inline] + pub unsafe fn iter_at(&self, hint: usize) -> RawIterRange<T> { + let index = hint & self.table.bucket_mask & !(Group::WIDTH - 1); + let data = Bucket::from_base_index(self.data_end(), index); + RawIterRange::new( + self.table.ctrl.as_ptr().add(index), + data, + self.table.buckets() - index, + ) + } + /// Returns an iterator over occupied buckets that could match a given hash. /// /// `RawTable` only stores 7 bits of the hash value, so this iterator may @@ -2709,7 +2729,7 @@ impl IntoIterator for RawTable { /// Iterator over a sub-range of a table. Unlike `RawIter` this iterator does /// not track an item count. -pub(crate) struct RawIterRange<T> { +pub struct RawIterRange<T> { // Mask of full buckets in the current group. Bits are cleared from this // mask as each element is processed. 
current_group: BitMask, From 24883c724f3d9a8d7de7e281cd666fcb0510845f Mon Sep 17 00:00:00 2001 From: Maxim Sokolov Date: Wed, 12 Apr 2023 12:40:55 -0700 Subject: [PATCH 2/2] fix overflowing literal --- src/map.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/map.rs b/src/map.rs index b6d042515c..b14e8bf892 100644 --- a/src/map.rs +++ b/src/map.rs @@ -824,7 +824,7 @@ impl HashMap { /// assert_eq!(map.len(), 3); /// let mut vec: Vec<(&str, i32)> = Vec::new(); /// - /// for (key, val) in map.iter_at(0x517cc1b727220a95_usize) { + /// for (key, val) in map.iter_at(0x517cc1b727220a95u64 as usize) { /// println!("key: {} val: {}", key, val); /// vec.push((*key, *val)); /// } @@ -4889,7 +4889,7 @@ impl FusedIterator for Iter<'_, K, V> {} /// /// let map: HashMap<_, _> = [(1, "a"), (2, "b"), (3, "c")].into(); /// -/// let mut iter = map.iter_at(0x517cc1b727220a95_usize); +/// let mut iter = map.iter_at(0x517cc1b727220a95u64 as usize); /// let mut vec = vec![iter.next(), iter.next(), iter.next()]; /// /// // The `IterHinted` iterator produces items in arbitrary order, so the @@ -8719,7 +8719,9 @@ mod test_map { #[cfg(not(miri))] const K: usize = 16; for k in 0..K { - let hint = 0x517cc1b727220a95_usize.wrapping_mul(i).wrapping_add(k); + let hint = (0x517cc1b727220a95u64 as usize) + .wrapping_mul(i) + .wrapping_add(k); for (k, _) in h.iter_at(hint) { s[*k] += 1; }