diff --git a/stacker/Cargo.toml b/stacker/Cargo.toml
index e0080a9a42..d974be3864 100644
--- a/stacker/Cargo.toml
+++ b/stacker/Cargo.toml
@@ -11,6 +11,7 @@ description = "term hashmap used for indexing"
 murmurhash32 = "0.3"
 common = { version = "0.6", path = "../common/", package = "tantivy-common" }
 ahash = { version = "0.8.3", default-features = false, optional = true }
+rand_distr = "0.4.3"
 
 [[bench]]
 harness = false
diff --git a/stacker/fuzz_test/Cargo.toml b/stacker/fuzz_test/Cargo.toml
new file mode 100644
index 0000000000..02478c95b6
--- /dev/null
+++ b/stacker/fuzz_test/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "fuzz_test"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+ahash = "0.8.7"
+rand = "0.8.5"
+rand_distr = "0.4.3"
+tantivy-stacker = { version = "0.2.0", path = ".." }
+
+[workspace]
+
diff --git a/stacker/fuzz_test/src/main.rs b/stacker/fuzz_test/src/main.rs
new file mode 100644
index 0000000000..2367ddc33f
--- /dev/null
+++ b/stacker/fuzz_test/src/main.rs
@@ -0,0 +1,63 @@
+//! Differential fuzzer: replays random workloads against `ArenaHashMap` and an
+//! `AHashMap` reference, then checks that both hold the same per-key counts.
+
+use ahash::AHashMap;
+use rand::{rngs::StdRng, Rng, SeedableRng};
+use rand_distr::Exp;
+use tantivy_stacker::ArenaHashMap;
+
+/// Runs one million rounds, each driven by a freshly drawn random seed.
+fn main() {
+    for _ in 0..1_000_000 {
+        let seed: u64 = rand::random();
+        test_with_seed(seed);
+    }
+}
+
+/// Runs one round that is fully determined by `seed`.
+///
+/// Performs between 1_000 and 1_000_000 insertions of random byte keys into both
+/// maps, bumping a `u64` counter per key, then compares the two maps.
+///
+/// # Panics
+///
+/// Panics when the maps disagree. The seed is printed before the comparison and
+/// repeated in the assertion messages so that the round can be replayed.
+fn test_with_seed(seed: u64) {
+    let mut hash_map = AHashMap::new();
+    let mut arena_hashmap = ArenaHashMap::default();
+    let mut rng = StdRng::seed_from_u64(seed);
+    let key_count = rng.gen_range(1_000..=1_000_000);
+    let exp = Exp::new(0.05).unwrap();
+
+    for _ in 0..key_count {
+        // Exponentially distributed key length, clamped to 1..=u16::MAX bytes.
+        let key_length = rng.sample::<f32, _>(exp).min(u16::MAX as f32).max(1.0) as usize;
+
+        let key: Vec<u8> = (0..key_length).map(|_| rng.gen()).collect();
+
+        arena_hashmap.mutate_or_create(&key, |current_count| {
+            let count: u64 = current_count.unwrap_or(0);
+            count + 1
+        });
+        hash_map.entry(key).and_modify(|e| *e += 1).or_insert(1);
+    }
+
+    println!(
+        "Seed: {} \t {:.2}MB",
+        seed,
+        arena_hashmap.memory_arena.len() as f32 / 1024.0 / 1024.0
+    );
+    // Check the contents of the ArenaHashMap
+    let mut entry_count = 0usize;
+    for (key, addr) in arena_hashmap.iter() {
+        let count: u64 = arena_hashmap.read(addr);
+        let count_expected = hash_map
+            .get(key)
+            .unwrap_or_else(|| panic!("NOT FOUND: Key: {:?}, Count: {}", key, count));
+        assert_eq!(count, *count_expected, "seed {seed}: wrong count for key {key:?}");
+        entry_count += 1;
+    }
+    // The loop only proves the arena is a subset of the reference; equal sizes rule out lost keys.
+    assert_eq!(entry_count, hash_map.len(), "seed {seed}: entry count mismatch");
+}
diff --git a/stacker/src/memory_arena.rs b/stacker/src/memory_arena.rs
index 50564eee26..5c5bf44cf7 100644
--- a/stacker/src/memory_arena.rs
+++ b/stacker/src/memory_arena.rs
@@ -113,6 +113,20 @@ impl MemoryArena {
         self.pages.len() * PAGE_SIZE
     }
 
+    /// Returns the number of bytes allocated in the arena.
+    ///
+    /// Every page before the last is counted as a full `PAGE_SIZE`; only the last
+    /// page contributes its own `len`. An arena without pages reports 0.
+    pub fn len(&self) -> usize {
+        let last_page_len = self.pages.last().map_or(0, |page| page.len);
+        self.pages.len().saturating_sub(1) * PAGE_SIZE + last_page_len
+    }
+
+    /// Returns `true` when [`MemoryArena::len`] is 0.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
     #[inline]
     pub fn write_at<Item: Copy + 'static>(&mut self, addr: Addr, val: Item) {
         let dest = self.slice_mut(addr, std::mem::size_of::<Item>());