Skip to content

Commit

Permalink
use only 1 bit per account in index (#2218)
Browse files Browse the repository at this point in the history
* use only 1 bit per account in index

* add stat for exceptional entry in index

* add variation of occupy tests

* fmt

* fix comment
  • Loading branch information
jeffwashington authored Jul 23, 2024
1 parent 44b7b38 commit ab04ff8
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 12 deletions.
10 changes: 10 additions & 0 deletions accounts-db/src/bucket_map_holder_stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,16 @@ impl BucketMapHolderStats {
.unwrap_or_default(),
i64
),
(
"index_exceptional_entry",
disk.map(|disk| disk
.stats
.index
.index_uses_uncommon_slot_list_len_or_refcount
.load(Ordering::Relaxed))
.unwrap_or_default(),
i64
),
(
"disk_index_data_file_size",
disk.map(|disk| disk.stats.data.total_file_size.load(Ordering::Relaxed))
Expand Down
10 changes: 10 additions & 0 deletions bucket_map/src/bucket.rs
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,10 @@ impl<'b, T: Clone + Copy + PartialEq + std::fmt::Debug + 'static> Bucket<T> {
if let Some(single_element) = data.next() {
OccupiedEnum::OneSlotInIndex(single_element)
} else {
self.stats
.index
.index_uses_uncommon_slot_list_len_or_refcount
.store(true, Ordering::Relaxed);
OccupiedEnum::ZeroSlots
},
);
Expand Down Expand Up @@ -651,6 +655,10 @@ impl<'b, T: Clone + Copy + PartialEq + std::fmt::Debug + 'static> Bucket<T> {
&mut self.index,
OccupiedEnum::MultipleSlots(&multiple_slots),
);
self.stats
.index
.index_uses_uncommon_slot_list_len_or_refcount
.store(true, Ordering::Relaxed);
success = true;
break;
}
Expand Down Expand Up @@ -1234,9 +1242,11 @@ mod tests {
// This causes it to be skipped.
let entry = IndexEntryPlaceInBucket::new(ix);
entry.init(&mut index, &(other.0));
entry.set_slot_count_enum_value(&mut index, OccupiedEnum::ZeroSlots);
let entry = IndexEntryPlaceInBucket::new(ix + 1);
// sets pubkey value and enum value of ZeroSlots. Leaving it at zero is illegal at startup, so we'll assert when we find this duplicate.
entry.init(&mut index, &(raw[0].0));
entry.set_slot_count_enum_value(&mut index, OccupiedEnum::ZeroSlots);

// since the same key is already in use with a different value, it is a duplicate.
// But, it is a zero length entry. This is not supported at startup. Startup would have never generated a zero length occupied entry.
Expand Down
3 changes: 2 additions & 1 deletion bucket_map/src/bucket_stats.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use std::sync::{
atomic::{AtomicU64, Ordering},
atomic::{AtomicBool, AtomicU64, Ordering},
Arc,
};

Expand All @@ -22,6 +22,7 @@ pub struct BucketStats {
pub file_count: AtomicU64,
pub total_file_size: AtomicU64,
pub startup: StartupBucketStats,
pub index_uses_uncommon_slot_list_len_or_refcount: AtomicBool,
}

impl BucketStats {
Expand Down
29 changes: 27 additions & 2 deletions bucket_map/src/bucket_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -541,12 +541,15 @@ impl<O: BucketOccupied> BucketStorage<O> {
mod test {
use {
super::*,
crate::{bucket_storage::BucketOccupied, index_entry::IndexBucket},
crate::{
bucket_storage::BucketOccupied,
index_entry::{BucketWithHeader, IndexBucket},
},
tempfile::tempdir,
};

#[test]
fn test_bucket_storage() {
fn test_bucket_storage_index_bucket() {
let tmpdir = tempdir().unwrap();
let paths: Vec<PathBuf> = vec![tmpdir.path().to_path_buf()];
assert!(!paths.is_empty());
Expand All @@ -557,13 +560,35 @@ mod test {
let max_search = 1;
let stats = Arc::default();
let count = Arc::default();
// this uses `IndexBucket`. `IndexBucket` doesn't change state on `occupy()`
let mut storage = BucketStorage::<IndexBucket<u64>>::new(
drives, num_elems, elem_size, max_search, stats, count,
)
.0;
let ix = 0;
assert!(storage.is_free(ix));
assert!(storage.occupy(ix, false).is_ok());
}

#[test]
fn test_bucket_storage_using_header() {
let tmpdir = tempdir().unwrap();
let paths: Vec<PathBuf> = vec![tmpdir.path().to_path_buf()];
assert!(!paths.is_empty());

let drives = Arc::new(paths);
let num_elems = 1;
let elem_size = std::mem::size_of::<crate::index_entry::IndexEntry<u64>>() as u64;
let max_search = 1;
let stats = Arc::default();
let count = Arc::default();
let mut storage = BucketStorage::<BucketWithHeader>::new(
drives, num_elems, elem_size, max_search, stats, count,
)
.0;
let ix = 0;
assert!(storage.is_free(ix));
assert!(storage.occupy(ix, false).is_ok());
assert!(storage.occupy(ix, false).is_err());
assert!(!storage.is_free(ix));
storage.free(ix);
Expand Down
36 changes: 27 additions & 9 deletions bucket_map/src/index_entry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,21 +83,38 @@ impl BucketOccupied for BucketWithHeader {
#[derive(Debug)]
pub struct IndexBucketUsingBitVecBits<T: PartialEq + 'static> {
/// 2 bits per entry that represent a 4 state enum tag
pub enum_tag: BitVec,
pub enum_tag_first_bit: BitVec,
/// second will be empty in all healthy cases because in real use, we only use enum values 0 and 2 (and we use the high bit for first)
pub enum_tag_second_bit: BitVec,
/// number of elements allocated
capacity: u64,
_phantom: PhantomData<&'static T>,
}

impl<T: Copy + PartialEq + 'static> IndexBucketUsingBitVecBits<T> {
/// set the 2 bits (first and second) in `enum_tag`
fn set_bits(&mut self, ix: u64, first: bool, second: bool) {
self.enum_tag.set(ix * 2, first);
self.enum_tag.set(ix * 2 + 1, second);
pub(crate) fn set_bits(&mut self, ix: u64, first: bool, second: bool) {
self.enum_tag_first_bit.set(ix, first);
if self.enum_tag_second_bit.is_empty() {
if !second {
// enum_tag_second_bit can remain empty.
// The first time someone sets the second bit, we have to allocate and check it.
return;
}
self.enum_tag_second_bit = BitVec::new_fill(false, self.capacity);
}
self.enum_tag_second_bit.set(ix, second);
}
/// get the 2 bits (first and second) in `enum_tag`
fn get_bits(&self, ix: u64) -> (bool, bool) {
(self.enum_tag.get(ix * 2), self.enum_tag.get(ix * 2 + 1))
(
self.enum_tag_first_bit.get(ix),
if self.enum_tag_second_bit.is_empty() {
false
} else {
self.enum_tag_second_bit.get(ix)
},
)
}
/// turn the tag into bits and store them
fn set_enum_tag(&mut self, ix: u64, value: OccupiedEnumTag) {
Expand All @@ -115,7 +132,6 @@ impl<T: Copy + PartialEq + 'static> IndexBucketUsingBitVecBits<T> {
impl<T: Copy + PartialEq + 'static> BucketOccupied for IndexBucketUsingBitVecBits<T> {
fn occupy(&mut self, element: &mut [u8], ix: usize) {
assert!(self.is_free(element, ix));
self.set_enum_tag(ix as u64, OccupiedEnumTag::ZeroSlots);
}
fn free(&mut self, element: &mut [u8], ix: usize) {
assert!(!self.is_free(element, ix));
Expand All @@ -130,8 +146,10 @@ impl<T: Copy + PartialEq + 'static> BucketOccupied for IndexBucketUsingBitVecBit
}
fn new(capacity: Capacity) -> Self {
Self {
// note: twice as many bits allocated as `num_elements` because we store 2 bits per element
enum_tag: BitVec::new_fill(false, capacity.capacity() * 2),
// up to 2 bits per element
// 1 bit per element in the ideal case, so don't allocate the 2nd bits until necessary
enum_tag_first_bit: BitVec::new_fill(false, capacity.capacity()),
enum_tag_second_bit: BitVec::new(),
capacity: capacity.capacity(),
_phantom: PhantomData,
}
Expand Down Expand Up @@ -295,6 +313,7 @@ enum OccupiedEnumTag {
#[default]
Free = 0,
ZeroSlots = 1,
/// this should be value 2 so that we can store Free and OneSlotInIndex in only 1 bit. These are the primary states.
OneSlotInIndex = 2,
MultipleSlots = 3,
}
Expand Down Expand Up @@ -382,7 +401,6 @@ impl<T: Copy + PartialEq + 'static> IndexEntryPlaceInBucket<T> {
}

pub fn init(&self, index_bucket: &mut BucketStorage<IndexBucket<T>>, pubkey: &Pubkey) {
self.set_slot_count_enum_value(index_bucket, OccupiedEnum::ZeroSlots);
let index_entry = index_bucket.get_mut::<IndexEntry<T>>(self.ix);
index_entry.key = *pubkey;
}
Expand Down

0 comments on commit ab04ff8

Please sign in to comment.