From 003ba1f2af97228f252b3e9ec8bc38fa4547ded8 Mon Sep 17 00:00:00 2001 From: Dmitri Makarov Date: Mon, 16 Sep 2024 16:59:43 -0400 Subject: [PATCH] Tweak ancient packing algorithm --- accounts-db/src/accounts_db.rs | 37 +++++++- accounts-db/src/ancient_append_vecs.rs | 118 +++++++++++++++++++------ 2 files changed, 126 insertions(+), 29 deletions(-) diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 162ce25cede85e..b162ee1abe9330 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -1479,7 +1479,7 @@ pub struct AccountsDb { /// Set of stores which are recently rooted or had accounts removed /// such that potentially a 0-lamport account update could be present which /// means we can remove the account from the index entirely. - dirty_stores: DashMap>, + pub(crate) dirty_stores: DashMap>, /// Zero-lamport accounts that are *not* purged during clean because they need to stay alive /// for incremental snapshot support. @@ -1521,6 +1521,11 @@ pub struct AccountsDb { /// The latest full snapshot slot dictates how to handle zero lamport accounts latest_full_snapshot_slot: SeqLock>, + + /// These are the ancient storages that could be valuable to shrink. + /// sorted by largest dead bytes to smallest + /// Members are Slot and capacity. If capacity is smaller, then that means the storage was already shrunk. 
+ pub(crate) best_ancient_slots_to_shrink: RwLock>, } #[derive(Debug, Default)] @@ -2491,6 +2496,7 @@ impl AccountsDb { const ACCOUNTS_STACK_SIZE: usize = 8 * 1024 * 1024; AccountsDb { + best_ancient_slots_to_shrink: RwLock::default(), create_ancient_storage: CreateAncientStorage::default(), verify_accounts_hash_in_bg: VerifyAccountsHashInBackground::default(), active_stats: ActiveStats::default(), @@ -5068,7 +5074,7 @@ impl AccountsDb { let shrink_candidates_slots = std::mem::take(&mut *self.shrink_candidate_slots.lock().unwrap()); - let (shrink_slots, shrink_slots_next_batch) = { + let (mut shrink_slots, shrink_slots_next_batch) = { if let AccountShrinkThreshold::TotalSpace { shrink_ratio } = self.shrink_ratio { let (shrink_slots, shrink_slots_next_batch) = self.select_candidates_by_total_usage(&shrink_candidates_slots, shrink_ratio); @@ -5089,6 +5095,31 @@ impl AccountsDb { } }; + let mut limit = 2; + if shrink_slots.len() >= limit { + limit = shrink_slots.len() + 1; + } + let mut ancients = self.best_ancient_slots_to_shrink.write().unwrap(); + for (slot, capacity) in ancients.iter_mut() { + if *capacity == 0 || shrink_slots.contains(slot) { + // already dealt with + continue; + } + // we will be done processing this suggestion no matter what + if let Some(store) = self.storage.get_slot_storage_entry(*slot) { + if *capacity != store.capacity() || !Self::is_candidate_for_shrink(self, &store) { + *capacity = 0; + // ignore this one + continue; + } + *capacity = 0; + shrink_slots.insert(*slot, store); + + if shrink_slots.len() >= limit { + break; + } + } + } if shrink_slots.is_empty() && shrink_slots_next_batch .as_ref() @@ -8060,7 +8091,7 @@ impl AccountsDb { true } - fn is_candidate_for_shrink(&self, store: &AccountStorageEntry) -> bool { + pub(crate) fn is_candidate_for_shrink(&self, store: &AccountStorageEntry) -> bool { // appended ancient append vecs should not be shrunk by the normal shrink codepath. 
// It is not possible to identify ancient append vecs when we pack, so no check for ancient when we are not appending. let total_bytes = if self.create_ancient_storage == CreateAncientStorage::Append diff --git a/accounts-db/src/ancient_append_vecs.rs b/accounts-db/src/ancient_append_vecs.rs index 68d4f0b365e9fd..6c8d3396d57252 100644 --- a/accounts-db/src/ancient_append_vecs.rs +++ b/accounts-db/src/ancient_append_vecs.rs @@ -27,9 +27,13 @@ use { }, }; -/// this many # of highest slot values should be treated as desirable to pack. +/// This number of highest slot values should be treated as desirable to pack. /// This gives us high slots to move packed accounts into. const HIGH_SLOT_OFFSET: u64 = 100; +/// The minimal number of smallest storage entries kept in +/// `all_infos`, when `all_infos` entries are filtered for combining +/// into storages of `ideal_storage_size`. +const MIN_SMALLEST_INCLUDED_COUNT: u64 = 0; /// ancient packing algorithm tuning per pass #[derive(Debug)] @@ -79,6 +83,8 @@ struct AncientSlotInfos { total_alive_bytes_shrink: Saturating, /// total alive bytes across all slots total_alive_bytes: Saturating, + /// best_slots_to_shrink + best_slots_to_shrink: Vec<(Slot, u64)>, } impl AncientSlotInfos { @@ -177,8 +183,10 @@ impl AncientSlotInfos { * tuning.percent_of_alive_shrunk_data / 100, ); + self.best_slots_to_shrink = Vec::with_capacity(self.shrink_indexes.len()); for info_index in &self.shrink_indexes { let info = &mut self.all_infos[*info_index]; + self.best_slots_to_shrink.push((info.slot, info.capacity)); if bytes_to_shrink_due_to_ratio.0 >= threshold_bytes { // we exceeded the amount to shrink due to alive ratio, so don't shrink this one just due to 'should_shrink' // It MAY be shrunk based on total capacity still. 
@@ -210,7 +218,9 @@ impl AncientSlotInfos { tuning: &PackedAncientStorageTuning, stats: &ShrinkAncientStats, ) { - // these indexes into 'all_infos' are useless once we truncate 'all_infos', so make sure they're cleared out to avoid any issues + // These indexes into 'all_infos' are useless once we truncate + // 'all_infos', so make sure they're cleared out to avoid any + // issues. self.shrink_indexes.clear(); let total_storages = self.all_infos.len(); let mut cumulative_bytes = Saturating(0u64); @@ -218,23 +228,31 @@ impl AncientSlotInfos { let mut bytes_from_must_shrink = 0; let mut bytes_from_smallest_storages = 0; let mut bytes_from_newest_storages = 0; + // Always including some smallest. We already include some newest. + let mut smallest_included = 0; for (i, info) in self.all_infos.iter().enumerate() { cumulative_bytes += info.alive_bytes; let ancient_storages_required = div_ceil(cumulative_bytes.0, tuning.ideal_storage_size) as usize; let storages_remaining = total_storages - i - 1; - // if the remaining uncombined storages and the # of resulting - // combined ancient storages are less than the threshold, then - // we've gone too far, so get rid of this entry and all after it. - // Every storage after this one is larger than the ones we've chosen. - // if we ever get to more than `max_resulting_storages` required ancient storages, that is enough to stop for now. - // It will take a lot of time for the pack algorithm to create that many, and that is bad for system performance. - // This should be a limit that only affects extreme testing environments. - // We do not stop including entries until we have dealt with all the high slot #s. This allows the algorithm to continue - // to make progress each time it is called. There are exceptions that can cause the pack to fail, such as accounts with multiple - // refs. 
+ // If the remaining uncombined storages and the number of + // resulting combined ancient storages are less than the + // threshold, then we've gone too far, so get rid of this + // entry and all after it. Every storage after this one + // is larger than the ones we've chosen. If we ever get + // to more than `max_resulting_storages` required ancient + // storages, that is enough to stop for now. It will take + // a lot of time for the pack algorithm to create that + // many, and that is bad for system performance. This + // should be a limit that only affects extreme testing + // environments. We do not stop including entries until + // we have dealt with all the high slot numbers. This + // allows the algorithm to continue to make progress each + // time it is called. There are exceptions that can cause + // the pack to fail, such as accounts with multiple refs. if !info.is_high_slot + && smallest_included > MIN_SMALLEST_INCLUDED_COUNT && (storages_remaining + ancient_storages_required < low_threshold || ancient_storages_required as u64 > u64::from(tuning.max_resulting_storages)) { @@ -247,6 +265,7 @@ impl AncientSlotInfos { bytes_from_newest_storages += info.alive_bytes; } else { bytes_from_smallest_storages += info.alive_bytes; + smallest_included += 1; } } stats @@ -390,13 +409,19 @@ impl AccountsDb { fn combine_ancient_slots_packed_internal( &self, sorted_slots: Vec, - tuning: PackedAncientStorageTuning, + mut tuning: PackedAncientStorageTuning, metrics: &mut ShrinkStatsSub, ) { self.shrink_ancient_stats .slots_considered .fetch_add(sorted_slots.len() as u64, Ordering::Relaxed); - let ancient_slot_infos = self.collect_sort_filter_ancient_slots(sorted_slots, &tuning); + let mut ancient_slot_infos = + self.collect_sort_filter_ancient_slots(sorted_slots, &mut tuning); + + std::mem::swap( + &mut *self.best_ancient_slots_to_shrink.write().unwrap(), + &mut ancient_slot_infos.best_slots_to_shrink, + ); if ancient_slot_infos.all_infos.is_empty() { return; // 
nothing to do @@ -493,12 +518,13 @@ impl AccountsDb { fn collect_sort_filter_ancient_slots( &self, slots: Vec, - tuning: &PackedAncientStorageTuning, + tuning: &mut PackedAncientStorageTuning, ) -> AncientSlotInfos { let mut ancient_slot_infos = self.calc_ancient_slot_info( slots, tuning.can_randomly_shrink, tuning.ideal_storage_size, + tuning, ); ancient_slot_infos.filter_ancient_slots(tuning, &self.shrink_ancient_stats); @@ -539,6 +565,7 @@ impl AccountsDb { slots: Vec, can_randomly_shrink: bool, ideal_size: NonZeroU64, + tuning: &mut PackedAncientStorageTuning, ) -> AncientSlotInfos { let len = slots.len(); let mut infos = AncientSlotInfos { @@ -577,6 +604,11 @@ impl AccountsDb { }) .count() .saturating_sub(randoms as usize); + // ideal storage size is total alive bytes of ancient storages / half of max ancient slots + tuning.ideal_storage_size = NonZeroU64::new( + (infos.total_alive_bytes.0 / tuning.max_ancient_slots.max(1) as u64 * 2).max(5_000_000), + ) + .unwrap(); self.shrink_ancient_stats .slots_eligible_to_shrink .fetch_add(should_shrink_count as u64, Ordering::Relaxed); @@ -2539,14 +2571,24 @@ pub mod tests { ); } TestCollectInfo::CalcAncientSlotInfo => { + let mut tuning = PackedAncientStorageTuning { + percent_of_alive_shrunk_data: 100, + max_ancient_slots: 0, + // irrelevant for what this test is trying to test, but necessary to avoid minimums + ideal_storage_size: NonZeroU64::new(get_ancient_append_vec_capacity()) + .unwrap(), + can_randomly_shrink, + ..default_tuning() + }; infos = db.calc_ancient_slot_info( vec![slot1], can_randomly_shrink, NonZeroU64::new(get_ancient_append_vec_capacity()).unwrap(), + &mut tuning, ); } TestCollectInfo::CollectSortFilterInfo => { - let tuning = PackedAncientStorageTuning { + let mut tuning = PackedAncientStorageTuning { percent_of_alive_shrunk_data: 100, max_ancient_slots: 0, // irrelevant for what this test is trying to test, but necessary to avoid minimums @@ -2555,7 +2597,7 @@ pub mod tests { 
can_randomly_shrink, ..default_tuning() }; - infos = db.collect_sort_filter_ancient_slots(vec![slot1], &tuning); + infos = db.collect_sort_filter_ancient_slots(vec![slot1], &mut tuning); } } assert_eq!(infos.all_infos.len(), 1, "{method:?}"); @@ -2604,10 +2646,19 @@ pub mod tests { high_slot, ); } else { + let mut tuning = PackedAncientStorageTuning { + percent_of_alive_shrunk_data: 100, + max_ancient_slots: 0, + // irrelevant for what this test is trying to test, but necessary to avoid minimums + ideal_storage_size: NonZeroU64::new(get_ancient_append_vec_capacity()).unwrap(), + can_randomly_shrink, + ..default_tuning() + }; infos = db.calc_ancient_slot_info( vec![slot1], can_randomly_shrink, NonZeroU64::new(get_ancient_append_vec_capacity()).unwrap(), + &mut tuning, ); } assert!(infos.all_infos.is_empty()); @@ -2620,6 +2671,7 @@ pub mod tests { #[test] fn test_calc_ancient_slot_info_several() { let can_randomly_shrink = false; + let mut tuning = default_tuning(); for alive in [true, false] { for slots in 0..4 { // 1_040_000 is big enough relative to page size to cause shrink ratio to be triggered @@ -2638,6 +2690,7 @@ pub mod tests { slot_vec.clone(), can_randomly_shrink, NonZeroU64::new(get_ancient_append_vec_capacity()).unwrap(), + &mut tuning, ); if !alive { assert!(infos.all_infos.is_empty()); @@ -2679,6 +2732,7 @@ pub mod tests { #[test] fn test_calc_ancient_slot_info_one_alive_one_dead() { let can_randomly_shrink = false; + let mut tuning = default_tuning(); for method in TestCollectInfo::iter() { for slot1_is_alive in [false, true] { let alives = [false /*dummy*/, slot1_is_alive, !slot1_is_alive]; @@ -2720,12 +2774,13 @@ pub mod tests { slot_vec.clone(), can_randomly_shrink, NonZeroU64::new(get_ancient_append_vec_capacity()).unwrap(), + &mut tuning, ), TestCollectInfo::Add => { continue; // unsupportable } TestCollectInfo::CollectSortFilterInfo => { - let tuning = PackedAncientStorageTuning { + let mut tuning = PackedAncientStorageTuning { 
percent_of_alive_shrunk_data: 100, max_ancient_slots: 0, // irrelevant @@ -2736,7 +2791,7 @@ pub mod tests { can_randomly_shrink, ..default_tuning() }; - db.collect_sort_filter_ancient_slots(slot_vec.clone(), &tuning) + db.collect_sort_filter_ancient_slots(slot_vec.clone(), &mut tuning) } }; assert_eq!(infos.all_infos.len(), 1, "method: {method:?}"); @@ -2889,10 +2944,11 @@ pub mod tests { fn test_filter_by_smallest_capacity_high_slot_more() { let tuning = default_tuning(); + let num_smallest = (MIN_SMALLEST_INCLUDED_COUNT + 1) as usize; // Ensure we have more storages with high slots than the 'max resulting storages'. - let num_high_slots = tuning.max_resulting_storages.get() * 2; + let num_high_slots = (tuning.max_resulting_storages.get() * 2) as usize; let num_ancient_storages = num_high_slots * 3; - let mut infos = create_test_infos(num_ancient_storages as usize); + let mut infos = create_test_infos(num_ancient_storages); infos .all_infos .sort_unstable_by_key(|slot_info| slot_info.slot); @@ -2900,7 +2956,7 @@ pub mod tests { .all_infos .iter_mut() .rev() - .take(num_high_slots as usize) + .take(num_high_slots) .for_each(|slot_info| { slot_info.is_high_slot = true; }); @@ -2909,20 +2965,28 @@ pub mod tests { .iter() .filter_map(|slot_info| slot_info.is_high_slot.then_some(slot_info.slot)) .collect(); - // shuffle the infos so they actually need to be sorted infos.all_infos.shuffle(&mut thread_rng()); infos.filter_by_smallest_capacity(&tuning, &ShrinkAncientStats::default()); + // the number of all_infos entries should include the number + // of high_slot entries plus the minimal number of smallest + // storage entries. + assert_eq!(infos.all_infos.len(), num_high_slots + num_smallest); infos .all_infos .sort_unstable_by_key(|slot_info| slot_info.slot); + // Check that only the last num_high_slots are correct. The + // smallest ones are randomly selected, and difficult to + // check. 
let slots_actual: Vec<_> = infos .all_infos .iter() + .rev() + .take(num_high_slots) .map(|slot_info| slot_info.slot) + .rev() .collect(); - assert_eq!(infos.all_infos.len() as u64, num_high_slots); assert_eq!(slots_actual, slots_expected); } @@ -3099,6 +3163,7 @@ pub mod tests { #[test] fn test_calc_ancient_slot_info_one_shrink_one_not() { let can_randomly_shrink = false; + let mut tuning = default_tuning(); for method in TestCollectInfo::iter() { for slot1_shrink in [false, true] { let shrinks = [false /*dummy*/, slot1_shrink, !slot1_shrink]; @@ -3140,12 +3205,13 @@ pub mod tests { slot_vec.clone(), can_randomly_shrink, NonZeroU64::new(get_ancient_append_vec_capacity()).unwrap(), + &mut tuning, ), TestCollectInfo::Add => { continue; // unsupportable } TestCollectInfo::CollectSortFilterInfo => { - let tuning = PackedAncientStorageTuning { + let mut tuning = PackedAncientStorageTuning { percent_of_alive_shrunk_data: 100, max_ancient_slots: 0, // irrelevant for what this test is trying to test, but necessary to avoid minimums @@ -3155,7 +3221,7 @@ pub mod tests { ..default_tuning() }; // note this can sort infos.all_infos - db.collect_sort_filter_ancient_slots(slot_vec.clone(), &tuning) + db.collect_sort_filter_ancient_slots(slot_vec.clone(), &mut tuning) } };