-
Notifications
You must be signed in to change notification settings - Fork 254
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Aggressively shrink ancient storages when shrink isn't too busy. #2946
Changes from 18 commits
372780e
132eddd
7e79341
6feba95
d3fcd70
82ba7ef
444ae0f
c3f6e88
80051cd
9a919c8
618c47b
e79a529
acc539b
e253a42
7c2f5f2
fa967e2
2c5b3c1
dd5f265
61c15b7
af85db2
2948f80
c72217e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -141,6 +141,11 @@ const MAX_ITEMS_PER_CHUNK: Slot = 2_500; | |
// This allows us to split up accounts index accesses across multiple threads. | ||
const SHRINK_COLLECT_CHUNK_SIZE: usize = 50; | ||
|
||
/// The number of shrink candidate slots that is small enough so that | ||
/// additional storages from ancient slots can be added to the | ||
/// candidates for shrinking. | ||
const SHRINK_INSERT_ANCIENT_THRESHOLD: usize = 10; | ||
|
||
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)] | ||
pub enum CreateAncientStorage { | ||
/// ancient storages are created by appending | ||
|
@@ -1463,6 +1468,11 @@ pub struct AccountsDb { | |
/// Flag to indicate if the experimental accounts lattice hash is enabled. | ||
/// (For R&D only; a feature-gate also exists to turn this on and make it a part of consensus.) | ||
pub is_experimental_accumulator_hash_enabled: AtomicBool, | ||
|
||
/// These are the ancient storages that could be valuable to shrink. | ||
/// sorted by largest dead bytes to smallest | ||
/// Members are Slot and capacity. If capacity is smaller, then that means the storage was already shrunk. | ||
pub(crate) best_ancient_slots_to_shrink: RwLock<Vec<(Slot, u64)>>, | ||
} | ||
|
||
/// results from 'split_storages_ancient' | ||
|
@@ -1811,6 +1821,7 @@ impl AccountsDb { | |
is_experimental_accumulator_hash_enabled: default_accounts_db_config | ||
.enable_experimental_accumulator_hash | ||
.into(), | ||
best_ancient_slots_to_shrink: RwLock::default(), | ||
} | ||
} | ||
|
||
|
@@ -4352,8 +4363,12 @@ impl AccountsDb { | |
|
||
let shrink_candidates_slots = | ||
std::mem::take(&mut *self.shrink_candidate_slots.lock().unwrap()); | ||
self.shrink_stats | ||
jeffwashington marked this conversation as resolved.
Show resolved
Hide resolved
|
||
.initial_candidates_count | ||
.store(shrink_candidates_slots.len() as u64, Ordering::Relaxed); | ||
|
||
let candidates_count = shrink_candidates_slots.len(); | ||
let ((shrink_slots, shrink_slots_next_batch), select_time_us) = measure_us!({ | ||
let ((mut shrink_slots, shrink_slots_next_batch), select_time_us) = measure_us!({ | ||
if let AccountShrinkThreshold::TotalSpace { shrink_ratio } = self.shrink_ratio { | ||
let (shrink_slots, shrink_slots_next_batch) = | ||
self.select_candidates_by_total_usage(&shrink_candidates_slots, shrink_ratio); | ||
|
@@ -4374,6 +4389,30 @@ impl AccountsDb { | |
} | ||
}); | ||
|
||
// If there are too few slots to shrink, add an ancient slot | ||
// for shrinking. `best_ancient_slots_to_shrink` is stored in | ||
// reverse order, so popping yields the best candidate first. | ||
if shrink_slots.len() < SHRINK_INSERT_ANCIENT_THRESHOLD { | ||
let mut ancients = self.best_ancient_slots_to_shrink.write().unwrap(); | ||
while let Some((slot, capacity)) = ancients.pop() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. the |
||
if let Some(store) = self.storage.get_slot_storage_entry(slot) { | ||
if !shrink_slots.contains(&slot) | ||
&& capacity == store.capacity() | ||
&& Self::is_candidate_for_shrink(self, &store) | ||
{ | ||
let ancient_bytes_added_to_shrink = store.alive_bytes() as u64; | ||
shrink_slots.insert(slot, store); | ||
self.shrink_stats | ||
.ancient_bytes_added_to_shrink | ||
.fetch_add(ancient_bytes_added_to_shrink, Ordering::Relaxed); | ||
self.shrink_stats | ||
.ancient_slots_added_to_shrink | ||
.fetch_add(1, Ordering::Relaxed); | ||
break; | ||
} | ||
} | ||
} | ||
} | ||
if shrink_slots.is_empty() | ||
&& shrink_slots_next_batch | ||
.as_ref() | ||
|
@@ -9113,7 +9152,9 @@ pub mod tests { | |
accounts_hash::MERKLE_FANOUT, | ||
accounts_index::{tests::*, AccountSecondaryIndexesIncludeExclude}, | ||
ancient_append_vecs, | ||
append_vec::{test_utils::TempFile, AppendVec, AppendVecStoredAccountMeta}, | ||
append_vec::{ | ||
aligned_stored_size, test_utils::TempFile, AppendVec, AppendVecStoredAccountMeta, | ||
}, | ||
storable_accounts::AccountForStorage, | ||
}, | ||
assert_matches::assert_matches, | ||
|
@@ -12015,6 +12056,83 @@ pub mod tests { | |
); | ||
} | ||
|
||
/// This test creates an ancient storage with three alive accounts | ||
/// of various sizes. It then simulates killing one of the | ||
/// accounts in a more recent (non-ancient) slot by overwriting | ||
/// the account that has the smallest data size. The dead account | ||
/// is expected to be deleted from its ancient storage in the | ||
/// process of shrinking candidate slots. The capacity of the | ||
/// storage after shrinking is expected to be the sum of alive | ||
/// bytes of the two remaining alive ancient accounts. | ||
#[test] | ||
fn test_shrink_candidate_slots_with_dead_ancient_account() { | ||
solana_logger::setup(); | ||
let epoch_schedule = EpochSchedule::default(); | ||
let num_ancient_slots = 3; | ||
// Prepare 3 append vecs to combine [medium, big, small] | ||
let account_data_sizes = vec![1000, 2000, 150]; | ||
let (db, starting_ancient_slot) = | ||
create_db_with_storages_and_index_with_customized_account_size_per_slot( | ||
true, | ||
num_ancient_slots, | ||
account_data_sizes, | ||
); | ||
db.add_root(starting_ancient_slot); | ||
let slots_to_combine: Vec<Slot> = | ||
(starting_ancient_slot..starting_ancient_slot + num_ancient_slots as Slot).collect(); | ||
db.combine_ancient_slots(slots_to_combine, CAN_RANDOMLY_SHRINK_FALSE); | ||
let storage = db.get_storage_for_slot(starting_ancient_slot).unwrap(); | ||
let ancient_accounts = db.get_unique_accounts_from_storage(&storage); | ||
// Check that three accounts are indeed present in the combined storage. | ||
assert_eq!(ancient_accounts.stored_accounts.len(), 3); | ||
// Find an ancient account with smallest data length. | ||
// This will be a dead account, overwritten in the current slot. | ||
let modified_account_pubkey = ancient_accounts | ||
.stored_accounts | ||
.iter() | ||
.min_by(|a, b| a.data_len.cmp(&b.data_len)) | ||
.unwrap() | ||
.pubkey; | ||
let modified_account_owner = *AccountSharedData::default().owner(); | ||
let modified_account = AccountSharedData::new(223, 0, &modified_account_owner); | ||
let ancient_append_vec_offset = db.ancient_append_vec_offset.unwrap().abs(); | ||
let current_slot = epoch_schedule.slots_per_epoch + ancient_append_vec_offset as u64 + 1; | ||
// Simulate killing of the ancient account by overwriting it in the current slot. | ||
db.store_for_tests( | ||
current_slot, | ||
&[(&modified_account_pubkey, &modified_account)], | ||
); | ||
db.calculate_accounts_delta_hash(current_slot); | ||
db.add_root_and_flush_write_cache(current_slot); | ||
// This should remove the dead ancient account from the index. | ||
db.clean_accounts_for_tests(); | ||
db.shrink_ancient_slots(&epoch_schedule); | ||
let storage = db.get_storage_for_slot(starting_ancient_slot).unwrap(); | ||
let created_accounts = db.get_unique_accounts_from_storage(&storage); | ||
// The dead account should still be in the ancient storage, | ||
// because the storage wouldn't be shrunk with normal alive to | ||
// capacity ratio. | ||
assert_eq!(created_accounts.stored_accounts.len(), 3); | ||
db.shrink_candidate_slots(&epoch_schedule); | ||
let storage = db.get_storage_for_slot(starting_ancient_slot).unwrap(); | ||
let created_accounts = db.get_unique_accounts_from_storage(&storage); | ||
// At this point the dead ancient account should be removed | ||
// and storage capacity shrunk to the sum of alive bytes of | ||
// accounts it holds. This is the data lengths of the | ||
// accounts plus the length of their metadata. | ||
assert_eq!( | ||
created_accounts.capacity as usize, | ||
aligned_stored_size(1000) + aligned_stored_size(2000) | ||
); | ||
// The above check works only when the AppendVec storage is | ||
// used. More generally the pubkey of the smallest account | ||
// shouldn't be present in the shrunk storage, which is | ||
// validated by the following scan of the storage accounts. | ||
storage.accounts.scan_pubkeys(|pubkey| { | ||
assert_ne!(pubkey, &modified_account_pubkey); | ||
}); | ||
} | ||
|
||
#[test] | ||
fn test_select_candidates_by_total_usage_no_candidates() { | ||
// no input candidates -- none should be selected | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -79,6 +79,8 @@ struct AncientSlotInfos { | |
total_alive_bytes_shrink: Saturating<u64>, | ||
/// total alive bytes across all slots | ||
total_alive_bytes: Saturating<u64>, | ||
/// The ancient slots (with their capacities) that are the best candidates to shrink. | ||
best_slots_to_shrink: Vec<(Slot, u64)>, | ||
} | ||
|
||
impl AncientSlotInfos { | ||
|
@@ -177,8 +179,10 @@ impl AncientSlotInfos { | |
* tuning.percent_of_alive_shrunk_data | ||
/ 100, | ||
); | ||
self.best_slots_to_shrink = Vec::with_capacity(self.shrink_indexes.len()); | ||
for info_index in &self.shrink_indexes { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @dmakarov sorry to go in circles... reverse is probably right and simplest. If you look at There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's sorted on capacity not on the amount of dead bytes, though. Isn't it? |
||
let info = &mut self.all_infos[*info_index]; | ||
self.best_slots_to_shrink.push((info.slot, info.capacity)); | ||
HaoranYi marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if bytes_to_shrink_due_to_ratio.0 >= threshold_bytes { | ||
// we exceeded the amount to shrink due to alive ratio, so don't shrink this one just due to 'should_shrink' | ||
// It MAY be shrunk based on total capacity still. | ||
|
@@ -396,7 +400,16 @@ impl AccountsDb { | |
self.shrink_ancient_stats | ||
.slots_considered | ||
.fetch_add(sorted_slots.len() as u64, Ordering::Relaxed); | ||
let ancient_slot_infos = self.collect_sort_filter_ancient_slots(sorted_slots, &tuning); | ||
let mut ancient_slot_infos = self.collect_sort_filter_ancient_slots(sorted_slots, &tuning); | ||
|
||
std::mem::swap( | ||
&mut *self.best_ancient_slots_to_shrink.write().unwrap(), | ||
&mut ancient_slot_infos.best_slots_to_shrink, | ||
); | ||
brooksprumo marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Reverse the vector so that the elements with the largest | ||
// dead bytes are popped first when used to extend the | ||
// shrinking candidates. | ||
self.best_ancient_slots_to_shrink.write().unwrap().reverse(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. probably reverse them while it is still local, before swapping |
||
|
||
if ancient_slot_infos.all_infos.is_empty() { | ||
return; // nothing to do | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think they are sorted now from smallest dead bytes to largest? I don't see where we are getting that sort order just from the diffs here, and I don't quite remember.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I just added sorting in another commit. I had to add a field to the tuple, so that we actually sort the elements by the amount of dead bytes.