From 5cda86801c13c1779a79735638ba95dcf98bd963 Mon Sep 17 00:00:00 2001 From: HaoranYi Date: Mon, 9 Sep 2024 19:31:44 +0000 Subject: [PATCH 1/8] clean scan optimization --- accounts-db/src/accounts_db.rs | 75 +++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 11 deletions(-) diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 162ce25cede85e..da24902c8051d3 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -97,7 +97,7 @@ use { std::{ borrow::Cow, boxed::Box, - collections::{BTreeSet, HashMap, HashSet}, + collections::{hash_map::Entry, BTreeSet, HashMap, HashSet}, fs, hash::{Hash as StdHash, Hasher as StdHasher}, io::Result as IoResult, @@ -1352,6 +1352,8 @@ impl StoreAccountsTiming { struct CleaningInfo { slot_list: SlotList, ref_count: u64, + /// True for pubkeys which can contain zero accounts + can_contain_zero: bool, } /// This is the return type of AccountsDb::construct_candidate_clean_keys. @@ -2849,6 +2851,7 @@ impl AccountsDb { CleaningInfo { slot_list, ref_count, + .. }, ) in bin.iter() { @@ -3119,11 +3122,11 @@ impl AccountsDb { .sum::() as u64 } - fn insert_pubkey(&self, candidates: &[RwLock>], pubkey: Pubkey) { - let index = self.accounts_index.bin_calculator.bin_from_pubkey(&pubkey); - let mut candidates_bin = candidates[index].write().unwrap(); - candidates_bin.insert(pubkey, CleaningInfo::default()); - } + // fn insert_pubkey(&self, candidates: &[RwLock>], pubkey: Pubkey) { + // let index = self.accounts_index.bin_calculator.bin_from_pubkey(&pubkey); + // let mut candidates_bin = candidates[index].write().unwrap(); + // candidates_bin.insert(pubkey, CleaningInfo::default()); + // } /// Construct a list of candidates for cleaning from: /// - dirty_stores -- set of stores which had accounts @@ -3162,6 +3165,26 @@ impl AccountsDb { std::iter::repeat_with(|| RwLock::new(HashMap::::new())) .take(num_bins) .collect(); + + let insert_candidate = |pubkey, is_zero| { + let index = self.accounts_index.bin_calculator.bin_from_pubkey(&pubkey); + let mut candidates_bin = candidates[index].write().unwrap(); + + match candidates_bin.entry(pubkey) { + Entry::Occupied(occupied) => { + if is_zero { + occupied.into_mut().can_contain_zero = true; + } + } + Entry::Vacant(vacant) => { + vacant.insert(CleaningInfo { + can_contain_zero: is_zero, + ..Default::default() + }); + } + } + }; + let dirty_ancient_stores = AtomicUsize::default(); let mut dirty_store_routine = || { let chunk_size = 1.max(dirty_stores_len.saturating_div(rayon::current_num_threads())); @@ -3174,9 +3197,13 @@ impl AccountsDb { dirty_ancient_stores.fetch_add(1, Ordering::Relaxed); } oldest_dirty_slot = oldest_dirty_slot.min(*slot); - store - .accounts - .scan_pubkeys(|pubkey| self.insert_pubkey(&candidates, *pubkey)); + + store.accounts.scan_index(|index| { + let pubkey = index.index_info.pubkey; + let is_zero = index.index_info.lamports == 0; + + insert_candidate(pubkey, is_zero); + }); }); oldest_dirty_slot }) @@ -3210,6 +3237,22 @@ impl AccountsDb { collect_delta_keys.stop(); timings.collect_delta_keys_us += collect_delta_keys.as_us(); + // let mut delta_insert = Measure::start("delta_insert"); + // self.thread_pool_clean.install(|| { + // delta_keys.par_iter().for_each(|keys| { + // for key in keys { + // // Conservatively mark the candidate to be zero for + // // correctness so that scan WILL try to look in disk if it is + // // not in-mem. These keys are from 1) recently processed + // // slots, 2) zeros found in shrink. Therefore, seldomly do + // // we need to look up in disk. + // insert_candidate(*key, true); + // } + // }); + // }); + // delta_insert.stop(); + // timings.delta_insert_us += delta_insert.as_us(); + timings.delta_key_count = Self::count_pubkeys(&candidates); // Check if we should purge any of the @@ -3226,7 +3269,7 @@ impl AccountsDb { let is_candidate_for_clean = max_slot_inclusive >= *slot && latest_full_snapshot_slot >= *slot; if is_candidate_for_clean { - self.insert_pubkey(&candidates, *pubkey); + insert_candidate(*pubkey, true); } !is_candidate_for_clean }); @@ -3442,7 +3485,11 @@ impl AccountsDb { }, None, false, - ScanFilter::All, + if candidate_info.can_contain_zero { + ScanFilter::All + } else { + self.scan_filter_for_shrinking + }, ); if should_purge { let reclaims_new = self.collect_reclaims( @@ -3493,6 +3540,7 @@ impl AccountsDb { CleaningInfo { slot_list, ref_count, + .. }, ) in candidates_bin.write().unwrap().iter_mut() { @@ -3572,6 +3620,7 @@ impl AccountsDb { let CleaningInfo { slot_list, ref_count: _, + .. } = cleaning_info; (!slot_list.is_empty()).then_some(( *pubkey, @@ -3875,6 +3924,7 @@ impl AccountsDb { let CleaningInfo { slot_list, ref_count: _, + .. } = cleaning_info; debug_assert!(!slot_list.is_empty(), "candidate slot_list can't be empty"); // Only keep candidates where the entire history of the account in the root set @@ -12959,6 +13009,7 @@ pub mod tests { CleaningInfo { slot_list: rooted_entries, ref_count, + ..Default::default() }, ); } @@ -12969,6 +13020,7 @@ pub mod tests { CleaningInfo { slot_list: list, ref_count, + .. }, ) in candidates_bin.iter() { @@ -15221,6 +15273,7 @@ pub mod tests { CleaningInfo { slot_list: vec![(slot, account_info)], ref_count: 1, + ..Default::default() }, ); let accounts_db = AccountsDb::new_single_for_tests(); From 26f77fb2d08fa7f05cb04387e2552f350ccbeaca Mon Sep 17 00:00:00 2001 From: HaoranYi Date: Tue, 24 Sep 2024 21:00:57 +0000 Subject: [PATCH 2/8] fix rebase conflicts --- accounts-db/src/accounts_db.rs | 36 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index da24902c8051d3..e43b992a142e99 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -3108,7 +3108,18 @@ impl AccountsDb { candidates_bin = candidates[curr_bin].write().unwrap(); prev_bin = curr_bin; } - candidates_bin.insert(removed_pubkey, CleaningInfo::default()); + // Conservatively mark the candidate to be zero for + // correctness so that scan WILL try to look in disk if it is + // not in-mem. These keys are from 1) recently processed + // slots, 2) zeros found in shrink. Therefore, they are very likely + // to be in-memory, and seldomly do we need to look them up in disk. + candidates_bin.insert( + removed_pubkey, + CleaningInfo { + can_contain_zero: true, + ..Default::default() + }, + ); } } } @@ -3122,12 +3133,6 @@ impl AccountsDb { .sum::() as u64 } - // fn insert_pubkey(&self, candidates: &[RwLock>], pubkey: Pubkey) { - // let index = self.accounts_index.bin_calculator.bin_from_pubkey(&pubkey); - // let mut candidates_bin = candidates[index].write().unwrap(); - // candidates_bin.insert(pubkey, CleaningInfo::default()); - // } - /// Construct a list of candidates for cleaning from: /// - dirty_stores -- set of stores which had accounts /// removed or recently rooted; @@ -3201,7 +3206,6 @@ impl AccountsDb { store.accounts.scan_index(|index| { let pubkey = index.index_info.pubkey; let is_zero = index.index_info.lamports == 0; - insert_candidate(pubkey, is_zero); }); }); @@ -3237,22 +3241,6 @@ impl AccountsDb { collect_delta_keys.stop(); timings.collect_delta_keys_us += collect_delta_keys.as_us(); - // let mut delta_insert = Measure::start("delta_insert"); - // self.thread_pool_clean.install(|| { - // delta_keys.par_iter().for_each(|keys| { - // for key in keys { - // // Conservatively mark the candidate to be zero for - // // correctness so that scan WILL try to look in disk if it is - // // not in-mem. These keys are from 1) recently processed - // // slots, 2) zeros found in shrink. Therefore, seldomly do - // // we need to look up in disk. - // insert_candidate(*key, true); - // } - // }); - // }); - // delta_insert.stop(); - // timings.delta_insert_us += delta_insert.as_us(); - timings.delta_key_count = Self::count_pubkeys(&candidates); // Check if we should purge any of the From 84263bf712ec16ca1885f8c0707cab6ed09a2bc3 Mon Sep 17 00:00:00 2001 From: HaoranYi <219428+HaoranYi@users.noreply.github.com> Date: Wed, 25 Sep 2024 09:04:19 -0500 Subject: [PATCH 3/8] Update accounts-db/src/accounts_db.rs Co-authored-by: Brooks --- accounts-db/src/accounts_db.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index e43b992a142e99..4269583bfbd0a3 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -1352,8 +1352,10 @@ impl StoreAccountsTiming { struct CleaningInfo { slot_list: SlotList, ref_count: u64, - /// True for pubkeys which can contain zero accounts - can_contain_zero: bool, + /// Indicates if this account might have a zero lamport index entry. + /// If false, the account *shall* not have zero lamport index entries. + /// If true, the account *might* have zero lamport index entries. + might_contain_zero_lamport_entry: bool, } /// This is the return type of AccountsDb::construct_candidate_clean_keys. From 07ae15d52509f4dbfc40f7a113a7409297c4b71f Mon Sep 17 00:00:00 2001 From: HaoranYi <219428+HaoranYi@users.noreply.github.com> Date: Wed, 25 Sep 2024 09:04:39 -0500 Subject: [PATCH 4/8] Update accounts-db/src/accounts_db.rs Co-authored-by: Brooks --- accounts-db/src/accounts_db.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 4269583bfbd0a3..943cdde5883816 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -3110,10 +3110,10 @@ impl AccountsDb { candidates_bin = candidates[curr_bin].write().unwrap(); prev_bin = curr_bin; } - // Conservatively mark the candidate to be zero for + // Conservatively mark the candidate might have a zero lamport entry for // correctness so that scan WILL try to look in disk if it is // not in-mem. These keys are from 1) recently processed - // slots, 2) zeros found in shrink. Therefore, they are very likely + // slots, 2) zero lamports found in shrink. Therefore, they are very likely // to be in-memory, and seldomly do we need to look them up in disk. candidates_bin.insert( removed_pubkey, From d245937407311930086b3391946e43212c86e9b7 Mon Sep 17 00:00:00 2001 From: HaoranYi <219428+HaoranYi@users.noreply.github.com> Date: Wed, 25 Sep 2024 09:04:48 -0500 Subject: [PATCH 5/8] Update accounts-db/src/accounts_db.rs Co-authored-by: Brooks --- accounts-db/src/accounts_db.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 943cdde5883816..9572e4b266a9c6 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -3173,7 +3173,7 @@ impl AccountsDb { .take(num_bins) .collect(); - let insert_candidate = |pubkey, is_zero| { + let insert_candidate = |pubkey, is_zero_lamport| { let index = self.accounts_index.bin_calculator.bin_from_pubkey(&pubkey); let mut candidates_bin = candidates[index].write().unwrap(); From a3333abaafe94530b047bbe6236c61595ae37e4d Mon Sep 17 00:00:00 2001 From: HaoranYi <219428+HaoranYi@users.noreply.github.com> Date: Wed, 25 Sep 2024 09:04:56 -0500 Subject: [PATCH 6/8] Update accounts-db/src/accounts_db.rs Co-authored-by: Brooks --- accounts-db/src/accounts_db.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 9572e4b266a9c6..421c012facd5b4 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -3207,7 +3207,7 @@ impl AccountsDb { store.accounts.scan_index(|index| { let pubkey = index.index_info.pubkey; - let is_zero = index.index_info.lamports == 0; + let is_zero_lamport = index.index_info.is_zero_lamport(); insert_candidate(pubkey, is_zero); }); }); From e45cf240a292e94ee2e9adab9d6d2283a7c65fc8 Mon Sep 17 00:00:00 2001 From: HaoranYi Date: Wed, 25 Sep 2024 14:13:12 +0000 Subject: [PATCH 7/8] review update --- accounts-db/src/accounts_db.rs | 26 ++++++++------------------ accounts-db/src/append_vec.rs | 6 ++++++ 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 421c012facd5b4..6223a38db0f91b 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -97,7 +97,7 @@ use { std::{ borrow::Cow, boxed::Box, - collections::{hash_map::Entry, BTreeSet, HashMap, HashSet}, + collections::{BTreeSet, HashMap, HashSet}, fs, hash::{Hash as StdHash, Hasher as StdHasher}, io::Result as IoResult, @@ -3118,7 +3118,7 @@ impl AccountsDb { candidates_bin.insert( removed_pubkey, CleaningInfo { - can_contain_zero: true, + might_contain_zero_lamport_entry: true, ..Default::default() }, ); @@ -3176,20 +3176,10 @@ impl AccountsDb { let insert_candidate = |pubkey, is_zero_lamport| { let index = self.accounts_index.bin_calculator.bin_from_pubkey(&pubkey); let mut candidates_bin = candidates[index].write().unwrap(); - - match candidates_bin.entry(pubkey) { - Entry::Occupied(occupied) => { - if is_zero { - occupied.into_mut().can_contain_zero = true; - } - } - Entry::Vacant(vacant) => { - vacant.insert(CleaningInfo { - can_contain_zero: is_zero, - ..Default::default() - }); - } - } + candidates_bin + .entry(pubkey) + .or_default() + .might_contain_zero_lamport_entry |= is_zero_lamport; }; let dirty_ancient_stores = AtomicUsize::default(); @@ -3208,7 +3198,7 @@ impl AccountsDb { store.accounts.scan_index(|index| { let pubkey = index.index_info.pubkey; let is_zero_lamport = index.index_info.is_zero_lamport(); - insert_candidate(pubkey, is_zero); + insert_candidate(pubkey, is_zero_lamport); }); }); oldest_dirty_slot @@ -3475,7 +3465,7 @@ impl AccountsDb { }, None, false, - if candidate_info.can_contain_zero { + if candidate_info.might_contain_zero_lamport_entry { ScanFilter::All } else { self.scan_filter_for_shrinking diff --git a/accounts-db/src/append_vec.rs b/accounts-db/src/append_vec.rs index 52f72477566459..91f522e83179a3 100644 --- a/accounts-db/src/append_vec.rs +++ b/accounts-db/src/append_vec.rs @@ -209,6 +209,12 @@ pub(crate) struct IndexInfoInner { pub data_len: u64, } +impl ZeroLamport for IndexInfoInner { + fn is_zero_lamport(&self) -> bool { + self.lamports == 0 + } +} + /// offsets to help navigate the persisted format of `AppendVec` #[derive(Debug)] struct AccountOffsets { From 9f676c4246092e4c0d93ad8c8664a331df736ecd Mon Sep 17 00:00:00 2001 From: HaoranYi Date: Wed, 25 Sep 2024 14:46:07 +0000 Subject: [PATCH 8/8] revert ZeroLamport trait for IndexInfoInner --- accounts-db/src/accounts_db.rs | 2 +- accounts-db/src/append_vec.rs | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-) diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 6223a38db0f91b..9a0533b5b7ff35 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -3197,7 +3197,7 @@ impl AccountsDb { store.accounts.scan_index(|index| { let pubkey = index.index_info.pubkey; - let is_zero_lamport = index.index_info.is_zero_lamport(); + let is_zero_lamport = index.index_info.lamports == 0; insert_candidate(pubkey, is_zero_lamport); }); }); diff --git a/accounts-db/src/append_vec.rs b/accounts-db/src/append_vec.rs index 91f522e83179a3..52f72477566459 100644 --- a/accounts-db/src/append_vec.rs +++ b/accounts-db/src/append_vec.rs @@ -209,12 +209,6 @@ pub(crate) struct IndexInfoInner { pub data_len: u64, } -impl ZeroLamport for IndexInfoInner { - fn is_zero_lamport(&self) -> bool { - self.lamports == 0 - } -} - /// offsets to help navigate the persisted format of `AppendVec` #[derive(Debug)] struct AccountOffsets {