diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs index 169e3d498f3fa3..18bff15be909a2 100644 --- a/accounts-db/src/accounts_db.rs +++ b/accounts-db/src/accounts_db.rs @@ -97,7 +97,7 @@ use { std::{ borrow::Cow, boxed::Box, - collections::{BTreeSet, HashMap, HashSet}, + collections::{hash_map::Entry, BTreeSet, HashMap, HashSet}, fs, hash::{Hash as StdHash, Hasher as StdHasher}, io::Result as IoResult, @@ -1352,6 +1352,8 @@ impl StoreAccountsTiming { struct CleaningInfo { slot_list: SlotList, ref_count: u64, + /// True if this pubkey was inserted as a candidate with the zero-lamport flag set at least once + contains_zero: bool, } /// This is the return type of AccountsDb::construct_candidate_clean_keys. @@ -2849,6 +2851,7 @@ impl AccountsDb { CleaningInfo { slot_list, ref_count, + .. }, ) in bin.iter() { @@ -3139,11 +3142,25 @@ impl AccountsDb { .take(num_bins) .collect(); - let insert_pubkey = |pubkey: &Pubkey| { - let index = self.accounts_index.bin_calculator.bin_from_pubkey(pubkey); + let insert_candidate = |pubkey, is_zero| { + let index = self.accounts_index.bin_calculator.bin_from_pubkey(&pubkey); let mut candidates_bin = candidates[index].write().unwrap(); - candidates_bin.insert(*pubkey, CleaningInfo::default()); + + match candidates_bin.entry(pubkey) { + Entry::Occupied(occupied) => { + if is_zero { + occupied.into_mut().contains_zero = true; + } + } + Entry::Vacant(vacant) => { + vacant.insert(CleaningInfo { + contains_zero: is_zero, + ..Default::default() + }); + } + } }; + let dirty_ancient_stores = AtomicUsize::default(); let mut dirty_store_routine = || { let chunk_size = 1.max(dirty_stores_len.saturating_div(rayon::current_num_threads())); @@ -3156,7 +3173,12 @@ impl AccountsDb { dirty_ancient_stores.fetch_add(1, Ordering::Relaxed); } oldest_dirty_slot = oldest_dirty_slot.min(*slot); - store.accounts.scan_pubkeys(insert_pubkey); + + store.accounts.scan_accounts(|account| { + let pubkey = account.pubkey(); + let is_zero = account.is_zero_lamport(); +
insert_candidate(*pubkey, is_zero); + }); }); oldest_dirty_slot }) @@ -3195,7 +3217,12 @@ impl AccountsDb { self.thread_pool_clean.install(|| { delta_keys.par_iter().for_each(|keys| { for key in keys { - insert_pubkey(key); + // Conservatively mark the candidate as zero for + // correctness so that the scan WILL look on disk if the entry + // is not in-mem. These keys come from 1) recently processed + // slots, 2) zeros found in shrink. Therefore, we seldom + // need to look them up on disk. + insert_candidate(*key, true); } }); }); @@ -3217,7 +3244,7 @@ impl AccountsDb { let is_candidate_for_clean = max_slot_inclusive >= *slot && latest_full_snapshot_slot >= *slot; if is_candidate_for_clean { - insert_pubkey(pubkey); + insert_candidate(*pubkey, true); } !is_candidate_for_clean }); @@ -3484,6 +3511,7 @@ impl AccountsDb { CleaningInfo { slot_list, ref_count, + .. }, ) in candidates_bin.write().unwrap().iter_mut() { @@ -3563,6 +3591,7 @@ impl AccountsDb { let CleaningInfo { slot_list, ref_count: _, + .. } = cleaning_info; (!slot_list.is_empty()).then_some(( *pubkey, @@ -3866,6 +3895,7 @@ impl AccountsDb { let CleaningInfo { slot_list, ref_count: _, + .. } = cleaning_info; debug_assert!(!slot_list.is_empty(), "candidate slot_list can't be empty"); // Only keep candidates where the entire history of the account in the root set @@ -12956,6 +12986,7 @@ pub mod tests { CleaningInfo { slot_list: rooted_entries, ref_count, + ..Default::default() }, ); } @@ -12966,6 +12997,7 @@ pub mod tests { CleaningInfo { slot_list: list, ref_count, + ..
}, ) in candidates_bin.iter() { @@ -15270,6 +15302,7 @@ pub mod tests { CleaningInfo { slot_list: vec![(slot, account_info)], ref_count: 1, + ..Default::default() }, ); let accounts_db = AccountsDb::new_single_for_tests(); diff --git a/accounts-db/src/accounts_index.rs b/accounts-db/src/accounts_index.rs index a849d08af385f6..f3f58979d6e242 100644 --- a/accounts-db/src/accounts_index.rs +++ b/accounts-db/src/accounts_index.rs @@ -92,6 +92,10 @@ pub enum ScanFilter { /// Similar to `OnlyAbnormal but also check on-disk index to verify the /// entry on-disk is indeed normal. OnlyAbnormalWithVerify, + + /// Scan in-memory first; if the entry is not found there, check the + /// on-disk index only when the wrapped `bool` is true. + ConditionalOnDisk(bool), } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -1524,6 +1528,20 @@ impl + Into> AccountsIndex { }); } } + ScanFilter::ConditionalOnDisk(check_disk) => { + let found = lock + .as_ref() + .unwrap() + .get_only_in_mem(pubkey, false, |entry| { + internal_callback(entry); + entry.is_some() + }); + if !found && check_disk { + lock.as_ref() + .unwrap() + .get_internal(pubkey, internal_callback); + } + } } }); }