diff --git a/accounts-db/Cargo.toml b/accounts-db/Cargo.toml
index 82a983ede37a20..fafbeff18d0bce 100644
--- a/accounts-db/Cargo.toml
+++ b/accounts-db/Cargo.toml
@@ -103,6 +103,10 @@ harness = false
 name = "bench_hashing"
 harness = false
 
+[[bench]]
+name = "read_only_accounts_cache"
+harness = false
+
 [[bench]]
 name = "bench_serde"
 harness = false
diff --git a/accounts-db/benches/bench_accounts_file.rs b/accounts-db/benches/bench_accounts_file.rs
index 6fe87523cf18f1..ad077f4f2a82cc 100644
--- a/accounts-db/benches/bench_accounts_file.rs
+++ b/accounts-db/benches/bench_accounts_file.rs
@@ -1,8 +1,6 @@
 #![allow(clippy::arithmetic_side_effects)]
 use {
     criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion, Throughput},
-    rand::{distributions::WeightedIndex, prelude::*},
-    rand_chacha::ChaChaRng,
     solana_accounts_db::{
         accounts_file::StorageAccess,
         append_vec::{self, AppendVec, SCAN_BUFFER_SIZE_WITHOUT_DATA},
@@ -15,13 +13,14 @@ use {
         account::{AccountSharedData, ReadableAccount},
         clock::Slot,
         pubkey::Pubkey,
-        rent::Rent,
         rent_collector::RENT_EXEMPT_RENT_EPOCH,
         system_instruction::MAX_PERMITTED_DATA_LENGTH,
     },
-    std::{iter, mem::ManuallyDrop},
+    std::mem::ManuallyDrop,
 };
 
+mod utils;
+
 const ACCOUNTS_COUNTS: [usize; 4] = [
     1,   // the smallest count; will bench overhead
     100, // number of accounts written per slot on mnb (with *no* rent rewrites)
@@ -116,40 +115,20 @@ fn bench_scan_pubkeys(c: &mut Criterion) {
         MAX_PERMITTED_DATA_LENGTH as usize,
     ];
     let weights = [3, 75, 20, 1, 1];
-    let distribution = WeightedIndex::new(weights).unwrap();
-
-    let rent = Rent::default();
-    let rent_minimum_balances: Vec<_> = data_sizes
-        .iter()
-        .map(|data_size| rent.minimum_balance(*data_size))
-        .collect();
 
     for accounts_count in ACCOUNTS_COUNTS {
         group.throughput(Throughput::Elements(accounts_count as u64));
-        let mut rng = ChaChaRng::seed_from_u64(accounts_count as u64);
 
-        let pubkeys: Vec<_> = iter::repeat_with(Pubkey::new_unique)
+        let storable_accounts: Vec<_> = utils::accounts(255, &data_sizes, &weights)
             .take(accounts_count)
             .collect();
-        let accounts: Vec<_> = iter::repeat_with(|| {
-            let index = distribution.sample(&mut rng);
-            AccountSharedData::new_rent_epoch(
-                rent_minimum_balances[index],
-                data_sizes[index],
-                &Pubkey::default(),
-                RENT_EXEMPT_RENT_EPOCH,
-            )
-        })
-        .take(pubkeys.len())
-        .collect();
-        let storable_accounts: Vec<_> = iter::zip(&pubkeys, &accounts).collect();
 
         // create an append vec file
         let append_vec_path = temp_dir.path().join(format!("append_vec_{accounts_count}"));
         _ = std::fs::remove_file(&append_vec_path);
-        let file_size = accounts
+        let file_size = storable_accounts
             .iter()
-            .map(|account| append_vec::aligned_stored_size(account.data().len()))
+            .map(|(_, account)| append_vec::aligned_stored_size(account.data().len()))
             .sum();
         let append_vec = AppendVec::new(append_vec_path, true, file_size);
         let stored_accounts_info = append_vec
diff --git a/accounts-db/benches/read_only_accounts_cache.rs b/accounts-db/benches/read_only_accounts_cache.rs
new file mode 100644
index 00000000000000..4f1fa4febd3820
--- /dev/null
+++ b/accounts-db/benches/read_only_accounts_cache.rs
@@ -0,0 +1,327 @@
+use {
+    criterion::{criterion_group, criterion_main, BenchmarkId, Criterion},
+    rand::{rngs::SmallRng, seq::SliceRandom, SeedableRng},
+    solana_accounts_db::{
+        accounts_db::AccountsDb, read_only_accounts_cache::ReadOnlyAccountsCache,
+    },
+    solana_sdk::system_instruction::MAX_PERMITTED_DATA_LENGTH,
+    std::{
+        hint::black_box,
+        sync::{
+            atomic::{AtomicBool, Ordering},
+            Arc,
+        },
+        thread::Builder,
+        time::{Duration, Instant},
+    },
+};
+
+mod utils;
+
+/// Sizes of accounts.
+///
+/// - No data.
+/// - 165 bytes (a token account).
+/// - 200 bytes (a stake account).
+/// - 10 mebibytes (the max size for an account).
+const DATA_SIZES: &[usize] = &[0, 165, 200, MAX_PERMITTED_DATA_LENGTH as usize];
+/// Distribution of the account sizes:
+///
+/// - 3% of accounts have no data.
+/// - 75% of accounts are 165 bytes (a token account).
+/// - 20% of accounts are 200 bytes (a stake account).
+/// - 2% of accounts are 10 mebibytes (the max size for an account).
+const WEIGHTS: &[usize] = &[3, 75, 20, 2];
+/// Numbers of reader and writer threads to bench.
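+///
+/// Note that each listed value spawns that many reader threads *and* that
+/// many writer threads, so the number of busy background threads is twice
+/// the value shown.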
+const NUM_READERS_WRITERS: &[usize] = &[
+    8, 16,
+    // These parameters are likely to freeze your computer if it has fewer
+    // than 32 cores.
+    32, 64,
+];
+
+/// Benchmarks read-only cache loads and stores without causing eviction.
+fn bench_read_only_accounts_cache(c: &mut Criterion) {
+    let mut group = c.benchmark_group("read_only_accounts_cache");
+    let slot = 0;
+
+    // Prepare initial accounts, but make sure to not fill up the cache.
+    let accounts: Vec<_> = utils::accounts_with_size_limit(
+        255,
+        DATA_SIZES,
+        WEIGHTS,
+        AccountsDb::DEFAULT_MAX_READ_ONLY_CACHE_DATA_SIZE_LO / 2,
+    )
+    .collect();
+    let pubkeys: Vec<_> = accounts
+        .iter()
+        .map(|(pubkey, _)| pubkey.to_owned())
+        .collect();
+
+    for num_readers_writers in NUM_READERS_WRITERS {
+        let cache = Arc::new(ReadOnlyAccountsCache::new(
+            AccountsDb::DEFAULT_MAX_READ_ONLY_CACHE_DATA_SIZE_LO,
+            AccountsDb::DEFAULT_MAX_READ_ONLY_CACHE_DATA_SIZE_HI,
+            AccountsDb::READ_ONLY_CACHE_MS_TO_SKIP_LRU_UPDATE,
+        ));
+
+        for (pubkey, account) in accounts.iter() {
+            cache.store(*pubkey, slot, account.clone());
+        }
+
+        // Spawn the reader threads in the background. They continuously read
+        // the initially inserted accounts.
+        let stop_threads = Arc::new(AtomicBool::new(false));
+        let reader_handles = (0..*num_readers_writers)
+            .map(|i| {
+                let stop_threads = Arc::clone(&stop_threads);
+                let cache = Arc::clone(&cache);
+                let pubkeys = pubkeys.clone();
+
+                Builder::new()
+                    .name(format!("reader{i:02}"))
+                    .spawn({
+                        move || {
+                            // Continuously read random accounts.
+                            let mut rng = SmallRng::seed_from_u64(i as u64);
+                            while !stop_threads.load(Ordering::Relaxed) {
+                                let pubkey = pubkeys.choose(&mut rng).unwrap();
+                                black_box(cache.load(*pubkey, slot));
+                            }
+                        }
+                    })
+                    .unwrap()
+            })
+            .collect::<Vec<_>>();
+
+        // Spawn the writer threads in the background.
+        let slot = 1;
+        let writer_handles = (0..*num_readers_writers)
+            .map(|i| {
+                let stop_threads = Arc::clone(&stop_threads);
+                let cache = Arc::clone(&cache);
+                let accounts = accounts.clone();
+
+                Builder::new()
+                    .name(format!("writer{i:02}"))
+                    .spawn({
+                        move || {
+                            // Continuously write to already existing pubkeys.
+                            let mut rng = SmallRng::seed_from_u64(100_u64.saturating_add(i as u64));
+                            while !stop_threads.load(Ordering::Relaxed) {
+                                let (pubkey, account) = accounts.choose(&mut rng).unwrap();
+                                cache.store(*pubkey, slot, account.clone());
+                            }
+                        }
+                    })
+                    .unwrap()
+            })
+            .collect::<Vec<_>>();
+
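+        // `iter_custom` gives manual control over what is timed: the store
+        // benchmark below brackets each `store` call with its own `Instant`,
+        // so advancing the account iterator is not measured, while the load
+        // benchmark times the whole loop, whose body is dominated by `load`.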
+        group.bench_function(BenchmarkId::new("store", num_readers_writers), |b| {
+            b.iter_custom(|iters| {
+                let mut total_time = Duration::new(0, 0);
+
+                for (pubkey, account) in accounts.iter().cycle().take(iters as usize) {
+                    // Measure only stores.
+                    let start = Instant::now();
+                    cache.store(*pubkey, slot, account.clone());
+                    total_time = total_time.saturating_add(start.elapsed());
+                }
+                total_time
+            })
+        });
+        group.bench_function(BenchmarkId::new("load", num_readers_writers), |b| {
+            b.iter_custom(|iters| {
+                let start = Instant::now();
+                for (pubkey, _) in accounts.iter().cycle().take(iters as usize) {
+                    black_box(cache.load(*pubkey, slot));
+                }
+
+                start.elapsed()
+            })
+        });
+
+        stop_threads.store(true, Ordering::Relaxed);
+        for reader_handle in reader_handles {
+            reader_handle.join().unwrap();
+        }
+        for writer_handle in writer_handles {
+            writer_handle.join().unwrap();
+        }
+    }
+}
+
+/// Benchmarks the read-only cache eviction mechanism. It does so by performing
+/// multithreaded reads and writes on a full cache. Each write triggers
+/// eviction. Background reads add more contention.
+fn bench_read_only_accounts_cache_eviction(
+    c: &mut Criterion,
+    group_name: &str,
+    max_data_size_lo: usize,
+    max_data_size_hi: usize,
+) {
+    // Prepare initial accounts, two times the high limit of the cache, to
+    // make sure that the background threads sometimes try to store something
+    // which is not in the cache.
+    let accounts: Vec<_> = utils::accounts_with_size_limit(
+        255,
+        DATA_SIZES,
+        WEIGHTS,
+        AccountsDb::DEFAULT_MAX_READ_ONLY_CACHE_DATA_SIZE_HI * 2,
+    )
+    .collect();
+    let pubkeys: Vec<_> = accounts
+        .iter()
+        .map(|(pubkey, _)| pubkey.to_owned())
+        .collect();
+
+    let mut group = c.benchmark_group(group_name);
+
+    for num_readers_writers in NUM_READERS_WRITERS {
+        let cache = Arc::new(ReadOnlyAccountsCache::new(
+            max_data_size_lo,
+            max_data_size_hi,
+            AccountsDb::READ_ONLY_CACHE_MS_TO_SKIP_LRU_UPDATE,
+        ));
+
+        // Fill up the cache.
+        let slot = 0;
+        for (pubkey, account) in accounts.iter() {
+            cache.store(*pubkey, slot, account.clone());
+        }
+
+        // Spawn the reader threads in the background. They continuously read
+        // the initially inserted accounts.
+        let stop_threads = Arc::new(AtomicBool::new(false));
+        let reader_handles = (0..*num_readers_writers)
+            .map(|i| {
+                let stop_threads = Arc::clone(&stop_threads);
+                let cache = Arc::clone(&cache);
+                let pubkeys = pubkeys.clone();
+
+                Builder::new()
+                    .name(format!("reader{i:02}"))
+                    .spawn({
+                        move || {
+                            // Continuously read random accounts.
+                            let mut rng = SmallRng::seed_from_u64(i as u64);
+                            while !stop_threads.load(Ordering::Relaxed) {
+                                let pubkey = pubkeys.choose(&mut rng).unwrap();
+                                black_box(cache.load(*pubkey, slot));
+                            }
+                        }
+                    })
+                    .unwrap()
+            })
+            .collect::<Vec<_>>();
+
+        // Spawn the writer threads in the background. Prepare the accounts
+        // with the same public keys and sizes as the initial ones. The
+        // intention is constant overwriting in the background for additional
+        // contention.
+        let slot = 1;
+        let writer_handles = (0..*num_readers_writers)
+            .map(|i| {
+                let stop_threads = Arc::clone(&stop_threads);
+                let cache = Arc::clone(&cache);
+                let accounts = accounts.clone();
+
+                Builder::new()
+                    .name(format!("writer{i:02}"))
+                    .spawn({
+                        move || {
+                            // Continuously write to already existing pubkeys.
+                            let mut rng = SmallRng::seed_from_u64(100_u64.saturating_add(i as u64));
+                            while !stop_threads.load(Ordering::Relaxed) {
+                                let (pubkey, account) = accounts.choose(&mut rng).unwrap();
+                                cache.store(*pubkey, slot, account.clone());
+                            }
+                        }
+                    })
+                    .unwrap()
+            })
+            .collect::<Vec<_>>();
+
+        // Benchmark the performance of loading and storing accounts in a
+        // cache that is fully populated. This triggers eviction for each
+        // write operation. Background threads introduce contention.
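+        // Note: the store benchmark below generates its accounts from seed 0,
+        // while the initial fill used `Pubkey::new_unique` (via
+        // `accounts_with_size_limit`), so the stored pubkeys are, with
+        // overwhelming probability, new to the cache and keep pushing its
+        // size over the eviction threshold.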
+        group.bench_function(BenchmarkId::new("load", num_readers_writers), |b| {
+            b.iter_custom(|iters| {
+                let mut rng = SmallRng::seed_from_u64(1);
+                let mut total_time = Duration::new(0, 0);
+
+                for _ in 0..iters {
+                    let pubkey = pubkeys.choose(&mut rng).unwrap().to_owned();
+
+                    let start = Instant::now();
+                    black_box(cache.load(pubkey, slot));
+                    total_time = total_time.saturating_add(start.elapsed());
+                }
+
+                total_time
+            })
+        });
+        group.bench_function(BenchmarkId::new("store", num_readers_writers), |b| {
+            b.iter_custom(|iters| {
+                let accounts = utils::accounts(0, DATA_SIZES, WEIGHTS).take(iters as usize);
+
+                let start = Instant::now();
+                for (pubkey, account) in accounts {
+                    cache.store(pubkey, slot, account);
+                }
+
+                start.elapsed()
+            })
+        });
+
+        stop_threads.store(true, Ordering::Relaxed);
+        for reader_handle in reader_handles {
+            reader_handle.join().unwrap();
+        }
+        for writer_handle in writer_handles {
+            writer_handle.join().unwrap();
+        }
+    }
+}
+
+/// Benchmarks read-only cache eviction with low and high thresholds. After
+/// each eviction, enough stores need to be made to reach the difference
+/// between the low and high threshold, triggering another eviction.
+///
+/// Even though eviction is not made on each store, the number of iterations
+/// is high enough to trigger eviction often. Contention that comes from
+/// locking the cache is still visible both in the benchmark's time and in
+/// profiles gathered from the benchmark run.
+///
+/// This benchmark aims to simulate contention in a manner close to what occurs
+/// on validators.
+fn bench_read_only_accounts_cache_eviction_lo_hi(c: &mut Criterion) {
+    bench_read_only_accounts_cache_eviction(
+        c,
+        "read_only_accounts_cache_eviction_lo_hi",
+        AccountsDb::DEFAULT_MAX_READ_ONLY_CACHE_DATA_SIZE_LO,
+        AccountsDb::DEFAULT_MAX_READ_ONLY_CACHE_DATA_SIZE_HI,
+    )
+}
+
+/// Benchmarks read-only cache eviction without differentiating between low and
+/// high thresholds. Each store triggers another eviction immediately.
+///
+/// This benchmark measures the absolute worst-case scenario, which may not
+/// reflect actual conditions in validators.
+fn bench_read_only_accounts_cache_eviction_hi(c: &mut Criterion) {
+    bench_read_only_accounts_cache_eviction(
+        c,
+        "read_only_accounts_cache_eviction_hi",
+        AccountsDb::DEFAULT_MAX_READ_ONLY_CACHE_DATA_SIZE_HI,
+        AccountsDb::DEFAULT_MAX_READ_ONLY_CACHE_DATA_SIZE_HI,
+    )
+}
+
+criterion_group!(
+    benches,
+    bench_read_only_accounts_cache,
+    bench_read_only_accounts_cache_eviction_lo_hi,
+    bench_read_only_accounts_cache_eviction_hi
+);
+criterion_main!(benches);
diff --git a/accounts-db/benches/utils.rs b/accounts-db/benches/utils.rs
new file mode 100644
index 00000000000000..1b2c57a145b1b7
--- /dev/null
+++ b/accounts-db/benches/utils.rs
@@ -0,0 +1,84 @@
+// This file is included as a module separately in each bench, which causes
+// a `dead_code` warning if the given bench doesn't `use` all functions.
+#![allow(dead_code)]
+
+use {
+    rand::{
+        distributions::{Distribution, WeightedIndex},
+        Rng, SeedableRng,
+    },
+    rand_chacha::ChaChaRng,
+    solana_sdk::{
+        account::AccountSharedData, pubkey::Pubkey, rent::Rent,
+        rent_collector::RENT_EXEMPT_RENT_EPOCH,
+    },
+    std::iter,
+};
+
+/// Returns an iterator with storable accounts.
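+///
+/// The iterator is infinite; callers bound it with `take`. Illustrative
+/// usage (the sizes and weights here are made up, not the benches'):
+///
+/// ```ignore
+/// let accounts: Vec<_> = accounts(42, &[0, 165], &[1, 3]).take(10).collect();
+/// assert_eq!(accounts.len(), 10);
+/// ```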
+pub fn accounts<'a>(
+    seed: u64,
+    data_sizes: &'a [usize],
+    weights: &'a [usize],
+) -> impl Iterator<Item = (Pubkey, AccountSharedData)> + 'a {
+    let distribution = WeightedIndex::new(weights).unwrap();
+    let mut rng = ChaChaRng::seed_from_u64(seed);
+    let rent = Rent::default();
+
+    iter::repeat_with(move || {
+        let index = distribution.sample(&mut rng);
+        let data_size = data_sizes[index];
+        let owner: [u8; 32] = rng.gen();
+        let owner = Pubkey::new_from_array(owner);
+        (
+            owner,
+            AccountSharedData::new_rent_epoch(
+                rent.minimum_balance(data_size),
+                data_size,
+                &owner,
+                RENT_EXEMPT_RENT_EPOCH,
+            ),
+        )
+    })
+}
+
+/// Returns an iterator over storable accounts, which stops once the
+/// cumulative data size of the yielded accounts reaches the given
+/// `size_limit`. The account that reaches the limit is still included as the
+/// last one, so the total may slightly exceed `size_limit`.
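+///
+/// Illustrative example (sizes and limit are made up): with only 100-byte
+/// accounts and a 250-byte limit, three accounts are yielded, since the
+/// third is the first to reach the limit (100 + 100 + 100 >= 250).
+///
+/// ```ignore
+/// let accounts: Vec<_> = accounts_with_size_limit(42, &[100], &[1], 250).collect();
+/// assert_eq!(accounts.len(), 3);
+/// ```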
+pub fn accounts_with_size_limit<'a>(
+    seed: u64,
+    data_sizes: &'a [usize],
+    weights: &'a [usize],
+    size_limit: usize,
+) -> impl Iterator<Item = (Pubkey, AccountSharedData)> + 'a {
+    let distribution = WeightedIndex::new(weights).unwrap();
+    let mut rng = ChaChaRng::seed_from_u64(seed);
+    let rent = Rent::default();
+    let mut sum = 0_usize;
+    let mut stop_iter = false;
+
+    iter::from_fn(move || {
+        let index = distribution.sample(&mut rng);
+        let data_size = data_sizes[index];
+        sum = sum.saturating_add(data_size);
+        if stop_iter {
+            None
+        } else {
+            // If the limit is reached, include the current account as the last
+            // one, then stop iterating.
+            if sum >= size_limit {
+                stop_iter = true;
+            }
+            let owner = Pubkey::new_unique();
+
+            Some((
+                owner,
+                AccountSharedData::new_rent_epoch(
+                    rent.minimum_balance(data_size),
+                    data_size,
+                    &owner,
+                    RENT_EXEMPT_RENT_EPOCH,
+                ),
+            ))
+        }
+    })
+}
diff --git a/accounts-db/src/accounts_db.rs b/accounts-db/src/accounts_db.rs
index 6ded12606d2f63..878642ed396eb8 100644
--- a/accounts-db/src/accounts_db.rs
+++ b/accounts-db/src/accounts_db.rs
@@ -1881,11 +1881,14 @@ impl AccountsDb {
     pub const DEFAULT_ACCOUNTS_HASH_CACHE_DIR: &'static str = "accounts_hash_cache";
 
     // read only cache does not update lru on read of an entry unless it has been at least this many ms since the last lru update
+    #[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))]
     const READ_ONLY_CACHE_MS_TO_SKIP_LRU_UPDATE: u32 = 100;
 
     // The default high and low watermark sizes for the accounts read cache.
     // If the cache size exceeds MAX_SIZE_HI, it'll evict entries until the size is <= MAX_SIZE_LO.
+    #[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))]
     const DEFAULT_MAX_READ_ONLY_CACHE_DATA_SIZE_LO: usize = 400 * 1024 * 1024;
+    #[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))]
     const DEFAULT_MAX_READ_ONLY_CACHE_DATA_SIZE_HI: usize = 410 * 1024 * 1024;
 
     pub fn default_for_tests() -> Self {
diff --git a/accounts-db/src/lib.rs b/accounts-db/src/lib.rs
index 8e7b4faf926b75..27c41ccf27dcce 100644
--- a/accounts-db/src/lib.rs
+++ b/accounts-db/src/lib.rs
@@ -32,6 +32,9 @@ mod file_io;
 pub mod hardened_unpack;
 pub mod partitioned_rewards;
 pub mod pubkey_bins;
+#[cfg(feature = "dev-context-only-utils")]
+pub mod read_only_accounts_cache;
+#[cfg(not(feature = "dev-context-only-utils"))]
 mod read_only_accounts_cache;
 mod rolling_bit_field;
 pub mod secondary_index;
diff --git a/accounts-db/src/read_only_accounts_cache.rs b/accounts-db/src/read_only_accounts_cache.rs
index 2431761bc5f535..a616a863535073 100644
--- a/accounts-db/src/read_only_accounts_cache.rs
+++ b/accounts-db/src/read_only_accounts_cache.rs
@@ -1,5 +1,7 @@
 //! ReadOnlyAccountsCache used to store accounts, such as executable accounts,
 //! which can be large, loaded many times, and rarely change.
+#[cfg(feature = "dev-context-only-utils")]
+use qualifier_attr::qualifiers;
 use {
     dashmap::{mapref::entry::Entry, DashMap},
     index_list::{Index, IndexList},
@@ -22,6 +24,7 @@ use {
     },
 };
 
+#[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))]
 const CACHE_ENTRY_SIZE: usize =
     std::mem::size_of::<ReadOnlyAccountCacheEntry>() + 2 * std::mem::size_of::<ReadOnlyCacheKey>();
 
@@ -65,6 +68,7 @@ struct AtomicReadOnlyCacheStats {
     evictor_wakeup_count_productive: AtomicU64,
 }
 
+#[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))]
 #[derive(Debug)]
 pub(crate) struct ReadOnlyAccountsCache {
     cache: Arc<DashMap<ReadOnlyCacheKey, ReadOnlyAccountCacheEntry>>,
@@ -93,6 +97,7 @@ pub(crate) struct ReadOnlyAccountsCache {
 }
 
 impl ReadOnlyAccountsCache {
+    #[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))]
     pub(crate) fn new(
         max_data_size_lo: usize,
         max_data_size_hi: usize,
@@ -137,6 +142,7 @@ impl ReadOnlyAccountsCache {
         }
     }
 
+    #[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))]
     pub(crate) fn load(&self, pubkey: Pubkey, slot: Slot) -> Option<AccountSharedData> {
         let (account, load_us) = measure_us!({
             let mut found = None;
@@ -175,6 +181,7 @@ impl ReadOnlyAccountsCache {
         CACHE_ENTRY_SIZE + account.data().len()
     }
 
+    #[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))]
     pub(crate) fn store(&self, pubkey: Pubkey, slot: Slot, account: AccountSharedData) {
         let measure_store = Measure::start("");
         self.highest_slot_stored.fetch_max(slot, Ordering::Release);
@@ -218,6 +225,7 @@ impl ReadOnlyAccountsCache {
         self.remove(pubkey)
     }
 
+    #[cfg_attr(feature = "dev-context-only-utils", qualifiers(pub))]
     pub(crate) fn remove(&self, pubkey: Pubkey) -> Option<AccountSharedData> {
         Self::do_remove(&pubkey, &self.cache, &self.queue, &self.data_size)
     }
diff --git a/accounts-db/src/storable_accounts.rs b/accounts-db/src/storable_accounts.rs
index 6304daf6002ba6..1843c53833d2c7 100644
--- a/accounts-db/src/storable_accounts.rs
+++ b/accounts-db/src/storable_accounts.rs
@@ -100,7 +100,8 @@ pub struct StorableAccountsCacher {
 
 /// abstract access to pubkey, account, slot, target_slot of either:
 /// a. (slot, &[&Pubkey, &ReadableAccount])
-/// b. (slot, &[&Pubkey, &ReadableAccount, Slot]) (we will use this later)
+/// b. (slot, &[Pubkey, ReadableAccount])
+/// c. (slot, &[&Pubkey, &ReadableAccount, Slot]) (we will use this later)
 /// This trait avoids having to allocate redundant data when there is a duplicated slot parameter.
 /// All legacy callers do not have a unique slot per account to store.
 pub trait StorableAccounts<'a>: Sync {
@@ -165,6 +166,26 @@ impl<'a: 'b, 'b> StorableAccounts<'a> for (Slot, &'b [(&'a Pubkey, &'a AccountSh
     }
 }
 
+impl<'a: 'b, 'b> StorableAccounts<'a> for (Slot, &'b [(Pubkey, AccountSharedData)]) {
+    fn account<Ret>(
+        &self,
+        index: usize,
+        mut callback: impl for<'local> FnMut(AccountForStorage<'local>) -> Ret,
+    ) -> Ret {
+        callback((&self.1[index].0, &self.1[index].1).into())
+    }
+    fn slot(&self, _index: usize) -> Slot {
+        // per-index slot is not unique per slot when per-account slot is not included in the source data
+        self.target_slot()
+    }
+    fn target_slot(&self) -> Slot {
+        self.0
+    }
+    fn len(&self) -> usize {
+        self.1.len()
+    }
+}
+
 /// holds slices of accounts being moved FROM a common source slot to 'target_slot'
 pub struct StorableAccountsBySlot<'a> {
     target_slot: Slot,
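
For illustration, a minimal sketch (not part of the diff) of what the new
owned-pair impl enables: a `(Slot, &[(Pubkey, AccountSharedData)])` tuple now
satisfies `StorableAccounts` directly, so callers that own their accounts (as
the benches' `utils::accounts` does) no longer need to build a parallel vector
of `(&Pubkey, &AccountSharedData)` references. The sketch assumes the
`storable_accounts` module and trait are publicly importable; exact paths may
differ.

```rust
use {
    solana_accounts_db::storable_accounts::StorableAccounts,
    solana_sdk::{account::AccountSharedData, clock::Slot, pubkey::Pubkey},
};

fn main() {
    let target_slot: Slot = 7;
    // Owned pairs, e.g. as produced by the benches' `utils::accounts`.
    let accounts: Vec<(Pubkey, AccountSharedData)> =
        vec![(Pubkey::new_unique(), AccountSharedData::default())];

    // The tuple itself implements `StorableAccounts`.
    let storable = (target_slot, accounts.as_slice());
    assert_eq!(storable.len(), 1);
    assert_eq!(storable.target_slot(), target_slot);
    // Without a per-account slot in the source data, every index reports
    // the target slot.
    assert_eq!(storable.slot(0), target_slot);
}
```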