From 182d27f718e1d6df347bdbe60edaba6026f1c6b5 Mon Sep 17 00:00:00 2001 From: Brooks Date: Thu, 28 Mar 2024 11:14:23 -0400 Subject: [PATCH] Checks if bank snapshot is loadable before fastbooting (#343) --- core/src/accounts_hash_verifier.rs | 35 ++++++- ledger/src/bank_forks_utils.rs | 126 ++++++++++++------------ local-cluster/tests/local_cluster.rs | 99 +++++++++++++++++++ runtime/src/snapshot_bank_utils.rs | 139 ++++++++++++++++++++++++++- runtime/src/snapshot_utils.rs | 103 ++++++++++++++++++++ 5 files changed, 430 insertions(+), 72 deletions(-) diff --git a/core/src/accounts_hash_verifier.rs b/core/src/accounts_hash_verifier.rs index 20adba99835eeb..29e592ff979355 100644 --- a/core/src/accounts_hash_verifier.rs +++ b/core/src/accounts_hash_verifier.rs @@ -24,6 +24,7 @@ use { hash::Hash, }, std::{ + io::{Error as IoError, Result as IoResult}, sync::{ atomic::{AtomicBool, Ordering}, Arc, @@ -71,12 +72,17 @@ impl AccountsHashVerifier { info!("handling accounts package: {accounts_package:?}"); let enqueued_time = accounts_package.enqueued.elapsed(); - let (_, handling_time_us) = measure_us!(Self::process_accounts_package( + let (result, handling_time_us) = measure_us!(Self::process_accounts_package( accounts_package, snapshot_package_sender.as_ref(), &snapshot_config, &exit, )); + if let Err(err) = result { + error!("Stopping AccountsHashVerifier! Fatal error while processing accounts package: {err}"); + exit.store(true, Ordering::Relaxed); + break; + } datapoint_info!( "accounts_hash_verifier", @@ -208,9 +214,9 @@ impl AccountsHashVerifier { snapshot_package_sender: Option<&Sender>, snapshot_config: &SnapshotConfig, exit: &AtomicBool, - ) { + ) -> IoResult<()> { let accounts_hash = - Self::calculate_and_verify_accounts_hash(&accounts_package, snapshot_config); + Self::calculate_and_verify_accounts_hash(&accounts_package, snapshot_config)?; Self::save_epoch_accounts_hash(&accounts_package, accounts_hash); @@ -221,13 +227,15 @@ impl AccountsHashVerifier { accounts_hash, exit, ); + + Ok(()) } /// returns calculated accounts hash fn calculate_and_verify_accounts_hash( accounts_package: &AccountsPackage, snapshot_config: &SnapshotConfig, - ) -> AccountsHashKind { + ) -> IoResult { let accounts_hash_calculation_kind = match accounts_package.package_kind { AccountsPackageKind::AccountsHashVerifier => CalcAccountsHashKind::Full, AccountsPackageKind::EpochAccountsHash => CalcAccountsHashKind::Full, @@ -303,6 +311,23 @@ impl AccountsHashVerifier { &accounts_hash_for_reserialize, bank_incremental_snapshot_persistence.as_ref(), ); + + // now write the full snapshot slot file after reserializing so this bank snapshot is loadable + let full_snapshot_archive_slot = match accounts_package.package_kind { + AccountsPackageKind::Snapshot(SnapshotKind::IncrementalSnapshot(base_slot)) => { + base_slot + } + _ => accounts_package.slot, + }; + snapshot_utils::write_full_snapshot_slot_file( + &snapshot_info.bank_snapshot_dir, + full_snapshot_archive_slot, + ) + .map_err(|err| { + IoError::other(format!( + "failed to calculate accounts hash for {accounts_package:?}: {err}" + )) + })?; } if accounts_package.package_kind @@ -340,7 +365,7 @@ impl AccountsHashVerifier { ); } - accounts_hash_kind + Ok(accounts_hash_kind) } fn _calculate_full_accounts_hash( diff --git a/ledger/src/bank_forks_utils.rs b/ledger/src/bank_forks_utils.rs index 17412c1801ac68..a64b29bdcf8670 100644 --- a/ledger/src/bank_forks_utils.rs +++ b/ledger/src/bank_forks_utils.rs @@ -244,20 +244,70 @@ fn bank_forks_from_snapshot( .map(SnapshotArchiveInfoGetter::slot) .unwrap_or(0), ); - let latest_bank_snapshot = - snapshot_utils::get_highest_bank_snapshot_post(&snapshot_config.bank_snapshots_dir); - let will_startup_from_snapshot_archives = match process_options.use_snapshot_archives_at_startup - { - UseSnapshotArchivesAtStartup::Always => true, - UseSnapshotArchivesAtStartup::Never => false, - UseSnapshotArchivesAtStartup::WhenNewest => latest_bank_snapshot - .as_ref() - .map(|bank_snapshot| latest_snapshot_archive_slot > bank_snapshot.slot) - .unwrap_or(true), + let fastboot_snapshot = match process_options.use_snapshot_archives_at_startup { + UseSnapshotArchivesAtStartup::Always => None, + UseSnapshotArchivesAtStartup::Never => { + let Some(bank_snapshot) = + snapshot_utils::get_highest_loadable_bank_snapshot(snapshot_config) + else { + return Err(BankForksUtilsError::NoBankSnapshotDirectory { + flag: use_snapshot_archives_at_startup::cli::LONG_ARG.to_string(), + value: UseSnapshotArchivesAtStartup::Never.to_string(), + }); + }; + // If a newer snapshot archive was downloaded, it is possible that its slot is + // higher than the local state we will load. Did the user intend for this? + if bank_snapshot.slot < latest_snapshot_archive_slot { + warn!( + "Starting up from local state at slot {}, which is *older* than \ + the latest snapshot archive at slot {}. If this is not desired, \ + change the --{} CLI option to *not* \"{}\" and restart.", + bank_snapshot.slot, + latest_snapshot_archive_slot, + use_snapshot_archives_at_startup::cli::LONG_ARG, + UseSnapshotArchivesAtStartup::Never.to_string(), + ); + } + Some(bank_snapshot) + } + UseSnapshotArchivesAtStartup::WhenNewest => { + snapshot_utils::get_highest_loadable_bank_snapshot(snapshot_config) + .filter(|bank_snapshot| bank_snapshot.slot >= latest_snapshot_archive_slot) + } }; - let bank = if will_startup_from_snapshot_archives { + let bank = if let Some(fastboot_snapshot) = fastboot_snapshot { + let (bank, _) = snapshot_bank_utils::bank_from_snapshot_dir( + &account_paths, + &fastboot_snapshot, + genesis_config, + &process_options.runtime_config, + process_options.debug_keys.clone(), + None, + process_options.account_indexes.clone(), + process_options.limit_load_slot_count_from_snapshot, + process_options.shrink_ratio, + process_options.verify_index, + process_options.accounts_db_config.clone(), + accounts_update_notifier, + exit, + ) + .map_err(|err| BankForksUtilsError::BankFromSnapshotsDirectory { + source: err, + path: fastboot_snapshot.snapshot_path(), + })?; + + // If the node crashes before taking the next bank snapshot, the next startup will attempt + // to load from the same bank snapshot again. And if `shrink` has run, the account storage + // files that are hard linked in bank snapshot will be *different* than what the bank + // snapshot expects. This would cause the node to crash again. To prevent that, purge all + // the bank snapshots here. In the above scenario, this will cause the node to load from a + // snapshot archive next time, which is safe. + snapshot_utils::purge_all_bank_snapshots(&snapshot_config.bank_snapshots_dir); + + bank + } else { // Given that we are going to boot from an archive, the append vecs held in the snapshot dirs for fast-boot should // be released. They will be released by the account_background_service anyway. But in the case of the account_paths // using memory-mounted file system, they are not released early enough to give space for the new append-vecs from @@ -292,60 +342,6 @@ fn bank_forks_from_snapshot( .map(|archive| archive.path().display().to_string()) .unwrap_or("none".to_string()), })?; - bank - } else { - let bank_snapshot = - latest_bank_snapshot.ok_or_else(|| BankForksUtilsError::NoBankSnapshotDirectory { - flag: use_snapshot_archives_at_startup::cli::LONG_ARG.to_string(), - value: UseSnapshotArchivesAtStartup::Never.to_string(), - })?; - - // If a newer snapshot archive was downloaded, it is possible that its slot is - // higher than the local bank we will load. Did the user intend for this? - if bank_snapshot.slot < latest_snapshot_archive_slot { - assert_eq!( - process_options.use_snapshot_archives_at_startup, - UseSnapshotArchivesAtStartup::Never, - ); - warn!( - "Starting up from local state at slot {}, which is *older* than \ - the latest snapshot archive at slot {}. If this is not desired, \ - change the --{} CLI option to *not* \"{}\" and restart.", - bank_snapshot.slot, - latest_snapshot_archive_slot, - use_snapshot_archives_at_startup::cli::LONG_ARG, - UseSnapshotArchivesAtStartup::Never.to_string(), - ); - } - - let (bank, _) = snapshot_bank_utils::bank_from_snapshot_dir( - &account_paths, - &bank_snapshot, - genesis_config, - &process_options.runtime_config, - process_options.debug_keys.clone(), - None, - process_options.account_indexes.clone(), - process_options.limit_load_slot_count_from_snapshot, - process_options.shrink_ratio, - process_options.verify_index, - process_options.accounts_db_config.clone(), - accounts_update_notifier, - exit, - ) - .map_err(|err| BankForksUtilsError::BankFromSnapshotsDirectory { - source: err, - path: bank_snapshot.snapshot_path(), - })?; - - // If the node crashes before taking the next bank snapshot, the next startup will attempt - // to load from the same bank snapshot again. And if `shrink` has run, the account storage - // files that are hard linked in bank snapshot will be *different* than what the bank - // snapshot expects. This would cause the node to crash again. To prevent that, purge all - // the bank snapshots here. In the above scenario, this will cause the node to load from a - // snapshot archive next time, which is safe. - snapshot_utils::purge_all_bank_snapshots(&snapshot_config.bank_snapshots_dir); - bank }; diff --git a/local-cluster/tests/local_cluster.rs b/local-cluster/tests/local_cluster.rs index f36e94df8a661d..44032aeeb4d38b 100644 --- a/local-cluster/tests/local_cluster.rs +++ b/local-cluster/tests/local_cluster.rs @@ -5067,6 +5067,105 @@ fn test_boot_from_local_state() { } } +/// Test fastboot to ensure a node can boot in case it crashed while archiving a full snapshot +/// +/// 1. Start a node and wait for it to take at least two full snapshots and one more +/// bank snapshot POST afterwards (for simplicity, wait for 2 full and 1 incremental). +/// 2. To simulate a node crashing while archiving a full snapshot, stop the node and +/// then delete the latest full snapshot archive. +/// 3. Restart the node. This should succeed, and boot from the older full snapshot archive, +/// *not* the latest bank snapshot POST. +/// 4. Take another incremental snapshot. This ensures the correct snapshot was loaded, +/// AND ensures the correct accounts hashes are present (which are needed when making +/// the bank snapshot POST for the new incremental snapshot). +#[test] +#[serial] +fn test_boot_from_local_state_missing_archive() { + solana_logger::setup_with_default(RUST_LOG_FILTER); + const FULL_SNAPSHOT_INTERVAL: Slot = 20; + const INCREMENTAL_SNAPSHOT_INTERVAL: Slot = 10; + + let validator_config = SnapshotValidatorConfig::new( + FULL_SNAPSHOT_INTERVAL, + INCREMENTAL_SNAPSHOT_INTERVAL, + INCREMENTAL_SNAPSHOT_INTERVAL, + 7, + ); + + let mut cluster_config = ClusterConfig { + node_stakes: vec![100 * DEFAULT_NODE_STAKE], + cluster_lamports: DEFAULT_CLUSTER_LAMPORTS, + validator_configs: make_identical_validator_configs(&validator_config.validator_config, 1), + ..ClusterConfig::default() + }; + let mut cluster = LocalCluster::new(&mut cluster_config, SocketAddrSpace::Unspecified); + + // we need two full snapshots and an incremental snapshot for this test + info!("Waiting for validator to create snapshots..."); + LocalCluster::wait_for_next_full_snapshot( + &cluster, + &validator_config.full_snapshot_archives_dir, + Some(Duration::from_secs(5 * 60)), + ); + LocalCluster::wait_for_next_full_snapshot( + &cluster, + &validator_config.full_snapshot_archives_dir, + Some(Duration::from_secs(5 * 60)), + ); + LocalCluster::wait_for_next_incremental_snapshot( + &cluster, + &validator_config.full_snapshot_archives_dir, + &validator_config.incremental_snapshot_archives_dir, + Some(Duration::from_secs(5 * 60)), + ); + debug!( + "snapshot archives:\n\tfull: {:?}\n\tincr: {:?}", + snapshot_utils::get_full_snapshot_archives( + validator_config.full_snapshot_archives_dir.path() + ), + snapshot_utils::get_incremental_snapshot_archives( + validator_config.incremental_snapshot_archives_dir.path() + ), + ); + info!("Waiting for validator to create snapshots... DONE"); + + // now delete the latest full snapshot archive and restart, to simulate a crash while archiving + // a full snapshot package + info!("Stopping validator..."); + let validator_pubkey = cluster.get_node_pubkeys()[0]; + let mut validator_info = cluster.exit_node(&validator_pubkey); + info!("Stopping validator... DONE"); + + info!("Deleting latest full snapshot archive..."); + let highest_full_snapshot = snapshot_utils::get_highest_full_snapshot_archive_info( + validator_config.full_snapshot_archives_dir.path(), + ) + .unwrap(); + fs::remove_file(highest_full_snapshot.path()).unwrap(); + info!("Deleting latest full snapshot archive... DONE"); + + info!("Restarting validator..."); + // if we set this to `Never`, the validator should not boot + validator_info.config.use_snapshot_archives_at_startup = + UseSnapshotArchivesAtStartup::WhenNewest; + cluster.restart_node( + &validator_pubkey, + validator_info, + SocketAddrSpace::Unspecified, + ); + info!("Restarting validator... DONE"); + + // ensure we can create new incremental snapshots, since that is what used to fail + info!("Waiting for validator to create snapshots..."); + LocalCluster::wait_for_next_incremental_snapshot( + &cluster, + &validator_config.full_snapshot_archives_dir, + &validator_config.incremental_snapshot_archives_dir, + Some(Duration::from_secs(5 * 60)), + ); + info!("Waiting for validator to create snapshots... DONE"); +} + // We want to simulate the following: // /--- 1 --- 3 (duplicate block) // 0 diff --git a/runtime/src/snapshot_bank_utils.rs b/runtime/src/snapshot_bank_utils.rs index 03a26d46986ddf..6db2747089d30c 100644 --- a/runtime/src/snapshot_bank_utils.rs +++ b/runtime/src/snapshot_bank_utils.rs @@ -1257,13 +1257,16 @@ mod tests { crate::{ bank_forks::BankForks, genesis_utils, + snapshot_config::SnapshotConfig, snapshot_utils::{ clean_orphaned_account_snapshot_dirs, create_tmp_accounts_dir_for_tests, get_bank_snapshots, get_bank_snapshots_post, get_bank_snapshots_pre, - get_highest_bank_snapshot, purge_all_bank_snapshots, purge_bank_snapshot, + get_highest_bank_snapshot, get_highest_bank_snapshot_pre, + get_highest_loadable_bank_snapshot, purge_all_bank_snapshots, purge_bank_snapshot, purge_bank_snapshots_older_than_slot, purge_incomplete_bank_snapshots, purge_old_bank_snapshots, purge_old_bank_snapshots_at_startup, - snapshot_storage_rebuilder::get_slot_and_append_vec_id, ArchiveFormat, + snapshot_storage_rebuilder::get_slot_and_append_vec_id, + write_full_snapshot_slot_file, ArchiveFormat, SNAPSHOT_FULL_SNAPSHOT_SLOT_FILENAME, }, status_cache::Status, }, @@ -2638,4 +2641,136 @@ mod tests { Err(VerifySlotDeltasError::SlotNotFoundInDeltas(333)), ); } + + #[test] + fn test_get_highest_loadable_bank_snapshot() { + let bank_snapshots_dir = TempDir::new().unwrap(); + let full_snapshot_archives_dir = TempDir::new().unwrap(); + let incremental_snapshot_archives_dir = TempDir::new().unwrap(); + + let snapshot_config = SnapshotConfig { + bank_snapshots_dir: bank_snapshots_dir.as_ref().to_path_buf(), + full_snapshot_archives_dir: full_snapshot_archives_dir.as_ref().to_path_buf(), + incremental_snapshot_archives_dir: incremental_snapshot_archives_dir + .as_ref() + .to_path_buf(), + ..Default::default() + }; + let load_only_snapshot_config = SnapshotConfig { + bank_snapshots_dir: snapshot_config.bank_snapshots_dir.clone(), + full_snapshot_archives_dir: snapshot_config.full_snapshot_archives_dir.clone(), + incremental_snapshot_archives_dir: snapshot_config + .incremental_snapshot_archives_dir + .clone(), + ..SnapshotConfig::new_load_only() + }; + + let genesis_config = GenesisConfig::default(); + let mut bank = Arc::new(Bank::new_for_tests(&genesis_config)); + + // take some snapshots, and archive them + for _ in 0..snapshot_config + .maximum_full_snapshot_archives_to_retain + .get() + { + let slot = bank.slot() + 1; + bank = Arc::new(Bank::new_from_parent(bank, &Pubkey::default(), slot)); + bank.fill_bank_with_ticks_for_tests(); + bank.squash(); + bank.force_flush_accounts_cache(); + bank.update_accounts_hash(CalcAccountsHashDataSource::Storages, false, false); + let snapshot_storages = bank.get_snapshot_storages(None); + let slot_deltas = bank.status_cache.read().unwrap().root_slot_deltas(); + let bank_snapshot_info = add_bank_snapshot( + &bank_snapshots_dir, + &bank, + &snapshot_storages, + snapshot_config.snapshot_version, + slot_deltas, + ) + .unwrap(); + assert!( + crate::serde_snapshot::reserialize_bank_with_new_accounts_hash( + &bank_snapshot_info.snapshot_dir, + bank.slot(), + &bank.get_accounts_hash().unwrap(), + None, + ) + ); + write_full_snapshot_slot_file(&bank_snapshot_info.snapshot_dir, slot).unwrap(); + package_and_archive_full_snapshot( + &bank, + &bank_snapshot_info, + &full_snapshot_archives_dir, + &incremental_snapshot_archives_dir, + snapshot_storages, + snapshot_config.archive_format, + snapshot_config.snapshot_version, + snapshot_config.maximum_full_snapshot_archives_to_retain, + snapshot_config.maximum_incremental_snapshot_archives_to_retain, + ) + .unwrap(); + } + + // take another snapshot, but leave it as PRE + let slot = bank.slot() + 1; + bank = Arc::new(Bank::new_from_parent(bank, &Pubkey::default(), slot)); + bank.fill_bank_with_ticks_for_tests(); + bank.squash(); + bank.force_flush_accounts_cache(); + let snapshot_storages = bank.get_snapshot_storages(None); + let slot_deltas = bank.status_cache.read().unwrap().root_slot_deltas(); + add_bank_snapshot( + &bank_snapshots_dir, + &bank, + &snapshot_storages, + SnapshotVersion::default(), + slot_deltas, + ) + .unwrap(); + + let highest_full_snapshot_archive = + get_highest_full_snapshot_archive_info(&full_snapshot_archives_dir).unwrap(); + let highest_bank_snapshot_post = + get_highest_bank_snapshot_post(&bank_snapshots_dir).unwrap(); + let highest_bank_snapshot_pre = get_highest_bank_snapshot_pre(&bank_snapshots_dir).unwrap(); + + // we want a bank snapshot PRE with the highest slot to ensure get_highest_loadable() + // correctly skips bank snapshots PRE + assert!(highest_bank_snapshot_pre.slot > highest_bank_snapshot_post.slot); + + // 1. call get_highest_loadable() but bad snapshot dir, so returns None + assert!(get_highest_loadable_bank_snapshot(&SnapshotConfig::default()).is_none()); + + // 2. get_highest_loadable(), should return highest_bank_snapshot_post_slot + let bank_snapshot = get_highest_loadable_bank_snapshot(&snapshot_config).unwrap(); + assert_eq!(bank_snapshot, highest_bank_snapshot_post); + + // 3. delete highest full snapshot archive, get_highest_loadable() should return NONE + fs::remove_file(highest_full_snapshot_archive.path()).unwrap(); + assert!(get_highest_loadable_bank_snapshot(&snapshot_config).is_none()); + + // 4. get_highest_loadable(), but with a load-only snapshot config, should return Some() + let bank_snapshot = get_highest_loadable_bank_snapshot(&load_only_snapshot_config).unwrap(); + assert_eq!(bank_snapshot, highest_bank_snapshot_post); + + // 5. delete highest bank snapshot, get_highest_loadable() should return Some() again, with slot-1 + fs::remove_dir_all(&highest_bank_snapshot_post.snapshot_dir).unwrap(); + let bank_snapshot = get_highest_loadable_bank_snapshot(&snapshot_config).unwrap(); + assert_eq!(bank_snapshot.slot, highest_bank_snapshot_post.slot - 1); + + // 6. delete the full snapshot slot file, get_highest_loadable() should return NONE + fs::remove_file( + bank_snapshot + .snapshot_dir + .join(SNAPSHOT_FULL_SNAPSHOT_SLOT_FILENAME), + ) + .unwrap(); + assert!(get_highest_loadable_bank_snapshot(&snapshot_config).is_none()); + + // 7. however, a load-only snapshot config should return Some() again + let bank_snapshot2 = + get_highest_loadable_bank_snapshot(&load_only_snapshot_config).unwrap(); + assert_eq!(bank_snapshot2, bank_snapshot); + } } diff --git a/runtime/src/snapshot_utils.rs b/runtime/src/snapshot_utils.rs index 10f715c2597b56..9da2205d0c616f 100644 --- a/runtime/src/snapshot_utils.rs +++ b/runtime/src/snapshot_utils.rs @@ -4,6 +4,7 @@ use { snapshot_archive_info::{ FullSnapshotArchiveInfo, IncrementalSnapshotArchiveInfo, SnapshotArchiveInfoGetter, }, + snapshot_config::SnapshotConfig, snapshot_hash::SnapshotHash, snapshot_package::SnapshotPackage, snapshot_utils::snapshot_storage_rebuilder::{ @@ -58,6 +59,7 @@ pub const SNAPSHOT_VERSION_FILENAME: &str = "version"; pub const SNAPSHOT_STATE_COMPLETE_FILENAME: &str = "state_complete"; pub const SNAPSHOT_ACCOUNTS_HARDLINKS: &str = "accounts_hardlinks"; pub const SNAPSHOT_ARCHIVE_DOWNLOAD_DIR: &str = "remote"; +pub const SNAPSHOT_FULL_SNAPSHOT_SLOT_FILENAME: &str = "full_snapshot_slot"; pub const MAX_SNAPSHOT_DATA_FILE_SIZE: u64 = 32 * 1024 * 1024 * 1024; // 32 GiB const MAX_SNAPSHOT_VERSION_FILE_SIZE: u64 = 8; // byte const VERSION_STRING_V1_2_0: &str = "1.2.0"; @@ -625,6 +627,76 @@ fn is_bank_snapshot_complete(bank_snapshot_dir: impl AsRef) -> bool { state_complete_path.is_file() } +/// Writes the full snapshot slot file into the bank snapshot dir +pub fn write_full_snapshot_slot_file( + bank_snapshot_dir: impl AsRef, + full_snapshot_slot: Slot, +) -> IoResult<()> { + let full_snapshot_slot_path = bank_snapshot_dir + .as_ref() + .join(SNAPSHOT_FULL_SNAPSHOT_SLOT_FILENAME); + fs::write( + &full_snapshot_slot_path, + Slot::to_le_bytes(full_snapshot_slot), + ) + .map_err(|err| { + IoError::other(format!( + "failed to write full snapshot slot file '{}': {err}", + full_snapshot_slot_path.display(), + )) + }) +} + +// Reads the full snapshot slot file from the bank snapshot dir +pub fn read_full_snapshot_slot_file(bank_snapshot_dir: impl AsRef) -> IoResult { + const SLOT_SIZE: usize = std::mem::size_of::(); + let full_snapshot_slot_path = bank_snapshot_dir + .as_ref() + .join(SNAPSHOT_FULL_SNAPSHOT_SLOT_FILENAME); + let full_snapshot_slot_file_metadata = fs::metadata(&full_snapshot_slot_path)?; + if full_snapshot_slot_file_metadata.len() != SLOT_SIZE as u64 { + let error_message = format!( + "invalid full snapshot slot file size: '{}' has {} bytes (should be {} bytes)", + full_snapshot_slot_path.display(), + full_snapshot_slot_file_metadata.len(), + SLOT_SIZE, + ); + return Err(IoError::other(error_message)); + } + let mut full_snapshot_slot_file = fs::File::open(&full_snapshot_slot_path)?; + let mut buffer = [0; SLOT_SIZE]; + full_snapshot_slot_file.read_exact(&mut buffer)?; + let slot = Slot::from_le_bytes(buffer); + Ok(slot) +} + +/// Gets the highest, loadable, bank snapshot +/// +/// The highest bank snapshot is the one with the highest slot. +/// To be loadable, the bank snapshot must be a BankSnapshotKind::Post. +/// And if we're generating snapshots (e.g. running a normal validator), then +/// the full snapshot file's slot must match the highest full snapshot archive's. +pub fn get_highest_loadable_bank_snapshot( + snapshot_config: &SnapshotConfig, +) -> Option { + let highest_bank_snapshot = + get_highest_bank_snapshot_post(&snapshot_config.bank_snapshots_dir)?; + + // If we're *not* generating snapshots, e.g. running ledger-tool, then we *can* load + // this bank snapshot, and we do not need to check for anything else. + if !snapshot_config.should_generate_snapshots() { + return Some(highest_bank_snapshot); + } + + // Otherwise, the bank snapshot's full snapshot slot *must* be the same as + // the highest full snapshot archive's slot. + let highest_full_snapshot_archive_slot = + get_highest_full_snapshot_archive_slot(&snapshot_config.full_snapshot_archives_dir)?; + let full_snapshot_file_slot = + read_full_snapshot_slot_file(&highest_bank_snapshot.snapshot_dir).ok()?; + (full_snapshot_file_slot == highest_full_snapshot_archive_slot).then_some(highest_bank_snapshot) +} + /// If the validator halts in the middle of `archive_snapshot_package()`, the temporary staging /// directory won't be cleaned up. Call this function to clean them up. pub fn remove_tmp_snapshot_archives(snapshot_archives_dir: impl AsRef) { @@ -2269,6 +2341,7 @@ mod tests { std::{convert::TryFrom, mem::size_of}, tempfile::NamedTempFile, }; + #[test] fn test_serialize_snapshot_data_file_under_limit() { let temp_dir = tempfile::TempDir::new().unwrap(); @@ -3211,4 +3284,34 @@ mod tests { Err(GetSnapshotAccountsHardLinkDirError::GetAccountPath(_)) ); } + + #[test] + fn test_full_snapshot_slot_file_good() { + let slot_written = 123_456_789; + let bank_snapshot_dir = TempDir::new().unwrap(); + write_full_snapshot_slot_file(&bank_snapshot_dir, slot_written).unwrap(); + + let slot_read = read_full_snapshot_slot_file(&bank_snapshot_dir).unwrap(); + assert_eq!(slot_read, slot_written); + } + + #[test] + fn test_full_snapshot_slot_file_bad() { + const SLOT_SIZE: usize = std::mem::size_of::(); + let too_small = [1u8; SLOT_SIZE - 1]; + let too_large = [1u8; SLOT_SIZE + 1]; + + for contents in [too_small.as_slice(), too_large.as_slice()] { + let bank_snapshot_dir = TempDir::new().unwrap(); + let full_snapshot_slot_path = bank_snapshot_dir + .as_ref() + .join(SNAPSHOT_FULL_SNAPSHOT_SLOT_FILENAME); + fs::write(full_snapshot_slot_path, contents).unwrap(); + + let err = read_full_snapshot_slot_file(&bank_snapshot_dir).unwrap_err(); + assert!(err + .to_string() + .starts_with("invalid full snapshot slot file size")); + } + } }