Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Send and Aggregate RestartHeaviestFork. #699

Merged
Changes from 1 commit
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
a91e6c2
Send and Aggregate RestartHeaviestFork.
wen-coding Apr 9, 2024
0eb60a3
total_active_stake in my_heaviest_fork should always be the sum of the
wen-coding Apr 10, 2024
c953564
Merge branch 'master' into wen_restart_push_and_aggregate_heaviest_fork
wen-coding Apr 10, 2024
2e9650c
A few name changes and other small fixes.
wen-coding Apr 15, 2024
bb679b0
Move active_peers update to after stakes_map is updated.
wen-coding Apr 15, 2024
47b6a0d
Merge branch 'master' into wen_restart_push_and_aggregate_heaviest_fork
wen-coding Apr 15, 2024
8a2d0c8
Only send out RestartHeaviestFork and write snapshots every 30 minutes.
wen-coding Apr 15, 2024
2814ab6
Merge branch 'master' into wen_restart_push_and_aggregate_heaviest_fork
wen-coding Apr 15, 2024
3167a48
Proceed if 5% of the nodes disagree and log the disagreement if the
wen-coding Apr 17, 2024
54b47ee
Make linter happy.
wen-coding Apr 17, 2024
acd7341
Make linter happy.
wen-coding Apr 17, 2024
645aafa
Add successful case.
wen-coding Apr 17, 2024
bf265a6
Merge branch 'master' into wen_restart_push_and_aggregate_heaviest_fork
wen-coding Apr 17, 2024
15e6824
Add a few constants and methods.
wen-coding Apr 22, 2024
f3ac6c9
Account for 5% non_conforming when calculating exit threshold.
wen-coding Apr 22, 2024
bbe83fa
Merge branch 'master' into wen_restart_push_and_aggregate_heaviest_fork
wen-coding Apr 22, 2024
f9f0c70
Adding a few more logs.
wen-coding Apr 22, 2024
f7f078d
Fix tests to use 75% when aggregating HeaviestFork and a few bugs.
wen-coding Apr 22, 2024
aff9399
Reuse adjusted_threhold_percent.
wen-coding Apr 23, 2024
601e06c
Merge branch 'master' into wen_restart_push_and_aggregate_heaviest_fork
wen-coding Apr 23, 2024
a45edb0
Merge branch 'master' into wen_restart_push_and_aggregate_heaviest_fork
wen-coding Apr 25, 2024
e10467f
Merge branch 'master' into wen_restart_push_and_aggregate_heaviest_fork
wen-coding Apr 26, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 18 additions & 99 deletions wen-restart/src/wen_restart.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ use {
Arc, RwLock,
},
thread::sleep,
time::Duration,
time::{Duration, Instant},
},
};

Expand Down Expand Up @@ -466,12 +466,13 @@ pub(crate) fn aggregate_restart_heaviest_fork(
total_active_stake,
);

let mut progress_last_sent = Instant::now();
let mut cursor = solana_gossip::crds::Cursor::default();
let mut progress_changed = false;
loop {
if exit.load(Ordering::Relaxed) {
return Err(WenRestartError::Exiting.into());
}
let mut progress_changed = false;
let start = timestamp();
for new_heaviest_fork in cluster_info.get_restart_heaviest_fork(&mut cursor) {
let from = new_heaviest_fork.from.to_string();
Expand All @@ -488,11 +489,6 @@ pub(crate) fn aggregate_restart_heaviest_fork(
let current_total_active_stake = heaviest_fork_aggregate.total_active_stake();
if current_total_active_stake > total_active_stake {
total_active_stake = current_total_active_stake;
cluster_info.push_restart_heaviest_fork(
heaviest_fork_slot,
heaviest_fork_hash,
current_total_active_stake,
);
progress
.my_heaviest_fork
.as_mut()
Expand All @@ -501,7 +497,7 @@ pub(crate) fn aggregate_restart_heaviest_fork(
progress_changed = true;
}
if progress_changed {
write_wen_restart_records(wen_restart_path, progress)?;
progress_changed = false;
let total_active_stake_seen_supermajority =
heaviest_fork_aggregate.total_active_stake_seen_supermajority();
info!(
Expand All @@ -510,9 +506,20 @@ pub(crate) fn aggregate_restart_heaviest_fork(
heaviest_fork_aggregate.total_active_stake(),
total_stake
);
if total_active_stake_seen_supermajority as f64 * 100.0 / total_stake as f64
> wait_for_supermajority_threshold_percent as f64
{
let can_exit = total_active_stake_seen_supermajority as f64 * 100.0
/ total_stake as f64
> wait_for_supermajority_threshold_percent as f64;
carllin marked this conversation as resolved.
Show resolved Hide resolved
// Only send out updates every 30 minutes or when we can exit.
if progress_last_sent.elapsed().as_secs() >= 1800 || can_exit {
carllin marked this conversation as resolved.
Show resolved Hide resolved
cluster_info.push_restart_heaviest_fork(
heaviest_fork_slot,
heaviest_fork_hash,
current_total_active_stake,
);
write_wen_restart_records(wen_restart_path, progress)?;
progress_last_sent = Instant::now();
}
if can_exit {
break;
}
}
Expand Down Expand Up @@ -2002,92 +2009,4 @@ mod tests {
),
);
}

#[test]
fn test_wen_restart_aggregate_heaviest_fork_stop_and_restart() {
solana_logger::setup();
let ledger_path = get_tmp_ledger_path_auto_delete!();
let test_state = wen_restart_test_init(&ledger_path);
let heaviest_fork_slot = test_state.last_voted_fork_slots[0] + 3;
let heaviest_fork_bankhash = Hash::new_unique();
let progress = WenRestartProgress {
state: RestartState::HeaviestFork.into(),
my_heaviest_fork: Some(HeaviestForkRecord {
slot: heaviest_fork_slot,
bankhash: heaviest_fork_bankhash.to_string(),
total_active_stake: 0,
shred_version: SHRED_VERSION as u32,
wallclock: 0,
}),
..Default::default()
};
assert!(write_wen_restart_records(&test_state.wen_restart_proto_path, &progress).is_ok());
let mut rng = rand::thread_rng();
let mut expected_messages = HashMap::new();
for i in 0..8 {
let keypairs = &test_state.validator_voting_keypairs[i + 2];
let wen_restart_proto_path_clone = test_state.wen_restart_proto_path.clone();
let cluster_info_clone = test_state.cluster_info.clone();
let bank_forks_clone = test_state.bank_forks.clone();
let exit = Arc::new(AtomicBool::new(false));
let exit_clone = exit.clone();
let mut progress_clone = progress.clone();
let wen_restart_thread_handle = Builder::new()
.name("solana-wen-restart".to_string())
.spawn(move || {
let _ = aggregate_restart_heaviest_fork(
&wen_restart_proto_path_clone,
80,
cluster_info_clone,
bank_forks_clone,
exit_clone,
&mut progress_clone,
);
})
.unwrap();
let node_pubkey = keypairs.node_keypair.pubkey();
let node = LegacyContactInfo::new_rand(&mut rng, Some(node_pubkey));
let total_active_stake = (i + 2) as u64 * 100;
let now = timestamp();
push_restart_heaviest_fork(
test_state.cluster_info.clone(),
&node,
heaviest_fork_slot,
&heaviest_fork_bankhash,
total_active_stake,
&keypairs.node_keypair,
now,
);
expected_messages.insert(
node_pubkey.to_string(),
HeaviestForkRecord {
slot: heaviest_fork_slot,
bankhash: heaviest_fork_bankhash.to_string(),
total_active_stake,
shred_version: SHRED_VERSION as u32,
wallclock: now,
},
);
wait_on_expected_progress_with_timeout(
test_state.wen_restart_proto_path.clone(),
WenRestartProgress {
state: RestartState::HeaviestFork.into(),
my_heaviest_fork: Some(HeaviestForkRecord {
slot: heaviest_fork_slot,
bankhash: heaviest_fork_bankhash.to_string(),
total_active_stake,
shred_version: SHRED_VERSION as u32,
wallclock: 0,
}),
heaviest_fork_aggregate: Some(HeaviestForkAggregateRecord {
received: expected_messages.clone(),
final_result: None,
}),
..Default::default()
},
);
exit.store(true, Ordering::Relaxed);
let _ = wen_restart_thread_handle.join();
}
}
}
Loading