From c8a839aeffa76b0b3325d642ee38bba2595170b4 Mon Sep 17 00:00:00 2001 From: Wen <113942165+wen-coding@users.noreply.github.com> Date: Sat, 24 Aug 2024 19:32:11 -0700 Subject: [PATCH] Enable wen restart from command line and update comments. (#2640) Enable wen_restart from command line and update comments. --- validator/src/cli.rs | 19 ++++++++++--------- validator/src/main.rs | 6 ++++++ 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/validator/src/cli.rs b/validator/src/cli.rs index 9f4276d8d67ee0..38410aaeabd7ab 100644 --- a/validator/src/cli.rs +++ b/validator/src/cli.rs @@ -1580,7 +1580,9 @@ pub fn app<'a>(version: &'a str, default_args: &'a DefaultArgs) -> App<'a, 'a> { .required(false) .conflicts_with("wait_for_supermajority") .help( - "When specified, the validator will enter Wen Restart mode which \ + "Only used during coordinated cluster restarts.\ + \n\n\ + When specified, the validator will enter Wen Restart mode which \ pauses normal activity. Validators in this mode will gossip their last \ vote to reach consensus on a safe restart slot and repair all blocks \ on the selected fork. The safe slot will be a descendant of the latest \ @@ -1588,16 +1590,15 @@ pub fn app<'a>(version: &'a str, default_args: &'a DefaultArgs) -> App<'a, 'a> { optimistically confirmed slots. \ \n\n\ The progress in this mode will be saved in the file location provided. \ - If consensus is reached, the validator will automatically exit and then \ - execute wait_for_supermajority logic so the cluster will resume execution. \ - The progress file will be kept around for future debugging. \ - \n\n\ - After the cluster resumes normal operation, the validator arguments can \ - be adjusted to remove --wen_restart and update expected_shred_version to \ - the new shred_version agreed on in the consensus. \ + If consensus is reached, the validator will automatically exit with 200 \ + status code. Then the operators are expected to restart the validator \ + with --wait_for_supermajority and other arguments (including new shred_version, \ + supermajority slot, and bankhash) given in the error log before the exit so \ + the cluster will resume execution. The progress file will be kept around \ + for future debugging. \ \n\n\ If wen_restart fails, refer to the progress file (in proto3 format) for \ - further debugging.", + further debugging and watch the discord channel for instructions.", ), ) .args(&thread_args(&default_args.thread_args)) diff --git a/validator/src/main.rs b/validator/src/main.rs index 87b6a44cd9666e..c61b0f6d3ec87e 100644 --- a/validator/src/main.rs +++ b/validator/src/main.rs @@ -1523,6 +1523,7 @@ pub fn main() { replay_transactions_threads, delay_leader_block_for_pending_fork: matches .is_present("delay_leader_block_for_pending_fork"), + wen_restart_proto_path: value_t!(matches, "wen_restart", PathBuf).ok(), ..ValidatorConfig::default() }; @@ -1965,6 +1966,11 @@ pub fn main() { let mut node = Node::new_with_external_ip(&identity_keypair.pubkey(), node_config); if restricted_repair_only_mode { + if validator_config.wen_restart_proto_path.is_some() { + error!("--restricted-repair-only-mode is not compatible with --wen_restart"); + exit(1); + } + // When in --restricted_repair_only_mode is enabled only the gossip and repair ports // need to be reachable by the entrypoint to respond to gossip pull requests and repair // requests initiated by the node. All other ports are unused.