Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use systemd watchdog to restart a node #2004

Closed
wants to merge 15 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/base_benchmarks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
- name: Install Rust
run: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y && echo "$HOME/.cargo/bin" >> $GITHUB_PATH
- name: Install dependencies
run: sudo add-apt-repository ppa:ethereum/ethereum && sudo apt update && sudo apt install -y solc build-essential pkg-config libssl-dev cmake protobuf-compiler
run: sudo add-apt-repository ppa:ethereum/ethereum && sudo apt update && sudo apt install -y solc build-essential pkg-config libssl-dev cmake protobuf-compiler libsystemd-dev
- name: Track base branch benchmarks
run: |
bencher run \
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:
- name: Install nightly
run: rustup toolchain install nightly
- name: Install dependencies
run: sudo add-apt-repository ppa:ethereum/ethereum && sudo apt update && sudo apt install -y solc build-essential pkg-config libssl-dev cmake protobuf-compiler
run: sudo add-apt-repository ppa:ethereum/ethereum && sudo apt update && sudo apt install -y solc build-essential pkg-config libssl-dev cmake protobuf-compiler libsystemd-dev
- uses: actions/checkout@v4
with:
submodules: recursive
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/pr_benchmarks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
- name: Install Rust
run: curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y && echo "$HOME/.cargo/bin" >> $GITHUB_PATH
- name: Install dependencies
run: sudo add-apt-repository ppa:ethereum/ethereum && sudo apt update && sudo apt install -y solc build-essential pkg-config libssl-dev cmake protobuf-compiler
run: sudo add-apt-repository ppa:ethereum/ethereum && sudo apt update && sudo apt install -y solc build-essential pkg-config libssl-dev cmake protobuf-compiler libsystemd-dev
- name: Track PR Benchmarks
run: |
bencher run \
Expand Down
81 changes: 79 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ FROM rust:1.83.0-slim-bullseye as builder
ARG is_release=false
RUN apt update -y && \
apt upgrade -y && \
apt install -y protobuf-compiler
apt install -y protobuf-compiler libsystemd-dev pkg-config

RUN apt autoremove

Expand Down
3 changes: 3 additions & 0 deletions z2/resources/config.tera.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ data_dir = "/data"
consensus.genesis_accounts = [ ["{{ genesis_address }}", "900_000_000_000_000_000_000_000_000" ] ]
consensus.genesis_deposits = [ ["{{ bootstrap_bls_public_key }}", "{{ bootstrap_peer_id }}", "20_000_000_000_000_000_000_000_000", "0x0000000000000000000000000000000000000000", "{{ genesis_address }}"] ]

# API gateway
remote_api_url = "{{ remote_api_url }}"

# Reward parameters
consensus.rewards_per_hour = "51_000_000_000_000_000_000_000"
consensus.blocks_per_hour = 3600
Expand Down
3 changes: 2 additions & 1 deletion z2/resources/node_provision.tera.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,12 @@ def health():
Description=Zilliqa Node

[Service]
WatchdogSec=60s
Type=forking
ExecStart=/usr/local/bin/zq2.sh start """ + SECRET_KEY + """
ExecStop=/usr/local/bin/zq2.sh stop
RemainAfterExit=yes
Restart=on-failure
Restart=on-failure,on-watchdog
RestartSec=10

Environment="RUST_LOG=zilliqa=debug"
Expand Down
2 changes: 2 additions & 0 deletions z2/src/chain/node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -515,6 +515,7 @@ impl ChainNode {
let role_name = self.role.to_string();
let eth_chain_id = self.eth_chain_id.to_string();
let bootstrap_public_ip = selected_bootstrap.machine.external_address;
let remote_api_url = self.chain()?.get_endpoint()?; // API gateway
let whitelisted_evm_contract_addresses = self.chain()?.get_whitelisted_evm_contracts();
// 4201 is the publicly exposed port - We don't expose everything there.
let public_api = if self.role == NodeRole::Api {
Expand Down Expand Up @@ -544,6 +545,7 @@ impl ChainNode {
);
ctx.insert("api_servers", &api_servers);
ctx.insert("enable_ots_indices", &enable_ots_indices);
ctx.insert("remote_api_url", remote_api_url);

Ok(Tera::one_off(spec_config, &ctx, false)?)
}
Expand Down
7 changes: 4 additions & 3 deletions z2/src/setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ use zilliqa::{
block_request_limit_default, consensus_timeout_default, empty_block_timeout_default,
eth_chain_id_default, failed_request_sleep_duration_default, local_address_default,
max_blocks_in_flight_default, minimum_time_left_for_empty_block_default,
scilla_address_default, scilla_ext_libs_path_default, scilla_stdlib_dir_default,
state_rpc_limit_default, total_native_token_supply_default, Amount, ApiServer,
ConsensusConfig, GenesisDeposit,
remote_api_url_default, scilla_address_default, scilla_ext_libs_path_default,
scilla_stdlib_dir_default, state_rpc_limit_default, total_native_token_supply_default,
Amount, ApiServer, ConsensusConfig, GenesisDeposit,
},
transaction::EvmGas,
};
Expand Down Expand Up @@ -544,6 +544,7 @@ impl Setup {
block_request_batch_size: block_request_batch_size_default(),
state_rpc_limit: state_rpc_limit_default(),
failed_request_sleep_duration: failed_request_sleep_duration_default(),
remote_api_url: remote_api_url_default(),
enable_ots_indices: false,
};
println!("🧩 Node {node_index} has RPC port {port}");
Expand Down
2 changes: 2 additions & 0 deletions zilliqa/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ serde_repr = "0.1.19"
thiserror = "2.0.6"
lru-mem = "0.3.0"
opentelemetry-semantic-conventions = { version = "0.27.0", features = ["semconv_experimental"] }
systemd = "0.10.0"
url = "2.5.4"

[dev-dependencies]
alloy = { version = "0.6.4", default-features = false, features = ["network", "rand", "signers", "signer-local"] }
Expand Down
10 changes: 10 additions & 0 deletions zilliqa/src/cfg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use alloy::primitives::Address;
use libp2p::{Multiaddr, PeerId};
use rand::{distributions::Alphanumeric, Rng};
use serde::{de, Deserialize, Deserializer, Serialize, Serializer};
use url::Url;

use crate::{
crypto::{Hash, NodePublicKey},
Expand Down Expand Up @@ -110,6 +111,10 @@ pub struct NodeConfig {
/// Defaults to 10 seconds.
#[serde(default = "failed_request_sleep_duration_default")]
pub failed_request_sleep_duration: Duration,
/// URL of the API gateway - used to check for block progress.
/// Defaults to `http://localhost:4201`.
#[serde(default = "remote_api_url_default")]
pub remote_api_url: Url,
/// Enable additional indices used by some Otterscan APIs. Enabling this will use more disk space and block processing will take longer.
#[serde(default)]
pub enable_ots_indices: bool,
Expand All @@ -131,6 +136,7 @@ impl Default for NodeConfig {
block_request_batch_size: block_request_batch_size_default(),
state_rpc_limit: state_rpc_limit_default(),
failed_request_sleep_duration: failed_request_sleep_duration_default(),
remote_api_url: remote_api_url_default(),
enable_ots_indices: false,
}
}
Expand Down Expand Up @@ -175,6 +181,10 @@ pub fn state_cache_size_default() -> usize {
256 * 1024 * 1024 // 256 MiB
}

/// Returns the default API gateway URL, used when `remote_api_url` is not set in the config.
///
/// The default is `http://localhost:4201`.
///
/// # Panics
///
/// Only if the hard-coded literal stops being a valid absolute URL, which would be a
/// programming error rather than a runtime condition.
pub fn remote_api_url_default() -> Url {
    // `expect` instead of `unwrap`: document the invariant that makes this infallible.
    Url::parse("http://localhost:4201").expect("hard-coded default API URL must be valid")
}

/// Returns the default ETH chain ID: `0x8000 + 700`, i.e. 33468.
pub fn eth_chain_id_default() -> u64 {
    const OFFSET: u64 = 0x8000;
    const BASE: u64 = 700;
    OFFSET + BASE
}
Expand Down
Loading
Loading