Skip to content

Commit

Permalink
A0-3022: Abft Score Metrics (#1898)
Browse files Browse the repository at this point in the history
Co-authored-by: Michal Swietek <[email protected]>
  • Loading branch information
mike1729 and mike1729 authored Dec 23, 2024
1 parent 7b31cbb commit 0e6fa63
Show file tree
Hide file tree
Showing 9 changed files with 93 additions and 18 deletions.
15 changes: 11 additions & 4 deletions bin/chain-bootstrapper/src/chain_spec/cli.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use primitives::{AccountId, Version as FinalityVersion, LEGACY_FINALITY_VERSION};
use primitives::{
AccountId, Version as FinalityVersion, CURRENT_FINALITY_VERSION, LEGACY_FINALITY_VERSION,
};
use sc_chain_spec::ChainType;
use sc_cli::clap::{self, Args};

Expand Down Expand Up @@ -43,8 +45,8 @@ pub struct ChainSpecParams {
rich_account_ids: Option<Vec<AccountId>>,

/// Finality version at chain inception.
#[arg(long, default_value = LEGACY_FINALITY_VERSION.to_string())]
finality_version: FinalityVersion,
#[arg(long, default_value = "legacy")]
finality_version: String,
}

impl ChainSpecParams {
Expand Down Expand Up @@ -81,6 +83,11 @@ impl ChainSpecParams {
}

pub fn finality_version(&self) -> FinalityVersion {
self.finality_version
match self.finality_version.as_str() {
"current" => CURRENT_FINALITY_VERSION,
"legacy" => LEGACY_FINALITY_VERSION,
_ => panic!("finality version should be 'current' or 'legacy'"),
}
.into()
}
}
8 changes: 8 additions & 0 deletions finality-aleph/src/abft/current/performance/service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use crate::{
LOG_TARGET,
},
data_io::AlephData,
metrics::ScoreMetrics,
party::manager::Runnable,
Hasher, UnverifiedHeader,
};
Expand Down Expand Up @@ -62,8 +63,10 @@ pub struct Service<UH>
where
UH: UnverifiedHeader,
{
my_index: usize,
batches_from_abft: mpsc::UnboundedReceiver<Batch<UH>>,
scorer: Scorer,
metrics: ScoreMetrics,
}

impl<UH> Service<UH>
Expand All @@ -73,8 +76,10 @@ where
/// Create a new service, together with a unit finalization handler that should be passed to
/// ABFT. It will wrap the provided finalization handler and call it in the background.
pub fn new<FH>(
my_index: usize,
n_members: usize,
finalization_handler: FH,
metrics: ScoreMetrics,
) -> (
Self,
impl current_aleph_bft::UnitFinalizationHandler<Data = AlephData<UH>, Hasher = Hasher>,
Expand All @@ -85,8 +90,10 @@ where
let (batches_for_us, batches_from_abft) = mpsc::unbounded();
(
Service {
my_index,
batches_from_abft,
scorer: Scorer::new(NodeCount(n_members)),
metrics,
},
FinalizationWrapper::new(finalization_handler, batches_for_us),
)
Expand All @@ -110,6 +117,7 @@ where
},
};
debug!(target: LOG_TARGET, "Received ABFT score: {:?}.", score);
self.metrics.report_score(score[self.my_index]);
// TODO(A0-4339): sometimes submit these scores to the chain.
}
_ = &mut exit => {
Expand Down
31 changes: 31 additions & 0 deletions finality-aleph/src/metrics/abft_score.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
use substrate_prometheus_endpoint::{register, Gauge, PrometheusError, Registry, U64};

#[derive(Clone)]
pub enum ScoreMetrics {
Prometheus { my_score: Gauge<U64> },
Noop,
}

impl ScoreMetrics {
pub fn new(registry: Option<Registry>) -> Result<Self, PrometheusError> {
match registry {
Some(registry) => Ok(ScoreMetrics::Prometheus {
my_score: register(
Gauge::new("my_abft_score", "My abft score observed in last batch")?,
&registry,
)?,
}),
None => Ok(ScoreMetrics::Noop),
}
}

pub fn noop() -> Self {
ScoreMetrics::Noop
}

pub fn report_score(&self, score: u16) {
if let ScoreMetrics::Prometheus { my_score } = self {
my_score.set(score.into());
}
}
}
2 changes: 2 additions & 0 deletions finality-aleph/src/metrics/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
mod abft_score;
mod best_block;
mod finality_rate;
mod slo;
mod timing;
pub mod transaction_pool;

pub use abft_score::ScoreMetrics;
pub use slo::{run_metrics_service, SloMetrics};
pub use timing::{Checkpoint, DefaultClock};
pub type TimingBlockMetrics = timing::TimingBlockMetrics<DefaultClock>;
Expand Down
10 changes: 8 additions & 2 deletions finality-aleph/src/nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::{
crypto::AuthorityPen,
finalization::AlephFinalizer,
idx_to_account::ValidatorIndexToAccountIdConverterImpl,
metrics::{run_metrics_service, SloMetrics},
metrics::{run_metrics_service, ScoreMetrics, SloMetrics},
network::{
address_cache::validator_address_cache_updater,
session::{ConnectionManager, ConnectionManagerConfig},
Expand Down Expand Up @@ -146,6 +146,11 @@ where

let chain_events = client.chain_status_notifier();

let score_metrics = ScoreMetrics::new(registry.clone()).unwrap_or_else(|e| {
debug!(target: LOG_TARGET, "Failed to create metrics: {}.", e);
ScoreMetrics::noop()
});

let slo_metrics = SloMetrics::new(registry.as_ref(), chain_status.clone());
let timing_metrics = slo_metrics.timing_metrics().clone();

Expand Down Expand Up @@ -199,7 +204,7 @@ where
verifier.clone(),
session_info.clone(),
sync_io,
registry.clone(),
registry,
slo_metrics,
favourite_block_user_requests,
) {
Expand Down Expand Up @@ -266,6 +271,7 @@ where
spawn_handle,
connection_manager,
keystore,
score_metrics,
),
session_info,
});
Expand Down
28 changes: 18 additions & 10 deletions finality-aleph/src/party/manager/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use crate::{
},
crypto::{AuthorityPen, AuthorityVerifier},
data_io::{ChainTracker, DataStore, OrderedDataInterpreter, SubstrateChainInfoProvider},
metrics::TimingBlockMetrics,
metrics::{ScoreMetrics, TimingBlockMetrics},
mpsc,
network::{
data::{
Expand All @@ -32,6 +32,7 @@ use crate::{
},
party::{
backup::ABFTBackup, manager::aggregator::AggregatorVersion, traits::NodeSessionManager,
LOG_TARGET,
},
sync::JustificationSubmissions,
AuthorityId, BlockId, CurrentRmcNetworkData, Keychain, LegacyRmcNetworkData, NodeIndex,
Expand Down Expand Up @@ -114,6 +115,7 @@ where
spawn_handle: SpawnHandle,
session_manager: SM,
keystore: Arc<LocalKeystore>,
score_metrics: ScoreMetrics,
_phantom: PhantomData<(B, H)>,
}

Expand Down Expand Up @@ -146,6 +148,7 @@ where
spawn_handle: SpawnHandle,
session_manager: SM,
keystore: Arc<LocalKeystore>,
score_metrics: ScoreMetrics,
) -> Self {
Self {
client,
Expand All @@ -161,6 +164,7 @@ where
spawn_handle,
session_manager,
keystore,
score_metrics,
_phantom: PhantomData,
}
}
Expand Down Expand Up @@ -274,8 +278,12 @@ where
self.verifier.clone(),
session_boundaries.clone(),
);
let (abft_performance, abft_batch_handler) =
CurrentPerformanceService::new(n_members, ordered_data_interpreter);
let (abft_performance, abft_batch_handler) = CurrentPerformanceService::new(
node_id.into(),
n_members,
ordered_data_interpreter,
self.score_metrics.clone(),
);
let consensus_config =
current_create_aleph_config(n_members, node_id, session_id, self.unit_creation_delay);
let data_network = data_network.map();
Expand Down Expand Up @@ -325,7 +333,7 @@ where
exit_rx: oneshot::Receiver<()>,
backup: ABFTBackup,
) -> Subtasks {
debug!(target: "afa", "Authority task {:?}", session_id);
debug!(target: LOG_TARGET, "Authority task {:?}", session_id);

let authority_verifier = AuthorityVerifier::new(authorities.to_vec());
let authority_pen =
Expand Down Expand Up @@ -389,17 +397,17 @@ where
{
#[cfg(feature = "only_legacy")]
_ if self.only_legacy() => {
info!(target: "aleph-party", "Running session with legacy-only AlephBFT version.");
info!(target: LOG_TARGET, "Running session with legacy-only AlephBFT version.");
self.legacy_subtasks(params)
}
// The `as`es here should be removed, but this would require a pallet migration and I
// am lazy.
Ok(version) if version == CURRENT_VERSION as u32 => {
info!(target: "aleph-party", "Running session with AlephBFT version {}, which is current.", version);
info!(target: LOG_TARGET, "Running session with AlephBFT version {}, which is current.", version);
self.current_subtasks(params)
}
Ok(version) if version == LEGACY_VERSION as u32 => {
info!(target: "aleph-party", "Running session with AlephBFT version {}, which is legacy.", version);
info!(target: LOG_TARGET, "Running session with AlephBFT version {}, which is legacy.", version);
self.legacy_subtasks(params)
}
Ok(version) if version > CURRENT_VERSION as u32 => {
Expand All @@ -408,8 +416,8 @@ where
)
}
Ok(version) => {
info!(target: "aleph-party", "Attempting to run session with too old version {}, likely because we are synchronizing old sessions for which we have keys. This will not work, but it doesn't matter.", version);
info!(target: "aleph-party", "Running session with AlephBFT version {}, which is legacy.", LEGACY_VERSION);
info!(target: LOG_TARGET, "Attempting to run session with too old version {}, likely because we are synchronizing old sessions for which we have keys. This will not work, but it doesn't matter.", version);
info!(target: LOG_TARGET, "Running session with AlephBFT version {}, which is legacy.", LEGACY_VERSION);
self.legacy_subtasks(params)
}
_ => {
Expand Down Expand Up @@ -461,7 +469,7 @@ where
self.spawn_handle
.spawn_essential("aleph/session_authority", async move {
if subtasks.wait_completion().await.is_err() {
warn!(target: "aleph-party", "Authority subtasks failed.");
warn!(target: LOG_TARGET, "Authority subtasks failed.");
}
}),
node_id,
Expand Down
2 changes: 2 additions & 0 deletions finality-aleph/src/party/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ pub mod traits;
#[cfg(test)]
mod mocks;

const LOG_TARGET: &str = "aleph-party";

pub(crate) struct ConsensusPartyParams<CS, NSM> {
pub session_authorities: ReadOnlySessionMap,
pub chain_state: CS,
Expand Down
2 changes: 1 addition & 1 deletion primitives/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ pub const DEFAULT_FINALITY_VERSION: Version = 0;
pub const CURRENT_FINALITY_VERSION: u16 = LEGACY_FINALITY_VERSION + 1;

/// Legacy version of abft.
pub const LEGACY_FINALITY_VERSION: u16 = 3;
pub const LEGACY_FINALITY_VERSION: u16 = 4;

/// Percentage of validator performance that is treated as 100% performance
pub const LENIENT_THRESHOLD: Perquintill = Perquintill::from_percent(90);
Expand Down
13 changes: 12 additions & 1 deletion scripts/run_nodes.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ Usage:
[-p|--base-path BASE_PATH]
if specified, use given base path (keystore, db, AlephBFT backups)
if not specified, base path is ./run-nodes-local
[--finality-version]
which finality version should be used, default = legacy
[--dont-bootstrap]
set if you don't want to bootstrap chain, ie generate keystore and chainspec
[--dont-build]
Expand All @@ -85,6 +87,7 @@ DONT_BOOTSTRAP=${DONT_BOOTSTRAP:-""}
DONT_BUILD_ALEPH_NODE=${DONT_BUILD_ALEPH_NODE:-""}
DONT_DELETE_DB=${DONT_DELETE_DB:-""}
DONT_REMOVE_ABFT_BACKUPS=${DONT_REMOVE_ABFT_BACKUPS:-""}
FINALITY_VERSION=${FINALITY_VERSION:-"legacy"}

while [[ $# -gt 0 ]]; do
case "$1" in
Expand All @@ -100,6 +103,10 @@ while [[ $# -gt 0 ]]; do
BASE_PATH="$2"
shift;shift
;;
--finality-version)
FINALITY_VERSION="$2"
shift;shift
;;
--dont-bootstrap)
DONT_BOOTSTRAP="true"
shift
Expand Down Expand Up @@ -219,6 +226,9 @@ fi
if ! command -v jq &> /dev/null; then
error "jq could not be found on PATH!"
fi
if [[ "${FINALITY_VERSION}" != "current" && "${FINALITY_VERSION}" != "legacy" ]]; then
error "Flag finality-version should be either current or legacy."
fi

# ------------------- main script starts here ------------------------------

Expand Down Expand Up @@ -276,7 +286,8 @@ if [[ -z "${DONT_BOOTSTRAP}" ]]; then
--account-ids "${all_account_ids_string}" \
--authorities-account-ids "${validator_ids_string}" \
--chain-type local > "${BASE_PATH}/chainspec.json" \
--rich-account-ids "${all_account_ids_string}"
--rich-account-ids "${all_account_ids_string}" \
--finality-version "${FINALITY_VERSION}"

if [[ "${DONT_REMOVE_ABFT_BACKUPS}" == "true" ]]; then
all_account_ids=(${validator_account_ids[@]} ${rpc_node_account_ids[@]})
Expand Down

0 comments on commit 0e6fa63

Please sign in to comment.