Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

redacted and full telemetry servers #164

Merged
merged 4 commits into from
Sep 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions config-live-example.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
log_json = true
log_level = "info,rbuilder=debug"
telemetry_port = 6060
telemetry_ip = "0.0.0.0"
redacted_telemetry_server_port = 6061
redacted_telemetry_server_ip = "0.0.0.0"
full_telemetry_server_port = 6060
full_telemetry_server_ip = "0.0.0.0"

chain = "mainnet"
reth_datadir = "/mnt/data/reth"
Expand Down
6 changes: 4 additions & 2 deletions config-playground.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
log_json = false
log_level = "info,rbuilder=debug"
telemetry_port = 6060
telemetry_ip = "0.0.0.0"
redacted_telemetry_server_port = 6061
redacted_telemetry_server_ip = "0.0.0.0"
full_telemetry_server_port = 6060
full_telemetry_server_ip = "0.0.0.0"

chain = "$HOME/.playground/devnet/genesis.json"
reth_datadir = "$HOME/.playground/devnet/data_reth"
Expand Down
34 changes: 26 additions & 8 deletions crates/rbuilder/src/live_builder/base_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,10 @@ const ENV_PREFIX: &str = "env:";
#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
#[serde(default, deny_unknown_fields)]
pub struct BaseConfig {
pub telemetry_port: u16,
pub telemetry_ip: Option<String>,
pub full_telemetry_server_port: u16,
pub full_telemetry_server_ip: Option<String>,
pub redacted_telemetry_server_port: u16,
pub redacted_telemetry_server_ip: Option<String>,
pub log_json: bool,
log_level: EnvOrValue<String>,
pub log_color: bool,
Expand Down Expand Up @@ -141,8 +143,18 @@ impl BaseConfig {
Ok(())
}

pub fn telemetry_address(&self) -> SocketAddr {
SocketAddr::V4(SocketAddrV4::new(self.telemetry_ip(), self.telemetry_port))
/// Socket address for the redacted telemetry server.
///
/// Pairs the IPv4 address returned by `redacted_telemetry_server_ip()` with the
/// configured `redacted_telemetry_server_port`.
pub fn redacted_telemetry_server_address(&self) -> SocketAddr {
    let ip = self.redacted_telemetry_server_ip();
    let port = self.redacted_telemetry_server_port;
    SocketAddr::from(SocketAddrV4::new(ip, port))
}

/// Socket address for the full telemetry server.
///
/// Pairs the IPv4 address returned by `full_telemetry_server_ip()` with the
/// configured `full_telemetry_server_port`.
pub fn full_telemetry_server_address(&self) -> SocketAddr {
    let ip = self.full_telemetry_server_ip();
    let port = self.full_telemetry_server_port;
    SocketAddr::from(SocketAddrV4::new(ip, port))
}

/// WARN: opens reth db
Expand Down Expand Up @@ -201,8 +213,12 @@ impl BaseConfig {
parse_ip(&self.jsonrpc_server_ip)
}

pub fn telemetry_ip(&self) -> Ipv4Addr {
parse_ip(&self.telemetry_ip)
/// IPv4 address for the redacted telemetry server, obtained by passing the
/// optional `redacted_telemetry_server_ip` setting through `parse_ip`.
///
/// NOTE(review): what happens for an unset or malformed value is decided by
/// `parse_ip`, which is not visible here — confirm before relying on it.
pub fn redacted_telemetry_server_ip(&self) -> Ipv4Addr {
    let configured = &self.redacted_telemetry_server_ip;
    parse_ip(configured)
}

/// IPv4 address for the full telemetry server, obtained by passing the
/// optional `full_telemetry_server_ip` setting through `parse_ip`.
///
/// NOTE(review): what happens for an unset or malformed value is decided by
/// `parse_ip`, which is not visible here — confirm before relying on it.
pub fn full_telemetry_server_ip(&self) -> Ipv4Addr {
    let configured = &self.full_telemetry_server_ip;
    parse_ip(configured)
}

pub fn chain_spec(&self) -> eyre::Result<Arc<ChainSpec>> {
Expand Down Expand Up @@ -366,8 +382,10 @@ pub const DEFAULT_RETH_DB_PATH: &str = "/mnt/data/reth";
impl Default for BaseConfig {
fn default() -> Self {
Self {
telemetry_port: 6069,
telemetry_ip: None,
full_telemetry_server_port: 6069,
full_telemetry_server_ip: None,
redacted_telemetry_server_port: 6070,
redacted_telemetry_server_ip: None,
log_json: false,
log_level: "info".into(),
log_color: false,
Expand Down
11 changes: 8 additions & 3 deletions crates/rbuilder/src/live_builder/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::{
live_builder::{
base_config::load_config_toml_and_env, payload_events::MevBoostSlotDataGenerator,
},
telemetry::spawn_telemetry_server,
telemetry,
utils::build_info::Version,
};

Expand Down Expand Up @@ -80,8 +80,13 @@ pub async fn run<ConfigType: LiveBuilderConfig>(

let cancel = CancellationToken::new();

spawn_telemetry_server(
config.base_config().telemetry_address(),
// Spawn redacted server that is safe for tdx builders to expose
telemetry::servers::redacted::spawn(config.base_config().redacted_telemetry_server_address())
.await?;

// Spawn debug server that exposes detailed operational information
telemetry::servers::full::spawn(
config.base_config().full_telemetry_server_address(),
config.version_for_telemetry(),
)
.await?;
Expand Down
73 changes: 6 additions & 67 deletions crates/rbuilder/src/telemetry/mod.rs
Original file line number Diff line number Diff line change
@@ -1,73 +1,12 @@
//! Telemetry helps track what is happening in the running application using metrics and tracing.
//! Telemetry modules help track what is happening in rbuilder.
//!
//! Interface to telemetry should be set of simple functions like:
//! fn record_event(event_data)
//! All internals are global variables.

use serde::Deserialize;
use std::{net::SocketAddr, path::PathBuf};
use tracing::{info, warn};
use warp::{Filter, Rejection, Reply};
//! The redacted server is separate from the full server because it may be desirable
//! to expose full and redacted data differently in tdx builders, e.g. redacted data
//! immediately available, and full data available after a delay or some separate sanitisation.

mod dynamic_logs;
pub mod metrics;
mod metrics;
pub mod servers;

pub use dynamic_logs::*;
pub use metrics::*;

use crate::utils::build_info::Version;

/// Handler for the Prometheus metrics route: replies with whatever
/// `gather_prometheus_metrics` returns. Never rejects.
async fn metrics_handler() -> Result<impl Reply, Rejection> {
    let body = gather_prometheus_metrics();
    Ok(body)
}

/// Query-string parameters accepted by the `debug/log/set/<filter>` route.
#[derive(Debug, Deserialize)]
struct LogQuery {
/// Optional path from the `file` query parameter; `set_rust_log_handle`
/// copies it into the `file` field of the log configuration it applies.
file: Option<PathBuf>,
}

/// Handler for `debug/log/set/<filter>`.
///
/// Starts from `default_log_config()`, overrides its `file` with the optional
/// `file` query parameter and its `env_filter` with the path segment, then
/// applies the result through `set_log_config`.
///
/// The handler never rejects: the body is empty on success and carries the
/// error text when the configuration could not be applied.
async fn set_rust_log_handle(
    rust_log: String,
    log_query: LogQuery,
) -> Result<impl Reply, Rejection> {
    info!(?rust_log, ?log_query, "Setting log level");

    let mut log_config = default_log_config();
    log_config.file.clone_from(&log_query.file);
    log_config.env_filter.clone_from(&rust_log);

    if let Err(err) = set_log_config(log_config) {
        warn!(?err, ?rust_log, ?log_query, "Failed to set log level");
        return Ok(err.to_string());
    }
    Ok(String::new())
}

/// Handler for `debug/log/reset`.
///
/// Calls `reset_log_config`. The handler never rejects: the body is empty on
/// success and carries the error text when the reset failed.
async fn reset_log_handle() -> Result<impl Reply, Rejection> {
    info!("Resetting log level");

    if let Err(err) = reset_log_config() {
        warn!(?err, "Failed to reset log level");
        return Ok(err.to_string());
    }
    Ok(String::new())
}

/// Starts the telemetry HTTP server on `addr`.
///
/// Records `version` through `set_version`, then serves:
/// - `debug/metrics/prometheus`: output of `gather_prometheus_metrics`
/// - `debug/log/set/<filter>` (optional `?file=<path>`): applies a new log configuration
/// - `debug/log/reset`: resets the log configuration
///
/// The server itself runs on a detached tokio task.
///
/// # Errors
///
/// Returns an error when the listener cannot be bound to `addr` (for example
/// because the port is already in use).
pub async fn spawn_telemetry_server(addr: SocketAddr, version: Version) -> eyre::Result<()> {
    set_version(version);

    // metrics over /debug/metrics/prometheus
    let metrics_route = warp::path!("debug" / "metrics" / "prometheus").and_then(metrics_handler);

    let log_set_route = warp::path!("debug" / "log" / "set" / String)
        .and(warp::query::<LogQuery>())
        .and_then(set_rust_log_handle);
    let log_reset_route = warp::path!("debug" / "log" / "reset").and_then(reset_log_handle);

    let route = metrics_route.or(log_set_route).or(log_reset_route);

    // Bind eagerly so a bind failure reaches the caller. `run(addr)` would panic
    // inside the detached task instead, and this function would still report success.
    let (_bound_addr, server) = warp::serve(route).try_bind_ephemeral(addr)?;
    tokio::spawn(server);

    Ok(())
}
76 changes: 76 additions & 0 deletions crates/rbuilder/src/telemetry/servers/full.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
//! Telemetry helps track what is happening in the running application using metrics and tracing.
//!
//! Interface to telemetry should be set of simple functions like:
//! fn record_event(event_data)
//!
//! All internals are global variables.
//!
//! Full server may expose metrics that could leak information when running tdx.

use serde::Deserialize;
use std::{net::SocketAddr, path::PathBuf};
use tracing::{info, warn};
use warp::{Filter, Rejection, Reply};

use crate::{
telemetry::{
dynamic_logs::{default_log_config, reset_log_config, set_log_config},
metrics::{gather_prometheus_metrics, set_version},
},
utils::build_info::Version,
};

/// Starts the full (debug) telemetry HTTP server on `addr`.
///
/// Records `version` through `set_version`, then serves:
/// - `debug/metrics/prometheus`: output of `gather_prometheus_metrics`
/// - `debug/log/set/<filter>` (optional `?file=<path>`): applies a new log configuration
/// - `debug/log/reset`: resets the log configuration
///
/// The server itself runs on a detached tokio task.
///
/// # Errors
///
/// Returns an error when the listener cannot be bound to `addr` (for example
/// because the port is already in use).
pub async fn spawn(addr: SocketAddr, version: Version) -> eyre::Result<()> {
    set_version(version);

    // metrics over /debug/metrics/prometheus
    let metrics_route = warp::path!("debug" / "metrics" / "prometheus").and_then(metrics_handler);

    let log_set_route = warp::path!("debug" / "log" / "set" / String)
        .and(warp::query::<LogQuery>())
        .and_then(set_rust_log_handle);
    let log_reset_route = warp::path!("debug" / "log" / "reset").and_then(reset_log_handle);

    let route = metrics_route.or(log_set_route).or(log_reset_route);

    // Bind eagerly so a bind failure reaches the caller. `run(addr)` would panic
    // inside the detached task instead, and this function would still report success.
    let (_bound_addr, server) = warp::serve(route).try_bind_ephemeral(addr)?;
    tokio::spawn(server);

    Ok(())
}

/// Handler for the Prometheus metrics route: replies with whatever
/// `gather_prometheus_metrics` returns. Never rejects.
async fn metrics_handler() -> Result<impl Reply, Rejection> {
    let body = gather_prometheus_metrics();
    Ok(body)
}

/// Query-string parameters accepted by the `debug/log/set/<filter>` route.
#[derive(Debug, Deserialize)]
struct LogQuery {
/// Optional path from the `file` query parameter; `set_rust_log_handle`
/// copies it into the `file` field of the log configuration it applies.
file: Option<PathBuf>,
}

/// Handler for `debug/log/set/<filter>`.
///
/// Starts from `default_log_config()`, overrides its `file` with the optional
/// `file` query parameter and its `env_filter` with the path segment, then
/// applies the result through `set_log_config`.
///
/// The handler never rejects: the body is empty on success and carries the
/// error text when the configuration could not be applied.
async fn set_rust_log_handle(
    rust_log: String,
    log_query: LogQuery,
) -> Result<impl Reply, Rejection> {
    info!(?rust_log, ?log_query, "Setting log level");

    let mut log_config = default_log_config();
    log_config.file.clone_from(&log_query.file);
    log_config.env_filter.clone_from(&rust_log);

    if let Err(err) = set_log_config(log_config) {
        warn!(?err, ?rust_log, ?log_query, "Failed to set log level");
        return Ok(err.to_string());
    }
    Ok(String::new())
}

/// Handler for `debug/log/reset`.
///
/// Calls `reset_log_config`. The handler never rejects: the body is empty on
/// success and carries the error text when the reset failed.
async fn reset_log_handle() -> Result<impl Reply, Rejection> {
    info!("Resetting log level");

    if let Err(err) = reset_log_config() {
        warn!(?err, "Failed to reset log level");
        return Ok(err.to_string());
    }
    Ok(String::new())
}
13 changes: 13 additions & 0 deletions crates/rbuilder/src/telemetry/servers/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
//! Telemetry contains two servers.
//!
//! - [full]: verbose server exposing detailed operational information about the
//! builder.
//! - [redacted]: deliberately redacted server that serves information suitable
//!   for tdx builders to expose in real-time.
//!
//! The redacted server is separate from the full server because it may be desirable
//! to expose full and redacted data differently in tdx builders, e.g. redacted data
//! immediately available, full data available after a delay or some separate sanitisation.

pub mod full;
pub mod redacted;
19 changes: 19 additions & 0 deletions crates/rbuilder/src/telemetry/servers/redacted.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
//! Server that only exposes redacted data, suitable for being exposed by tdx
//! builders in real-time.
//!
//! Currently exposes just a healthcheck endpoint on /health. Can be extended
//! in the future.

use std::net::SocketAddr;

use warp::{Filter, Rejection, Reply};

/// Health-check handler: always replies with the body `OK` and never rejects.
async fn handler() -> Result<impl Reply, Rejection> {
    const BODY: &str = "OK";
    Ok(BODY)
}

/// Starts the redacted telemetry HTTP server on `addr`.
///
/// Serves a single `health` route answered by `handler`; the server itself
/// runs on a detached tokio task.
///
/// # Errors
///
/// Returns an error when the listener cannot be bound to `addr` (for example
/// because the port is already in use).
pub async fn spawn(addr: SocketAddr) -> eyre::Result<()> {
    let route = warp::path!("health").and_then(handler);

    // Bind eagerly so a bind failure reaches the caller. `run(addr)` would panic
    // inside the detached task instead, and this function would still report success.
    let (_bound_addr, server) = warp::serve(route).try_bind_ephemeral(addr)?;
    tokio::spawn(server);

    Ok(())
}
Loading