Skip to content

Commit

Permalink
redacted and full telemetry servers (#164)
Browse files Browse the repository at this point in the history
- ~Creates a new module `server` inside `live_builder`~
- ~Server exposes a single fn `start`, which starts an actix server on
the given port~
- ~Initial server implementation just exposes one route `/health` that
responds 200~
- ~Adds new base config item `live_builder_server_port`~
- ~Server is spawned when running the `live_builder` cli~

## Background 

- TDX builders cannot expose the existing telemetry server port, due to
concerns about sensitive operational information leakage. This
information may require a time-delay or additional sanitisation before
being exposed outside of the secure machine.
- TDX builders require some other means to know if the builder is
healthy or not.

## Solution

- Separated the telemetry server into two servers: `full` and
`redacted`.
- `full`: what we have now, exposes all operational data without regard
for whether it is sensitive
- `redacted`: new server that initially only exposes a healthcheck
endpoint (can be extended later). This server is safe to always be
exposed by tdx builders.
  • Loading branch information
liamaharon authored Sep 10, 2024
1 parent ccf4677 commit 59e7cd3
Show file tree
Hide file tree
Showing 8 changed files with 156 additions and 82 deletions.
6 changes: 4 additions & 2 deletions config-live-example.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
log_json = true
log_level = "info,rbuilder=debug"
telemetry_port = 6060
telemetry_ip = "0.0.0.0"
redacted_telemetry_server_port = 6061
redacted_telemetry_server_ip = "0.0.0.0"
full_telemetry_server_port = 6060
full_telemetry_server_ip = "0.0.0.0"

chain = "mainnet"
reth_datadir = "/mnt/data/reth"
Expand Down
6 changes: 4 additions & 2 deletions config-playground.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
log_json = false
log_level = "info,rbuilder=debug"
telemetry_port = 6060
telemetry_ip = "0.0.0.0"
redacted_telemetry_server_port = 6061
redacted_telemetry_server_ip = "0.0.0.0"
full_telemetry_server_port = 6060
full_telemetry_server_ip = "0.0.0.0"

chain = "$HOME/.playground/devnet/genesis.json"
reth_datadir = "$HOME/.playground/devnet/data_reth"
Expand Down
34 changes: 26 additions & 8 deletions crates/rbuilder/src/live_builder/base_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,10 @@ const ENV_PREFIX: &str = "env:";
#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
#[serde(default, deny_unknown_fields)]
pub struct BaseConfig {
pub telemetry_port: u16,
pub telemetry_ip: Option<String>,
pub full_telemetry_server_port: u16,
pub full_telemetry_server_ip: Option<String>,
pub redacted_telemetry_server_port: u16,
pub redacted_telemetry_server_ip: Option<String>,
pub log_json: bool,
log_level: EnvOrValue<String>,
pub log_color: bool,
Expand Down Expand Up @@ -141,8 +143,18 @@ impl BaseConfig {
Ok(())
}

pub fn telemetry_address(&self) -> SocketAddr {
SocketAddr::V4(SocketAddrV4::new(self.telemetry_ip(), self.telemetry_port))
/// Socket address the redacted telemetry server should listen on.
///
/// Combines the IPv4 address returned by `redacted_telemetry_server_ip()` with the
/// configured `redacted_telemetry_server_port`.
pub fn redacted_telemetry_server_address(&self) -> SocketAddr {
    let ip = self.redacted_telemetry_server_ip();
    let port = self.redacted_telemetry_server_port;
    SocketAddrV4::new(ip, port).into()
}

/// Socket address the full telemetry server should listen on.
///
/// Combines the IPv4 address returned by `full_telemetry_server_ip()` with the
/// configured `full_telemetry_server_port`.
pub fn full_telemetry_server_address(&self) -> SocketAddr {
    let ip = self.full_telemetry_server_ip();
    let port = self.full_telemetry_server_port;
    SocketAddrV4::new(ip, port).into()
}

/// WARN: opens reth db
Expand Down Expand Up @@ -201,8 +213,12 @@ impl BaseConfig {
parse_ip(&self.jsonrpc_server_ip)
}

pub fn telemetry_ip(&self) -> Ipv4Addr {
parse_ip(&self.telemetry_ip)
/// IPv4 address for the redacted telemetry server, obtained by passing the optional
/// `redacted_telemetry_server_ip` config value to `parse_ip`.
// NOTE(review): the fallback used when the value is `None` is decided by `parse_ip` — confirm there.
pub fn redacted_telemetry_server_ip(&self) -> Ipv4Addr {
    let configured = &self.redacted_telemetry_server_ip;
    parse_ip(configured)
}

/// IPv4 address for the full telemetry server, obtained by passing the optional
/// `full_telemetry_server_ip` config value to `parse_ip`.
// NOTE(review): the fallback used when the value is `None` is decided by `parse_ip` — confirm there.
pub fn full_telemetry_server_ip(&self) -> Ipv4Addr {
    let configured = &self.full_telemetry_server_ip;
    parse_ip(configured)
}

pub fn chain_spec(&self) -> eyre::Result<Arc<ChainSpec>> {
Expand Down Expand Up @@ -366,8 +382,10 @@ pub const DEFAULT_RETH_DB_PATH: &str = "/mnt/data/reth";
impl Default for BaseConfig {
fn default() -> Self {
Self {
telemetry_port: 6069,
telemetry_ip: None,
full_telemetry_server_port: 6069,
full_telemetry_server_ip: None,
redacted_telemetry_server_port: 6070,
redacted_telemetry_server_ip: None,
log_json: false,
log_level: "info".into(),
log_color: false,
Expand Down
11 changes: 8 additions & 3 deletions crates/rbuilder/src/live_builder/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::{
live_builder::{
base_config::load_config_toml_and_env, payload_events::MevBoostSlotDataGenerator,
},
telemetry::spawn_telemetry_server,
telemetry,
utils::build_info::Version,
};

Expand Down Expand Up @@ -80,8 +80,13 @@ pub async fn run<ConfigType: LiveBuilderConfig>(

let cancel = CancellationToken::new();

spawn_telemetry_server(
config.base_config().telemetry_address(),
// Spawn redacted server that is safe for tdx builders to expose
telemetry::servers::redacted::spawn(config.base_config().redacted_telemetry_server_address())
.await?;

// Spawn debug server that exposes detailed operational information
telemetry::servers::full::spawn(
config.base_config().full_telemetry_server_address(),
config.version_for_telemetry(),
)
.await?;
Expand Down
73 changes: 6 additions & 67 deletions crates/rbuilder/src/telemetry/mod.rs
Original file line number Diff line number Diff line change
@@ -1,73 +1,12 @@
//! Telemetry helps track what is happening in the running application using metrics and tracing.
//! Telemetry modules help track what is happening in the rbuilder.
//!
//! Interface to telemetry should be set of simple functions like:
//! fn record_event(event_data)
//! All internals are global variables.
use serde::Deserialize;
use std::{net::SocketAddr, path::PathBuf};
use tracing::{info, warn};
use warp::{Filter, Rejection, Reply};
//! The redacted server is separate from the full server, because it may be desirable
//! to expose full and redacted data differently in tdx builders. e.g. redacted data
//! immediately available, and full data available after a delay or some separate sanitisation.
mod dynamic_logs;
pub mod metrics;
mod metrics;
pub mod servers;

pub use dynamic_logs::*;
pub use metrics::*;

use crate::utils::build_info::Version;

async fn metrics_handler() -> Result<impl Reply, Rejection> {
Ok(gather_prometheus_metrics())
}

#[derive(Debug, Deserialize)]
struct LogQuery {
file: Option<PathBuf>,
}

async fn set_rust_log_handle(
rust_log: String,
log_query: LogQuery,
) -> Result<impl Reply, Rejection> {
info!(?rust_log, ?log_query, "Setting log level");
let mut log_config = default_log_config();
log_config.file.clone_from(&log_query.file);
log_config.env_filter.clone_from(&rust_log);
match set_log_config(log_config) {
Ok(_) => Ok("".to_string()),
Err(err) => {
warn!(?err, ?rust_log, ?log_query, "Failed to set log level");
Ok(err.to_string())
}
}
}

async fn reset_log_handle() -> Result<impl Reply, Rejection> {
info!("Resetting log level");
match reset_log_config() {
Ok(_) => Ok("".to_string()),
Err(err) => {
warn!(?err, "Failed to reset log level");
Ok(err.to_string())
}
}
}

pub async fn spawn_telemetry_server(addr: SocketAddr, version: Version) -> eyre::Result<()> {
set_version(version);

// metrics over /debug/metrics/prometheus
let metrics_route = warp::path!("debug" / "metrics" / "prometheus").and_then(metrics_handler);

let log_set_route = warp::path!("debug" / "log" / "set" / String)
.and(warp::query::<LogQuery>())
.and_then(set_rust_log_handle);
let log_reset_route = warp::path!("debug" / "log" / "reset").and_then(reset_log_handle);

let route = metrics_route.or(log_set_route).or(log_reset_route);

tokio::spawn(warp::serve(route).run(addr));

Ok(())
}
76 changes: 76 additions & 0 deletions crates/rbuilder/src/telemetry/servers/full.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
//! Telemetry helps track what is happening in the running application using metrics and tracing.
//!
//! Interface to telemetry should be set of simple functions like:
//! fn record_event(event_data)
//!
//! All internals are global variables.
//!
//! Full server may expose metrics that could leak information when running tdx.
use serde::Deserialize;
use std::{net::SocketAddr, path::PathBuf};
use tracing::{info, warn};
use warp::{Filter, Rejection, Reply};

use crate::{
telemetry::{
dynamic_logs::{default_log_config, reset_log_config, set_log_config},
metrics::{gather_prometheus_metrics, set_version},
},
utils::build_info::Version,
};

/// Starts the full telemetry server on `addr` as a detached tokio task.
///
/// Records `version` through `set_version`, then serves three routes:
/// - `debug/metrics/prometheus`: the output of `gather_prometheus_metrics`
/// - `debug/log/set/<filter>` (optional `file` query parameter): applies a new log config
/// - `debug/log/reset`: calls `reset_log_config`
///
/// # Errors
///
/// Returns an error when the listener cannot be bound to `addr` (for example, the port is
/// already in use).
pub async fn spawn(addr: SocketAddr, version: Version) -> eyre::Result<()> {
    set_version(version);

    // metrics over /debug/metrics/prometheus
    let metrics_route = warp::path!("debug" / "metrics" / "prometheus").and_then(metrics_handler);

    let log_set_route = warp::path!("debug" / "log" / "set" / String)
        .and(warp::query::<LogQuery>())
        .and_then(set_rust_log_handle);
    let log_reset_route = warp::path!("debug" / "log" / "reset").and_then(reset_log_handle);

    let route = metrics_route.or(log_set_route).or(log_reset_route);

    // Bind before detaching: `Server::run` panics inside the spawned task when binding fails,
    // which would leave the caller with `Ok(())` and no server listening.
    let (_bound_addr, server) = warp::serve(route).try_bind_ephemeral(addr)?;
    tokio::spawn(server);

    Ok(())
}

/// Route handler that replies with whatever `gather_prometheus_metrics` returns.
///
/// Never produces a `Rejection`.
async fn metrics_handler() -> Result<impl Reply, Rejection> {
    let body = gather_prometheus_metrics();
    Ok(body)
}

/// Query-string parameters accepted by the `debug/log/set/<filter>` route.
#[derive(Debug, Deserialize)]
struct LogQuery {
    /// Optional path copied into the `file` field of the log config that gets applied.
    file: Option<PathBuf>,
}

/// Route handler that applies a new log configuration.
///
/// Starts from `default_log_config()`, overrides its `env_filter` with `rust_log` and its
/// `file` with `log_query.file`, then hands the result to `set_log_config`.
///
/// Always replies successfully: the body is empty when the config was applied, and holds the
/// error text (also logged at warn level) when `set_log_config` failed.
async fn set_rust_log_handle(
    rust_log: String,
    log_query: LogQuery,
) -> Result<impl Reply, Rejection> {
    info!(?rust_log, ?log_query, "Setting log level");

    let mut new_config = default_log_config();
    new_config.file.clone_from(&log_query.file);
    new_config.env_filter.clone_from(&rust_log);

    let body = if let Err(err) = set_log_config(new_config) {
        warn!(?err, ?rust_log, ?log_query, "Failed to set log level");
        err.to_string()
    } else {
        String::new()
    };
    Ok(body)
}

/// Route handler that calls `reset_log_config`.
///
/// Always replies successfully: the body is empty on success, and holds the error text
/// (also logged at warn level) when the reset failed.
async fn reset_log_handle() -> Result<impl Reply, Rejection> {
    info!("Resetting log level");

    let body = if let Err(err) = reset_log_config() {
        warn!(?err, "Failed to reset log level");
        err.to_string()
    } else {
        String::new()
    };
    Ok(body)
}
13 changes: 13 additions & 0 deletions crates/rbuilder/src/telemetry/servers/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
//! Telemetry contains two servers.
//!
//! - [full]: verbose server exposing detailed operational information about the
//! builder.
//! - [redacted]: deliberately redacted server serves information suitable for
//! tdx builders to expose in real-time.
//!
//! The redacted server is separate from the full server because it may be desirable
//! to expose full and redacted data differently in tdx builders. e.g. redacted data
//! immediately available, full data available after a delay or some separate sanitisation.
pub mod full;
pub mod redacted;
19 changes: 19 additions & 0 deletions crates/rbuilder/src/telemetry/servers/redacted.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
//! Server that only exposes redacted data, suitable for being exposed by tdx
//! builders in real-time.
//!
//! Currently exposes just a healthcheck endpoint on /health. Can be extended
//! in the future.
use std::net::SocketAddr;

use warp::{Filter, Rejection, Reply};

/// Healthcheck handler: unconditionally replies with the body `OK`.
async fn handler() -> Result<impl Reply, Rejection> {
    const HEALTHY_BODY: &str = "OK";
    Ok(HEALTHY_BODY)
}

/// Starts the redacted telemetry server on `addr` as a detached tokio task.
///
/// The server exposes a single `health` route answered by `handler`.
///
/// # Errors
///
/// Returns an error when the listener cannot be bound to `addr` (for example, the port is
/// already in use).
pub async fn spawn(addr: SocketAddr) -> eyre::Result<()> {
    let route = warp::path!("health").and_then(handler);

    // Bind before detaching: `Server::run` panics inside the spawned task when binding fails,
    // which would leave the caller with `Ok(())` and no healthcheck endpoint listening.
    let (_bound_addr, server) = warp::serve(route).try_bind_ephemeral(addr)?;
    tokio::spawn(server);

    Ok(())
}

0 comments on commit 59e7cd3

Please sign in to comment.