-
Notifications
You must be signed in to change notification settings - Fork 108
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- ~Creates a new module `server` inside `live_builder`~ - ~Server exposes a single fn `start`, which starts an actix server on the given port~ - ~Initial server implementation just exposes one route `/health` that responds 200~ - ~Adds new base config item `live_builder_server_port`~ - ~Server is spawned when running the `live_builder` cli~ ## Background - TDX builders cannot expose the existing telemetry server port, due to concerns about sensitive operational information leakage. This information may require a time-delay or additional sanitisation before being exposed outside of the secure machine. - TDX builders require some other means to know if the builder is healthy or not. ## Solution - Separated the telemetry server into two servers: `full` and `redacted`. - `full`: what we have now, exposes all operational data without regard for whether it is sensitive - `redacted`: new server that initially only exposes a healthcheck endpoint (can be extended later). This server is safe to always be exposed by tdx builders.
- Loading branch information
1 parent
ccf4677
commit 59e7cd3
Showing
8 changed files
with
156 additions
and
82 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,73 +1,12 @@ | ||
//! Telemetry helps track what is happening in the running application using metrics and tracing. | ||
//! Telemetry modules help track what is happening in the rbuilder. | ||
//! | ||
//! Interface to telemetry should be set of simple functions like: | ||
//! fn record_event(event_data) | ||
//! All internals are global variables. | ||
use serde::Deserialize; | ||
use std::{net::SocketAddr, path::PathBuf}; | ||
use tracing::{info, warn}; | ||
use warp::{Filter, Rejection, Reply}; | ||
//! The redacted server is separate from the full server, because it may be desirable | ||
//! to expose full and redacted data differently in tdx builders. e.g. redacted data | ||
//! immediately available, and full data available after a delay or some separate sanitisation. | ||
mod dynamic_logs; | ||
pub mod metrics; | ||
mod metrics; | ||
pub mod servers; | ||
|
||
pub use dynamic_logs::*; | ||
pub use metrics::*; | ||
|
||
use crate::utils::build_info::Version; | ||
|
||
async fn metrics_handler() -> Result<impl Reply, Rejection> { | ||
Ok(gather_prometheus_metrics()) | ||
} | ||
|
||
#[derive(Debug, Deserialize)] | ||
struct LogQuery { | ||
file: Option<PathBuf>, | ||
} | ||
|
||
async fn set_rust_log_handle( | ||
rust_log: String, | ||
log_query: LogQuery, | ||
) -> Result<impl Reply, Rejection> { | ||
info!(?rust_log, ?log_query, "Setting log level"); | ||
let mut log_config = default_log_config(); | ||
log_config.file.clone_from(&log_query.file); | ||
log_config.env_filter.clone_from(&rust_log); | ||
match set_log_config(log_config) { | ||
Ok(_) => Ok("".to_string()), | ||
Err(err) => { | ||
warn!(?err, ?rust_log, ?log_query, "Failed to set log level"); | ||
Ok(err.to_string()) | ||
} | ||
} | ||
} | ||
|
||
async fn reset_log_handle() -> Result<impl Reply, Rejection> { | ||
info!("Resetting log level"); | ||
match reset_log_config() { | ||
Ok(_) => Ok("".to_string()), | ||
Err(err) => { | ||
warn!(?err, "Failed to reset log level"); | ||
Ok(err.to_string()) | ||
} | ||
} | ||
} | ||
|
||
pub async fn spawn_telemetry_server(addr: SocketAddr, version: Version) -> eyre::Result<()> { | ||
set_version(version); | ||
|
||
// metrics over /debug/metrics/prometheus | ||
let metrics_route = warp::path!("debug" / "metrics" / "prometheus").and_then(metrics_handler); | ||
|
||
let log_set_route = warp::path!("debug" / "log" / "set" / String) | ||
.and(warp::query::<LogQuery>()) | ||
.and_then(set_rust_log_handle); | ||
let log_reset_route = warp::path!("debug" / "log" / "reset").and_then(reset_log_handle); | ||
|
||
let route = metrics_route.or(log_set_route).or(log_reset_route); | ||
|
||
tokio::spawn(warp::serve(route).run(addr)); | ||
|
||
Ok(()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
//! Telemetry helps track what is happening in the running application using metrics and tracing. | ||
//! | ||
//! Interface to telemetry should be set of simple functions like: | ||
//! fn record_event(event_data) | ||
//! | ||
//! All internals are global variables. | ||
//! | ||
//! Full server may expose metrics that could leak information when running tdx. | ||
use serde::Deserialize; | ||
use std::{net::SocketAddr, path::PathBuf}; | ||
use tracing::{info, warn}; | ||
use warp::{Filter, Rejection, Reply}; | ||
|
||
use crate::{ | ||
telemetry::{ | ||
dynamic_logs::{default_log_config, reset_log_config, set_log_config}, | ||
metrics::{gather_prometheus_metrics, set_version}, | ||
}, | ||
utils::build_info::Version, | ||
}; | ||
|
||
pub async fn spawn(addr: SocketAddr, version: Version) -> eyre::Result<()> { | ||
set_version(version); | ||
|
||
// metrics over /debug/metrics/prometheus | ||
let metrics_route = warp::path!("debug" / "metrics" / "prometheus").and_then(metrics_handler); | ||
|
||
let log_set_route = warp::path!("debug" / "log" / "set" / String) | ||
.and(warp::query::<LogQuery>()) | ||
.and_then(set_rust_log_handle); | ||
let log_reset_route = warp::path!("debug" / "log" / "reset").and_then(reset_log_handle); | ||
|
||
let route = metrics_route.or(log_set_route).or(log_reset_route); | ||
|
||
tokio::spawn(warp::serve(route).run(addr)); | ||
|
||
Ok(()) | ||
} | ||
|
||
async fn metrics_handler() -> Result<impl Reply, Rejection> { | ||
Ok(gather_prometheus_metrics()) | ||
} | ||
|
||
#[derive(Debug, Deserialize)] | ||
struct LogQuery { | ||
file: Option<PathBuf>, | ||
} | ||
|
||
async fn set_rust_log_handle( | ||
rust_log: String, | ||
log_query: LogQuery, | ||
) -> Result<impl Reply, Rejection> { | ||
info!(?rust_log, ?log_query, "Setting log level"); | ||
let mut log_config = default_log_config(); | ||
log_config.file.clone_from(&log_query.file); | ||
log_config.env_filter.clone_from(&rust_log); | ||
match set_log_config(log_config) { | ||
Ok(_) => Ok("".to_string()), | ||
Err(err) => { | ||
warn!(?err, ?rust_log, ?log_query, "Failed to set log level"); | ||
Ok(err.to_string()) | ||
} | ||
} | ||
} | ||
|
||
async fn reset_log_handle() -> Result<impl Reply, Rejection> { | ||
info!("Resetting log level"); | ||
match reset_log_config() { | ||
Ok(_) => Ok("".to_string()), | ||
Err(err) => { | ||
warn!(?err, "Failed to reset log level"); | ||
Ok(err.to_string()) | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
//! Telemetry contains two servers. | ||
//! | ||
//! - [full]: verbose server exposing detailed operational information about the | ||
//! builder. | ||
//! - [redacted]: deliberately redacted server serves information suitable for | ||
//! tdx builders to expose in real-time. | ||
//! | ||
//! The redacted server is separate from the debug server because it may be desirable | ||
//! to expose debug and redacted data differently in tdx builders. e.g. redacted data | ||
//! immediately available, debug data available after a delay or some separate sanitisation. | ||
pub mod full; | ||
pub mod redacted; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
//! Server that only exposes redacted data, suitable for being exposed by tdx | ||
//! builders in real-time. | ||
//! | ||
//! Currently exposes just a healthcheck endpoint on /health. Can be extended | ||
//! in the future. | ||
use std::net::SocketAddr; | ||
|
||
use warp::{Filter, Rejection, Reply}; | ||
|
||
async fn handler() -> Result<impl Reply, Rejection> { | ||
Ok("OK") | ||
} | ||
|
||
pub async fn spawn(addr: SocketAddr) -> eyre::Result<()> { | ||
let route = warp::path!("health").and_then(handler); | ||
tokio::spawn(warp::serve(route).run(addr)); | ||
Ok(()) | ||
} |