diff --git a/src/cli.rs b/src/cli.rs index b4ce2a78e..fc92eda12 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -223,6 +223,7 @@ impl Cli { #[cfg(target_os = "linux")] let mut sig_term_fut = signal::unix::signal(signal::unix::SignalKind::terminate())?; + crate::metrics::shutdown_initiated().set(false as _); tokio::spawn(async move { #[cfg(target_os = "linux")] let sig_term = sig_term_fut.recv(); @@ -234,6 +235,7 @@ impl Cli { _ = sig_term => "SIGTERM", }; + crate::metrics::shutdown_initiated().set(true as _); tracing::info!(%signal, "shutting down from signal"); // Don't unwrap in order to ensure that we execute // any subsequent shutdown tasks. diff --git a/src/components/proxy/io_uring_shared.rs b/src/components/proxy/io_uring_shared.rs index 40c1c8f68..7593db7fd 100644 --- a/src/components/proxy/io_uring_shared.rs +++ b/src/components/proxy/io_uring_shared.rs @@ -445,6 +445,7 @@ impl IoUringLoop { std::thread::Builder::new() .name(thread_name) .spawn(move || { + crate::metrics::game_traffic_tasks().inc(); let _guard = tracing::dispatcher::set_default(&dispatcher); let tokens = slab::Slab::with_capacity(concurrent_sends + 1 + 1); @@ -490,14 +491,14 @@ impl IoUringLoop { } Err(error) => { tracing::error!(%error, "io-uring submit_and_wait failed"); - return; + break 'io; } } cq.sync(); if let Err(error) = loop_ctx.process_backlog(&submitter) { tracing::error!(%error, "failed to process io-uring backlog"); - return; + break 'io; } // Now actually process all of the completed io requests @@ -570,6 +571,8 @@ impl IoUringLoop { loop_ctx.sync(); } + + crate::metrics::game_traffic_task_closed().inc(); })?; Ok(()) diff --git a/src/components/proxy/packet_router/reference.rs b/src/components/proxy/packet_router/reference.rs index 694d5eae6..efc441ef9 100644 --- a/src/components/proxy/packet_router/reference.rs +++ b/src/components/proxy/packet_router/reference.rs @@ -37,6 +37,7 @@ impl super::DownstreamReceiveWorkerConfig { let (tx, mut rx) = tokio::sync::oneshot::channel(); let worker = uring_spawn!(thread_span, async move { + crate::metrics::game_traffic_tasks().inc(); let mut last_received_at = None; let socket = crate::net::DualStackLocalSocket::new(port) .unwrap() @@ -143,6 +144,7 @@ impl super::DownstreamReceiveWorkerConfig { } } _ = &mut rx => { + crate::metrics::game_traffic_task_closed().inc(); tracing::debug!("Closing downstream socket loop, shutdown requested"); return; } diff --git a/src/metrics.rs b/src/metrics.rs index beeb8820b..8df4cd6cd 100644 --- a/src/metrics.rs +++ b/src/metrics.rs @@ -135,6 +135,97 @@ impl<'a> From> for AsnInfo<'a> { } } +pub(crate) fn shutdown_initiated() -> &'static IntGauge { + static SHUTDOWN_INITATED: Lazy = Lazy::new(|| { + prometheus::register_int_gauge_with_registry! { + prometheus::opts! { + "shutdown_initiated", + "Shutdown process has been started", + }, + registry(), + } + .unwrap() + }); + + &SHUTDOWN_INITATED +} + +pub(crate) fn game_traffic_tasks() -> &'static IntCounter { + static GAME_TRAFFIC_TASKS: Lazy = Lazy::new(|| { + prometheus::register_int_counter_with_registry! { + prometheus::opts! { + "game_traffic_tasks", + "The amount of game traffic tasks that have spawned", + }, + registry(), + } + .unwrap() + }); + + &GAME_TRAFFIC_TASKS +} + +pub(crate) fn game_traffic_task_closed() -> &'static IntCounter { + static GAME_TRAFFIC_TASK_CLOSED: Lazy = Lazy::new(|| { + prometheus::register_int_counter_with_registry! { + prometheus::opts! { + "game_traffic_task_closed", + "The amount of game traffic tasks that have shutdown", + }, + registry(), + } + .unwrap() + }); + + &GAME_TRAFFIC_TASK_CLOSED +} + +pub(crate) fn phoenix_requests() -> &'static IntCounter { + static PHOENIX_REQUESTS: Lazy = Lazy::new(|| { + prometheus::register_int_counter_with_registry! { + prometheus::opts! { + "phoenix_requests", + "The amount of phoenix requests", + }, + registry(), + } + .unwrap() + }); + + &PHOENIX_REQUESTS +} + +pub(crate) fn phoenix_task_closed() -> &'static IntGauge { + static PHOENIX_TASK_CLOSED: Lazy = Lazy::new(|| { + prometheus::register_int_gauge_with_registry! { + prometheus::opts! { + "phoenix_task_closed", + "Whether the phoenix task has shutdown", + }, + registry(), + } + .unwrap() + }); + + &PHOENIX_TASK_CLOSED +} + +pub(crate) fn phoenix_server_errors(error: &str) -> IntCounter { + static PHOENIX_SERVER_ERRORS: Lazy = Lazy::new(|| { + prometheus::register_int_counter_vec_with_registry! { + prometheus::opts! { + "phoenix_server_errors", + "The amount of errors attempting to spawn the phoenix HTTP server", + }, + &["error"], + registry(), + } + .unwrap() + }); + + PHOENIX_SERVER_ERRORS.with_label_values(&[error]) +} + pub(crate) fn processing_time(direction: Direction) -> Histogram { static PROCESSING_TIME: Lazy = Lazy::new(|| { prometheus::register_histogram_vec_with_registry! { diff --git a/src/net/phoenix.rs b/src/net/phoenix.rs index 126302564..15d35d789 100644 --- a/src/net/phoenix.rs +++ b/src/net/phoenix.rs @@ -107,6 +107,7 @@ pub fn spawn( "application/json", ); + crate::metrics::phoenix_requests().inc(); tracing::trace!("serving phoenix request"); Ok::<_, std::convert::Infallible>( Response::builder() @@ -127,13 +128,16 @@ pub fn spawn( .serve_connection(conn, svc) .await { + let error_display = err.to_string(); + crate::metrics::phoenix_server_errors(&error_display).inc(); tracing::error!( - "failed to reponse to phoenix request: {err}" + "failed to respond to phoenix request: {error_display}" ); } }); } _ = &mut srx => { + crate::metrics::phoenix_task_closed().set(true as _); tracing::info!("shutting down phoenix HTTP service"); break; }