Skip to content

Commit

Permalink
Implement a 120s timeout in case lctl process is taking too long
Browse files Browse the repository at this point in the history
  • Loading branch information
RDruon committed Sep 6, 2024
1 parent fbb9f54 commit 33af655
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 4 deletions.
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions lustrefs-exporter/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,13 @@ tokio = {workspace = true, features = [
"rt-multi-thread",
"macros",
"process",
"time",
]}
tokio-stream = "0.1.15"
tower = {version = "0.4.13", features = ["timeout", "load-shed", "limit"]}
tracing-subscriber = {workspace = true, features = ["env-filter"]}
tracing.workspace = true
wait-timeout = "0.2.0"

[dev-dependencies]
combine.workspace = true
Expand Down
2 changes: 1 addition & 1 deletion lustrefs-exporter/src/jobstats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ fn render_stat(
);
}
x => {

Check failure on line 266 in lustrefs-exporter/src/jobstats.rs

View workflow job for this annotation

GitHub Actions / Check

unused variable: `x`

Check failure on line 266 in lustrefs-exporter/src/jobstats.rs

View workflow job for this annotation

GitHub Actions / Clippy

unused variable: `x`

Check failure on line 266 in lustrefs-exporter/src/jobstats.rs

View workflow job for this annotation

GitHub Actions / coverage

unused variable: `x`

Check failure on line 266 in lustrefs-exporter/src/jobstats.rs

View workflow job for this annotation

GitHub Actions / Test Suite

unused variable: `x`
tracing::debug!("Unhandled OST jobstats stats: {x}");
// tracing::debug!("Unhandled OST jobstats stats: {x}");
continue;
}
};
Expand Down
2 changes: 2 additions & 0 deletions lustrefs-exporter/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ pub enum Error {
Utf8(#[from] std::str::Utf8Error),
#[error("Could not find match for {0} in {1}")]
NoCap(&'static str, String),
#[error(transparent)]
Timeout(#[from] tokio::time::error::Elapsed),
}

impl IntoResponse for Error {
Expand Down
23 changes: 20 additions & 3 deletions lustrefs-exporter/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,17 @@ use std::{
convert::Infallible,
io::{self, BufRead, BufReader},
net::SocketAddr,
time::Duration,
};
use tokio::process::Command;
use tokio_stream::{wrappers::ReceiverStream, StreamExt};
use tower::ServiceBuilder;
use wait_timeout::ChildExt;

const LUSTREFS_EXPORTER_PORT: &str = "32221";

static TIMEOUT_DURATION: Duration = Duration::from_secs(120);

#[derive(Debug, Parser)]
pub struct CommandOpts {
/// Port that exporter will listen to
Expand Down Expand Up @@ -81,14 +85,18 @@ async fn main() -> Result<(), Error> {
.concurrency_limit(10); // Max 10 concurrent scrape

let app = Router::new()
.route("/metrics", get(scrape))
.route("/metrics", get(outer_scrape))
.layer(load_shedder);

axum::serve(listener, app).await?;

Ok(())
}

async fn outer_scrape(Query(params): Query<Params>) -> Result<Response<Body>, Error> {
tokio::time::timeout(TIMEOUT_DURATION, scrape(Query(params))).await?
}

async fn scrape(Query(params): Query<Params>) -> Result<Response<Body>, Error> {
let jobstats = if params.jobstats {
let child = tokio::task::spawn_blocking(move || {
Expand Down Expand Up @@ -126,8 +134,17 @@ async fn scrape(Query(params): Query<Params>) -> Result<Response<Body>, Error> {

let (_, rx) = lustrefs_exporter::jobstats::jobstats_stream(reader);

tokio::task::spawn_blocking(move || {
if let Err(e) = child.wait() {
tokio::task::spawn_blocking(move || match child.wait_timeout(TIMEOUT_DURATION) {
Ok(Some(status)) => {
if !status.success() {
tracing::debug!("lctl jobstats failed: {status}");
}
}
Ok(None) => {
tracing::debug!("lctl jobstats timed out");
_ = child.kill();
}
Err(e) => {
tracing::debug!("Unexpected error when waiting for child: {e}");
}
});
Expand Down

0 comments on commit 33af655

Please sign in to comment.