From b293f6e0deb0f8339f87cc083814e7bbfb4d43e0 Mon Sep 17 00:00:00 2001 From: Alex Pyattaev <alex.pyattaev@anza.xyz> Date: Tue, 3 Dec 2024 04:48:19 +0000 Subject: [PATCH 1/9] add thread manager --- Cargo.lock | 276 +++++++++++++++--- Cargo.toml | 2 + thread-manager/Cargo.toml | 21 ++ thread-manager/README.md | 14 + .../examples/core_contention_basics.rs | 136 +++++++++ .../core_contention_contending_set.json | 31 ++ .../core_contention_dedicated_set.json | 31 ++ .../core_contention_single_runtime.json | 20 ++ .../examples/core_contention_sweep.rs | 223 ++++++++++++++ thread-manager/examples/report.lua | 4 + thread-manager/src/lib.rs | 228 +++++++++++++++ thread-manager/src/native_thread_runtime.rs | 120 ++++++++ thread-manager/src/policy.rs | 26 ++ thread-manager/src/rayon_runtime.rs | 44 +++ thread-manager/src/runtime_manager.rs | 0 thread-manager/src/tokio_runtime.rs | 140 +++++++++ 16 files changed, 1276 insertions(+), 40 deletions(-) create mode 100644 thread-manager/Cargo.toml create mode 100644 thread-manager/README.md create mode 100644 thread-manager/examples/core_contention_basics.rs create mode 100644 thread-manager/examples/core_contention_contending_set.json create mode 100644 thread-manager/examples/core_contention_dedicated_set.json create mode 100644 thread-manager/examples/core_contention_single_runtime.json create mode 100644 thread-manager/examples/core_contention_sweep.rs create mode 100644 thread-manager/examples/report.lua create mode 100644 thread-manager/src/lib.rs create mode 100644 thread-manager/src/native_thread_runtime.rs create mode 100644 thread-manager/src/policy.rs create mode 100644 thread-manager/src/rayon_runtime.rs create mode 100644 thread-manager/src/runtime_manager.rs create mode 100644 thread-manager/src/tokio_runtime.rs diff --git a/Cargo.lock b/Cargo.lock index 6730cf067af3df..5acec2a9028929 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -63,6 +63,18 @@ dependencies = [ "zeroize", ] +[[package]] +name = "affinity" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "763e484feceb7dd021b21c5c6f81aee06b1594a743455ec7efbf72e6355e447b" +dependencies = [ + "cfg-if 1.0.0", + "errno", + "libc", + "num_cpus", +] + [[package]] name = "agave-accounts-hash-cache-tool" version = "2.2.0" @@ -85,7 +97,7 @@ dependencies = [ "clap 2.33.3", "flate2", "hex", - "hyper", + "hyper 0.14.31", "log", "serde", "serde_derive", @@ -232,6 +244,20 @@ dependencies = [ "solana-version", ] +[[package]] +name = "agave-thread-manager" +version = "2.2.0" +dependencies = [ + "affinity", + "anyhow", + "axum 0.7.9", + "rayon", + "serde", + "serde_json", + "thread-priority", + "tokio", +] + [[package]] name = "agave-transaction-view" version = "2.2.0" @@ -777,13 +803,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b829e4e32b91e643de6eafe82b1d90675f5874230191a4ffbc1b336dec4d6bf" dependencies = [ "async-trait", - "axum-core", + "axum-core 0.3.4", "bitflags 1.3.2", "bytes", "futures-util", - "http", - "http-body", - "hyper", + "http 0.2.12", + "http-body 0.4.5", + "hyper 0.14.31", "itoa", "matchit", "memchr", @@ -792,12 +818,46 @@ dependencies = [ "pin-project-lite", "rustversion", "serde", - "sync_wrapper", - "tower", + "sync_wrapper 0.1.2", + "tower 0.4.13", "tower-layer", "tower-service", ] +[[package]] +name = "axum" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" +dependencies = [ + 
"async-trait", + "axum-core 0.4.5", + "bytes", + "futures-util", + "http 1.1.0", + "http-body 1.0.1", + "http-body-util", + "hyper 1.5.1", + "hyper-util", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding 2.3.1", + "pin-project-lite", + "rustversion", + "serde", + "serde_json", + "serde_path_to_error", + "serde_urlencoded", + "sync_wrapper 1.0.2", + "tokio", + "tower 0.5.1", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "axum-core" version = "0.3.4" @@ -807,14 +867,35 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "http", - "http-body", + "http 0.2.12", + "http-body 0.4.5", "mime", "rustversion", "tower-layer", "tower-service", ] +[[package]] +name = "axum-core" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http 1.1.0", + "http-body 1.0.1", + "http-body-util", + "mime", + "pin-project-lite", + "rustversion", + "sync_wrapper 1.0.2", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "backoff" version = "0.4.0" @@ -2208,13 +2289,13 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f4b0ea5ef6dc2388a4b1669fa32097249bc03a15417b97cb75e38afb309e4a89" dependencies = [ - "http", + "http 0.2.12", "prost", "tokio", "tokio-stream", "tonic", "tonic-build", - "tower", + "tower 0.4.13", "tower-service", ] @@ -2759,7 +2840,7 @@ dependencies = [ "bitflags 1.3.2", "bytes", "headers-core", - "http", + "http 0.2.12", "httpdate", "mime", "sha-1 0.10.0", @@ -2771,7 +2852,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7f66481bfee273957b1f20485a4ff3362987f85b2c236580d81b4eb7a326429" dependencies = [ - "http", + "http 0.2.12", ] [[package]] @@ -2861,6 +2942,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] name = "http-body" version = "0.4.5" @@ -2868,7 +2960,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" dependencies = [ "bytes", - "http", + "http 0.2.12", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" +dependencies = [ + "bytes", + "http 1.1.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" +dependencies = [ + "bytes", + "futures-util", + "http 1.1.0", + "http-body 1.0.1", "pin-project-lite", ] @@ -2901,8 +3016,8 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http", - "http-body", + "http 0.2.12", + "http-body 0.4.5", "httparse", "httpdate", "itoa", @@ -2914,6 +3029,25 @@ dependencies = [ "want", ] +[[package]] +name = "hyper" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97818827ef4f364230e16705d4706e2897df2bb60617d6ca15d598025a3c481f" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.1.0", + 
"http-body 1.0.1", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", +] + [[package]] name = "hyper-proxy" version = "0.9.1" @@ -2923,8 +3057,8 @@ dependencies = [ "bytes", "futures 0.3.31", "headers", - "http", - "hyper", + "http 0.2.12", + "hyper 0.14.31", "hyper-tls", "native-tls", "tokio", @@ -2939,8 +3073,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d78e1e73ec14cf7375674f74d7dde185c8206fd9dea6fb6295e8a98098aaa97" dependencies = [ "futures-util", - "http", - "hyper", + "http 0.2.12", + "hyper 0.14.31", "rustls 0.21.12", "tokio", "tokio-rustls", @@ -2952,7 +3086,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" dependencies = [ - "hyper", + "hyper 0.14.31", "pin-project-lite", "tokio", "tokio-io-timeout", @@ -2965,12 +3099,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ "bytes", - "hyper", + "hyper 0.14.31", "native-tls", "tokio", "tokio-native-tls", ] +[[package]] +name = "hyper-util" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" +dependencies = [ + "bytes", + "futures-util", + "http 1.1.0", + "http-body 1.0.1", + "hyper 1.5.1", + "pin-project-lite", + "tokio", + "tower-service", +] + [[package]] name = "iana-time-zone" version = "0.1.46" @@ -3393,7 +3543,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1dea6e07251d9ce6a552abfb5d7ad6bc290a4596c8dcc3d795fae2bbdc1f3ff" dependencies = [ "futures 0.3.31", - "hyper", + "hyper 0.14.31", "jsonrpc-core", "jsonrpc-server-utils", "log", @@ -5012,9 +5162,9 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http", - "http-body", - "hyper", + "http 0.2.12", + "http-body 0.4.5", + "hyper 0.14.31", "hyper-rustls", "hyper-tls", "ipnet", @@ -5031,7 +5181,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "sync_wrapper", + "sync_wrapper 0.1.2", "system-configuration", "tokio", "tokio-native-tls", @@ -5054,7 +5204,7 @@ checksum = "5a735987236a8e238bf0296c7e351b999c188ccc11477f311b82b55c93984216" dependencies = [ "anyhow", "async-trait", - "http", + "http 0.2.12", "reqwest", "serde", "task-local-extensions", @@ -5484,6 +5634,16 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_path_to_error" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af99884400da37c88f5e9146b7f1fd0fbcae8f6eec4e9da38b67d05486f814a6" +dependencies = [ + "itoa", + "serde", +] + [[package]] name = "serde_spanned" version = "0.6.5" @@ -9116,8 +9276,8 @@ dependencies = [ "flate2", "futures 0.3.31", "goauth", - "http", - "hyper", + "http 0.2.12", + "hyper 0.14.31", "hyper-proxy", "log", "openssl", @@ -10566,6 +10726,12 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" + [[package]] name = "synstructure" version = "0.12.6" @@ -10826,6 +10992,20 @@ dependencies = [ "syn 2.0.90", ] +[[package]] +name = "thread-priority" +version = "1.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe075d7053dae61ac5413a34ea7d4913b6e6207844fd726bdd858b37ff72bf5" +dependencies = [ + "bitflags 2.6.0", + "cfg-if 1.0.0", + "libc", + "log", + "rustversion", + "winapi 0.3.9", +] + [[package]] name = "thread-scoped" version = "1.0.2" @@ -11125,15 +11305,15 @@ checksum = "3082666a3a6433f7f511c7192923fa1fe07c69332d3c6a2e6bb040b569199d5a" dependencies = [ "async-stream", "async-trait", - "axum", + "axum 0.6.20", "base64 0.21.7", "bytes", "futures-core", "futures-util", "h2", - "http", - "http-body", - "hyper", + "http 0.2.12", + "http-body 0.4.5", + "hyper 0.14.31", "hyper-timeout", "percent-encoding 2.3.1", "pin-project", @@ -11142,7 +11322,7 @@ dependencies = [ "tokio", "tokio-rustls", "tokio-stream", - "tower", + "tower 0.4.13", "tower-layer", "tower-service", "tracing", @@ -11181,17 +11361,33 @@ dependencies = [ "tracing", ] +[[package]] +name = "tower" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2873938d487c3cfb9aed7546dc9f2711d867c9f90c46b889989a2cb84eba6b4f" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper 0.1.2", + "tokio", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tower-layer" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" [[package]] name = "tower-service" -version = "0.3.2" +version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" +checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" @@ -11270,7 +11466,7 @@ dependencies = [ "byteorder", "bytes", "data-encoding", - "http", + "http 0.2.12", "httparse", "log", "rand 0.8.5", diff --git a/Cargo.toml b/Cargo.toml index 6055802e73c5a1..666ef71dc60d1b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -198,6 +198,7 @@ members = [ "svm-transaction", "test-validator", "thin-client", + "thread-manager", "timings", "tls-utils", "tokens", @@ -446,6 +447,7 @@ solana-bucket-map = { path = "bucket_map", version = "=2.2.0" } solana-builtins = { path = "builtins", version = "=2.2.0" } solana-builtins-default-costs = { path = "builtins-default-costs", version = "=2.2.0" } agave-cargo-registry = { path = "cargo-registry", version = "=2.2.0" } +agave-thread-manager = { path = "thread-manager", version = "=2.2.0" } solana-clap-utils = { path = "clap-utils", version = "=2.2.0" } solana-clap-v3-utils = { path = "clap-v3-utils", version = "=2.2.0" } solana-cli = { path = "cli", version = "=2.2.0" } diff --git a/thread-manager/Cargo.toml b/thread-manager/Cargo.toml new file mode 100644 index 00000000000000..69d40c4601dbf3 --- /dev/null +++ b/thread-manager/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "agave-thread-manager" +version = { workspace = true } +authors = { workspace = true } +repository = { workspace = true } +homepage = { workspace = true } +license = { workspace = true } +edition = { workspace = true } + + +[dependencies] +affinity = "0.1.2" +anyhow = { workspace = true } +serde = { workspace = true, features = ["derive"] } +thread-priority = "1.2.0" +tokio = { workspace = true, features = ["time", "rt-multi-thread"] } +rayon = { workspace = true } + +[dev-dependencies] +axum = "0.7.9" 
+serde_json = { workspace = true }
diff --git a/thread-manager/README.md b/thread-manager/README.md
new file mode 100644
index 00000000000000..7fa25ffc1571f6
--- /dev/null
+++ b/thread-manager/README.md
@@ -0,0 +1,14 @@
+# thread-manager
+Balances machine resources between multiple Tokio runtimes
+
+# Supported threading models
+## Tokio
+Multiple tokio runtimes can be created, and each may be assigned its own pool of CPU cores to run on.
+The number of worker and blocking threads is configurable.
+
+## Native
+Native threads can be spawned from managed pools; this allows them to inherit a particular affinity from the pool, as well as to
+control the total number of threads made in every pool.
+
+## Rayon
+Rayon already manages thread pools; all thread_manager does on top is to enforce affinity and priority for rayon threads.
diff --git a/thread-manager/examples/core_contention_basics.rs b/thread-manager/examples/core_contention_basics.rs
new file mode 100644
index 00000000000000..ed40dd6918de85
--- /dev/null
+++ b/thread-manager/examples/core_contention_basics.rs
@@ -0,0 +1,136 @@
+use std::{
+    future::IntoFuture,
+    io::Write,
+    net::{IpAddr, Ipv4Addr, SocketAddr},
+    path::PathBuf,
+    time::Duration,
+};
+
+async fn axum_main(port: u16) {
+    use axum::{routing::get, Router};
+
+    // basic handler that responds with a static string
+    async fn root() -> &'static str {
+        tokio::time::sleep(Duration::from_millis(1)).await;
+        "Hello, World!"
+    }
+
+    // build our application with a route
+    let app = Router::new().route("/", get(root));
+
+    // run our app with hyper, listening globally on the given port
+    let listener =
+        tokio::net::TcpListener::bind(SocketAddr::new(IpAddr::V4(Ipv4Addr::UNSPECIFIED), port))
+            .await
+            .unwrap();
+    let timeout = tokio::time::timeout(
+        Duration::from_secs(11),
+        axum::serve(listener, app).into_future(),
+    )
+    .await;
+    match timeout {
+        Ok(v) => v.unwrap(),
+        Err(_) => {
+            println!("Terminating server on port {port}");
+        }
+    }
+}
+use affinity::*;
+use agave_thread_manager::*;
+
+fn main() -> anyhow::Result<()> {
+    println!(
+        "\tCurrent thread affinity : {:?}",
+        get_thread_affinity().unwrap()
+    );
+    println!("\tTotal cores : {}", get_core_num());
+
+    let experiments = [
+        "examples/core_contention_dedicated_set.json",
+        "examples/core_contention_contending_set.json",
+    ];
+
+    for exp in experiments {
+        println!("===================");
+        println!("Running {exp}");
+        let mut conffile = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+        conffile.push(exp);
+        let conffile = std::fs::File::open(conffile)?;
+        let cfg: RuntimeManagerConfig = serde_json::from_reader(conffile)?;
+        //println!("Loaded config {}", serde_json::to_string_pretty(&cfg)?);
+
+        let rtm = RuntimeManager::new(cfg).unwrap();
+        let tok1 = rtm
+            .get_tokio("axum1")
+            .expect("Expecting runtime named axum1");
+        let tok2 = rtm
+            .get_tokio("axum2")
+            .expect("Expecting runtime named axum2");
+
+        let wrk_cores: Vec<_> = (32..64).collect();
+        let results = std::thread::scope(|s| {
+            s.spawn(|| {
+                tok1.start(axum_main(8888));
+            });
+            s.spawn(|| {
+                tok2.start(axum_main(8889));
+            });
+            let jh = s.spawn(|| run_wrk(&[8888, 8889], &wrk_cores, wrk_cores.len(), 1000).unwrap());
+            jh.join().expect("WRK crashed!")
+        });
+        println!("Results are: {:?}", results);
+    }
+    Ok(())
+}
+
+fn run_wrk(
+    ports: &[u16],
+    cpus: &[usize],
+    threads: usize,
+    connections: usize,
+) -> anyhow::Result<(Vec<Duration>, Vec<f32>)> {
+    let mut script = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    script.push("examples/report.lua");
+    let cpus:
Vec<String> = cpus.iter().map(|c| c.to_string()).collect(); + let cpus = cpus.join(","); + + let mut children: Vec<_> = ports + .iter() + .map(|p| { + std::process::Command::new("taskset") + .arg("-c") + .arg(&cpus) + .arg("wrk") + .arg(format!("http://localhost:{}", p)) + .arg("-d10") + .arg(format!("-s{}", script.to_str().unwrap())) + .arg(format!("-t{threads}")) + .arg(format!("-c{connections}")) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .spawn() + .unwrap() + }) + .collect(); + + use std::str; + let outs = children.drain(..).map(|c| c.wait_with_output().unwrap()); + let mut all_latencies = vec![]; + let mut all_rps = vec![]; + for (out, port) in outs.zip(ports.iter()) { + println!("========================="); + std::io::stdout().write_all(&out.stderr)?; + let res = str::from_utf8(&out.stdout)?; + let mut res = res.lines().last().unwrap().split(' '); + + let latency_us: u64 = res.next().unwrap().parse()?; + let latency = Duration::from_micros(latency_us); + + let requests: usize = res.next().unwrap().parse()?; + let rps = requests as f32 / 10.0; + println!("WRK results for port {port}: {latency:?} {rps}"); + all_latencies.push(Duration::from_micros(latency_us)); + all_rps.push(rps); + } + Ok((all_latencies, all_rps)) +} diff --git a/thread-manager/examples/core_contention_contending_set.json b/thread-manager/examples/core_contention_contending_set.json new file mode 100644 index 00000000000000..1225cc8e494b0f --- /dev/null +++ b/thread-manager/examples/core_contention_contending_set.json @@ -0,0 +1,31 @@ +{ + "tokio_configs": { + "tokio1": { + "worker_threads": 8, + "max_blocking_threads": 1, + "priority": 0, + "core_allocation": { + "DedicatedCoreSet": { + "min": 0, + "max": 8 + } + } + }, + "tokio2": { + "worker_threads": 8, + "max_blocking_threads": 1, + "priority": 0, + "core_allocation": { + "DedicatedCoreSet": { + "min": 0, + "max": 8 + } + } + } + }, + "tokio_runtime_mapping": { + "axum2": "tokio2", + "axum1": "tokio1" + }, + "native_configs": {} +} diff --git a/thread-manager/examples/core_contention_dedicated_set.json b/thread-manager/examples/core_contention_dedicated_set.json new file mode 100644 index 00000000000000..4e9c76170cf7cf --- /dev/null +++ b/thread-manager/examples/core_contention_dedicated_set.json @@ -0,0 +1,31 @@ +{ + "tokio_configs": { + "tokio1": { + "worker_threads": 4, + "max_blocking_threads": 1, + "priority": 0, + "core_allocation": { + "DedicatedCoreSet": { + "min": 0, + "max": 4 + } + } + }, + "tokio2": { + "worker_threads": 4, + "max_blocking_threads": 1, + "priority": 0, + "core_allocation": { + "DedicatedCoreSet": { + "min": 4, + "max": 8 + } + } + } + }, + "tokio_runtime_mapping": { + "axum2": "tokio2", + "axum1": "tokio1" + }, + "native_configs": {} +} diff --git a/thread-manager/examples/core_contention_single_runtime.json b/thread-manager/examples/core_contention_single_runtime.json new file mode 100644 index 00000000000000..42d743a188cc35 --- /dev/null +++ b/thread-manager/examples/core_contention_single_runtime.json @@ -0,0 +1,20 @@ +{ + "tokio_configs": { + "tokio1": { + "worker_threads": 8, + "max_blocking_threads": 1, + "priority": 0, + "core_allocation": { + "DedicatedCoreSet": { + "min": 0, + "max": 8 + } + } + } + }, + "tokio_runtime_mapping": { + "axum2": "tokio1", + "axum1": "tokio1" + }, + "native_configs": {} +} diff --git a/thread-manager/examples/core_contention_sweep.rs b/thread-manager/examples/core_contention_sweep.rs new file mode 100644 index 00000000000000..53706a09a344dd --- /dev/null 
+++ b/thread-manager/examples/core_contention_sweep.rs @@ -0,0 +1,223 @@ +use std::{ + collections::HashMap, + future::IntoFuture, + io::Write, + net::{IpAddr, Ipv4Addr, SocketAddr}, + path::PathBuf, + time::Duration, +}; + +async fn axum_main(port: u16) { + use axum::{routing::get, Router}; + + // basic handler that responds with a static string + async fn root() -> &'static str { + tokio::time::sleep(Duration::from_millis(1)).await; + "Hello, World!" + } + + // build our application with a route + let app = Router::new().route("/", get(root)); + + // run our app with hyper, listening globally on port 3000 + let listener = + tokio::net::TcpListener::bind(SocketAddr::new(IpAddr::V4(Ipv4Addr::UNSPECIFIED), port)) + .await + .unwrap(); + let timeout = tokio::time::timeout( + Duration::from_secs(11), + axum::serve(listener, app).into_future(), + ) + .await; + match timeout { + Ok(v) => v.unwrap(), + Err(_) => { + println!("Terminating server on port {port}"); + } + } +} +use agave_thread_manager::*; +fn make_config_shared(cc: usize) -> RuntimeManagerConfig { + let mut tokio_cfg_1 = TokioConfig::default(); + tokio_cfg_1.core_allocation = CoreAllocation::DedicatedCoreSet { min: 0, max: cc }; + tokio_cfg_1.worker_threads = cc; + let mut tokio_cfg_2 = TokioConfig::default(); + tokio_cfg_2.core_allocation = CoreAllocation::DedicatedCoreSet { min: 0, max: cc }; + tokio_cfg_2.worker_threads = cc; + RuntimeManagerConfig { + tokio_configs: HashMap::from([ + ("tokio1".into(), tokio_cfg_1), + ("tokio2".into(), tokio_cfg_2), + ]), + tokio_runtime_mapping: HashMap::from([ + ("axum1".into(), "tokio1".into()), + ("axum2".into(), "tokio2".into()), + ]), + ..Default::default() + } +} +fn make_config_dedicated(cc: usize) -> RuntimeManagerConfig { + let mut tokio_cfg_1 = TokioConfig::default(); + tokio_cfg_1.core_allocation = CoreAllocation::DedicatedCoreSet { + min: 0, + max: cc / 2, + }; + tokio_cfg_1.worker_threads = cc / 2; + let mut tokio_cfg_2 = TokioConfig::default(); + tokio_cfg_2.core_allocation = CoreAllocation::DedicatedCoreSet { + min: cc / 2, + max: cc, + }; + tokio_cfg_2.worker_threads = cc / 2; + RuntimeManagerConfig { + tokio_configs: HashMap::from([ + ("tokio1".into(), tokio_cfg_1), + ("tokio2".into(), tokio_cfg_2), + ]), + tokio_runtime_mapping: HashMap::from([ + ("axum1".into(), "tokio1".into()), + ("axum2".into(), "tokio2".into()), + ]), + ..Default::default() + } +} + +#[derive(Debug, PartialEq, Eq, Clone, Copy)] +enum Regime { + Shared, + Dedicated, + Single, +} +impl Regime { + const VALUES: [Self; 3] = [Self::Shared, Self::Dedicated, Self::Single]; +} + +#[derive(Debug, Default, serde::Serialize)] +struct Results { + latencies_s: Vec<f32>, + rps: Vec<f32>, +} + +fn main() -> anyhow::Result<()> { + let mut all_results: HashMap<String, Results> = HashMap::new(); + for regime in Regime::VALUES { + let mut res = Results::default(); + for core_cnt in [2, 4, 8, 16] { + let rtm; + println!("==================="); + println!("Running {core_cnt} cores under {regime:?}"); + let (tok1, tok2) = match regime { + Regime::Shared => { + rtm = RuntimeManager::new(make_config_shared(core_cnt)).unwrap(); + ( + rtm.get_tokio("axum1") + .expect("Expecting runtime named axum1"), + rtm.get_tokio("axum2") + .expect("Expecting runtime named axum2"), + ) + } + Regime::Dedicated => { + rtm = RuntimeManager::new(make_config_dedicated(core_cnt)).unwrap(); + ( + rtm.get_tokio("axum1") + .expect("Expecting runtime named axum1"), + rtm.get_tokio("axum2") + .expect("Expecting runtime named axum2"), + ) + } + 
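+                // Comment added for clarity: the Single regime reuses the
+                // shared-set config, but only one of the two runtimes ends up
+                // serving traffic below; wrk targets port 8888 twice.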
Regime::Single => { + rtm = RuntimeManager::new(make_config_shared(core_cnt)).unwrap(); + ( + rtm.get_tokio("axum1") + .expect("Expecting runtime named axum1"), + rtm.get_tokio("axum2") + .expect("Expecting runtime named axum2"), + ) + } + }; + + let wrk_cores: Vec<_> = (32..64).collect(); + let results = std::thread::scope(|s| { + s.spawn(|| { + tok1.start(axum_main(8888)); + }); + let jh = match regime { + Regime::Single => s.spawn(|| { + run_wrk(&[8888, 8888], &wrk_cores, wrk_cores.len(), 1000).unwrap() + }), + _ => { + s.spawn(|| { + tok2.start(axum_main(8889)); + }); + s.spawn(|| { + run_wrk(&[8888, 8889], &wrk_cores, wrk_cores.len(), 1000).unwrap() + }) + } + }; + jh.join().expect("WRK crashed!") + }); + println!("Results are: {:?}", results); + res.latencies_s.push( + results.0.iter().map(|a| a.as_secs_f32()).sum::<f32>() / results.0.len() as f32, + ); + res.rps.push(results.1.iter().sum()); + } + all_results.insert(format!("{regime:?}"), res); + std::thread::sleep(Duration::from_secs(3)); + } + println!("{}", serde_json::to_string_pretty(&all_results)?); + + Ok(()) +} + +fn run_wrk( + ports: &[u16], + cpus: &[usize], + threads: usize, + connections: usize, +) -> anyhow::Result<(Vec<Duration>, Vec<f32>)> { + let mut script = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + script.push("examples/report.lua"); + let cpus: Vec<String> = cpus.iter().map(|c| c.to_string()).collect(); + let cpus = cpus.join(","); + + let mut children: Vec<_> = ports + .iter() + .map(|p| { + std::process::Command::new("taskset") + .arg("-c") + .arg(&cpus) + .arg("wrk") + .arg(format!("http://localhost:{}", p)) + .arg("-d10") + .arg(format!("-s{}", script.to_str().unwrap())) + .arg(format!("-t{threads}")) + .arg(format!("-c{connections}")) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .spawn() + .unwrap() + }) + .collect(); + + use std::str; + let outs = children.drain(..).map(|c| c.wait_with_output().unwrap()); + let mut all_latencies = vec![]; + let mut all_rps = vec![]; + for (out, port) in outs.zip(ports.iter()) { + println!("========================="); + std::io::stdout().write_all(&out.stderr)?; + let res = str::from_utf8(&out.stdout)?; + let mut res = res.lines().last().unwrap().split(' '); + + let latency_us: u64 = res.next().unwrap().parse()?; + let latency = Duration::from_micros(latency_us); + + let requests: usize = res.next().unwrap().parse()?; + let rps = requests as f32 / 10.0; + println!("WRK results for port {port}: {latency:?} {rps}"); + all_latencies.push(Duration::from_micros(latency_us)); + all_rps.push(rps); + } + Ok((all_latencies, all_rps)) +} diff --git a/thread-manager/examples/report.lua b/thread-manager/examples/report.lua new file mode 100644 index 00000000000000..e135db5528667d --- /dev/null +++ b/thread-manager/examples/report.lua @@ -0,0 +1,4 @@ +done = function(summary, latency, requests) + x = string.format("%d %d\n", latency.mean, summary.requests) + io.write(x) +end diff --git a/thread-manager/src/lib.rs b/thread-manager/src/lib.rs new file mode 100644 index 00000000000000..08dd6510579821 --- /dev/null +++ b/thread-manager/src/lib.rs @@ -0,0 +1,228 @@ +use { + affinity::*, + anyhow::Ok, + serde::{Deserialize, Serialize}, + std::{ + collections::HashMap, + sync::{ + atomic::{AtomicUsize, Ordering}, + Mutex, + }, + }, + thread_priority::*, +}; + +mod native_thread_runtime; +mod policy; +mod tokio_runtime; +mod rayon_runtime; + +pub use rayon_runtime::{RayonConfig, RayonRuntime} +pub use native_thread_runtime::{NativeConfig, 
NativeThreadRuntime}; +pub use policy::CoreAllocation; +pub use tokio_runtime::{TokioConfig, TokioRuntime}; +pub type ConstString = Box<str>; + +#[derive(Default, Debug)] +pub struct RuntimeManager { + pub tokio_runtimes: HashMap<ConstString, TokioRuntime>, + pub tokio_runtime_mapping: HashMap<ConstString, ConstString>, + + pub native_thread_runtimes: HashMap<ConstString, NativeThreadRuntime>, + pub native_runtime_mapping: HashMap<ConstString, ConstString>, +} + +#[derive(Default, Clone, Debug, Serialize, Deserialize)] +#[serde(default)] +pub struct RuntimeManagerConfig { + pub tokio_configs: HashMap<String, TokioConfig>, + pub tokio_runtime_mapping: HashMap<String, String>, + pub native_runtime_mapping: HashMap<String, String>, + pub native_configs: HashMap<String, NativeConfig>, + pub default_core_allocation: CoreAllocation, +} + +impl RuntimeManager { + pub fn get_native(&self, name: &str) -> Option<&NativeThreadRuntime> { + let n = self.native_runtime_mapping.get(name)?; + self.native_thread_runtimes.get(n) + } + pub fn get_tokio(&self, name: &str) -> Option<&TokioRuntime> { + let n = self.tokio_runtime_mapping.get(name)?; + self.tokio_runtimes.get(n) + } + pub fn set_process_affinity(config: &RuntimeManagerConfig) -> anyhow::Result<Vec<usize>> { + let chosen_cores_mask: Vec<usize> = { + match config.default_core_allocation { + CoreAllocation::PinnedCores { min, max } => (min..max).collect(), + CoreAllocation::DedicatedCoreSet { min, max } => (min..max).collect(), + CoreAllocation::OsDefault => vec![], + } + }; + + if let Err(e) = set_thread_affinity(&chosen_cores_mask) { + anyhow::bail!(e.to_string()) + } + Ok(chosen_cores_mask) + } + + pub fn new(config: RuntimeManagerConfig) -> anyhow::Result<Self> { + let mut core_allocations = HashMap::<ConstString, Vec<usize>>::new(); + Self::set_process_affinity(&config)?; + let mut manager = Self::default(); + + //TODO: this should probably be cleaned up at some point... 
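+        // Comment added for clarity: the config maps consumer-facing names
+        // (e.g. "axum1") to pool names (e.g. "tokio1"); this aliasing lets
+        // several components share one runtime purely through configuration.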
+ for (k, v) in config.tokio_runtime_mapping.iter() { + manager + .tokio_runtime_mapping + .insert(k.clone().into_boxed_str(), v.clone().into_boxed_str()); + } + for (k, v) in config.native_runtime_mapping.iter() { + manager + .native_runtime_mapping + .insert(k.clone().into_boxed_str(), v.clone().into_boxed_str()); + } + + for (name, cfg) in config.native_configs.iter() { + let nrt = NativeThreadRuntime::new(cfg.clone()); + manager + .native_thread_runtimes + .insert(name.clone().into_boxed_str(), nrt); + } + + for (name, cfg) in config.tokio_configs.iter() { + let tokiort = TokioRuntime::new(name.clone(), cfg.clone())?; + + core_allocations.insert(name.clone().into_boxed_str(), cfg.core_allocation.as_core_mask_vector()); + manager.tokio_runtimes.insert( + name.clone().into_boxed_str(), + tokiort + ); + } + Ok(manager) + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use crate::{CoreAllocation, NativeConfig, RuntimeManager, RuntimeManagerConfig}; + + #[test] + fn process_affinity() { + let conf = RuntimeManagerConfig { + native_configs: HashMap::from([( + "pool1".to_owned(), + NativeConfig { + core_allocation: CoreAllocation::DedicatedCoreSet { min: 0, max: 4 }, + max_threads: 5, + priority: 0, + ..Default::default() + }, + )]), + default_core_allocation: CoreAllocation::DedicatedCoreSet { min: 4, max: 8 }, + native_runtime_mapping: HashMap::from([("test".to_owned(), "pool1".to_owned())]), + ..Default::default() + }; + + let rtm = RuntimeManager::new(conf).unwrap(); + let r = rtm.get_native("test").unwrap(); + + let t2 = r + .spawn(|| { + let aff = affinity::get_thread_affinity().unwrap(); + assert_eq!(aff, [0, 1, 2, 3], "Managed thread allocation should be 0-3"); + }) + .unwrap(); + + let t = std::thread::spawn(|| { + let aff = affinity::get_thread_affinity().unwrap(); + assert_eq!(aff, [4, 5, 6, 7], "Default thread allocation should be 4-7"); + + let tt = std::thread::spawn(|| { + let aff = affinity::get_thread_affinity().unwrap(); + assert_eq!( + aff, + [4, 5, 6, 7], + "Nested thread allocation should still be 4-7" + ); + }); + tt.join().unwrap(); + }); + t.join().unwrap(); + t2.join().unwrap(); + } + #[test] + fn rayon_affinity() { + let conf = RuntimeManagerConfig { + native_configs: HashMap::from([( + "pool1".to_owned(), + NativeConfig { + core_allocation: CoreAllocation::DedicatedCoreSet { min: 0, max: 4 }, + max_threads: 5, + priority: 0, + ..Default::default() + }, + )]), + default_core_allocation: CoreAllocation::DedicatedCoreSet { min: 4, max: 8 }, + native_runtime_mapping: HashMap::from([("test".to_owned(), "pool1".to_owned())]), + ..Default::default() + }; + + let rtm = RuntimeManager::new(conf).unwrap(); + let r = rtm.get_native("test").unwrap(); + + let t2 = r + .spawn(|| { + let aff = affinity::get_thread_affinity().unwrap(); + assert_eq!(aff, [0, 1, 2, 3], "Managed thread allocation should be 0-3"); + }) + .unwrap(); + + let rayon_pool = rayon::ThreadPoolBuilder::new() + .num_threads(3) + .start_handler(|idx| { + affinity::set_thread_affinity([1, 2, 3]).unwrap(); + }) + /*.spawn_handler(|thread| { + let mut b = std::thread::Builder::new(); + if let Some(name) = thread.name() { + b = b.name(name.to_owned()); + } + if let Some(stack_size) = thread.stack_size() { + b = b.stack_size(stack_size); + } + b.spawn(|| thread.run())?; + Ok(()) + })*/ + .build() + .unwrap(); + + let t = std::thread::spawn(|| { + let aff = affinity::get_thread_affinity().unwrap(); + assert_eq!(aff, [4, 5, 6, 7], "Default thread allocation should be 4-7"); + + let tt = 
std::thread::spawn(|| { + let aff = affinity::get_thread_affinity().unwrap(); + assert_eq!( + aff, + [4, 5, 6, 7], + "Nested thread allocation should still be 4-7" + ); + }); + tt.join().unwrap(); + }); + let _rr = rayon_pool.broadcast(|ctx| { + let aff = affinity::get_thread_affinity().unwrap(); + println!("Rayon thread {} reporting", ctx.index()); + assert_eq!( + aff, + [1, 2, 3], + "Rayon thread allocation should still be 1-3" + ); + }); + t.join().unwrap(); + t2.join().unwrap(); + } +} diff --git a/thread-manager/src/native_thread_runtime.rs b/thread-manager/src/native_thread_runtime.rs new file mode 100644 index 00000000000000..7c405d942d24bb --- /dev/null +++ b/thread-manager/src/native_thread_runtime.rs @@ -0,0 +1,120 @@ +use { + crate::policy::CoreAllocation, + anyhow::bail, + serde::{Deserialize, Serialize}, + std::sync::atomic::{AtomicUsize, Ordering}, + std::sync::Arc, +}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +#[serde(default)] +pub struct NativeConfig { + pub core_allocation: CoreAllocation, + pub max_threads: usize, + pub priority: usize, + pub name_base: String, + pub stack_size_bytes: usize, +} + +impl Default for NativeConfig { + fn default() -> Self { + Self { + core_allocation: CoreAllocation::OsDefault, + max_threads: 10, + priority: 0, + stack_size_bytes: 2 * 1024 * 1024, + name_base: "thread".to_owned(), + } + } +} + +#[derive(Debug)] +pub struct NativeThreadRuntime { + pub id_count: AtomicUsize, + pub running_count: Arc<AtomicUsize>, + pub config: NativeConfig, +} + +pub struct JoinHandle<T> { + std_handle: Option<std::thread::JoinHandle<T>>, + running_count: Arc<AtomicUsize>, +} + +impl<T> JoinHandle<T> { + fn join_inner(&mut self) -> Result<T, Box<dyn core::any::Any + Send + 'static>> { + let r = match self.std_handle.take() { + Some(jh) => { + let r = jh.join(); + self.running_count.fetch_sub(1, Ordering::SeqCst); + r + } + None => { + panic!("Thread already joined"); + } + }; + dbg!(self.std_handle.is_some()); + r + } + + pub fn join(mut self) -> Result<T, Box<dyn core::any::Any + Send + 'static>> { + self.join_inner() + } + + pub fn is_finished(&self) -> bool { + match self.std_handle { + Some(ref jh) => jh.is_finished(), + None => true, + } + } +} + +impl<T> Drop for JoinHandle<T> { + fn drop(&mut self) { + if self.std_handle.is_some() { + println!("Attempting to drop a Join Handle of a running thread will leak thread IDs, please join your managed threads!"); + self.join_inner().expect("Child thread panicked"); + } + } +} + +impl NativeThreadRuntime { + pub fn new(cfg: NativeConfig) -> Self { + Self { + id_count: AtomicUsize::new(0), + running_count: Arc::new(AtomicUsize::new(0)), + config: cfg, + } + } + pub fn spawn<F, T>(&self, f: F) -> anyhow::Result<JoinHandle<T>> + where + F: FnOnce() -> T, + F: Send + 'static, + T: Send + 'static, + { + let spawned = self.running_count.load(Ordering::SeqCst); + if spawned >= self.config.max_threads { + bail!("All allowed threads in this pool are already spawned"); + } + let core_set: Vec<_> = match self.config.core_allocation { + CoreAllocation::PinnedCores { min: _, max: _ } => { + todo!("Need to store pinning mask somewhere"); + } + CoreAllocation::DedicatedCoreSet { min, max } => (min..max).collect(), + CoreAllocation::OsDefault => (0..affinity::get_core_num()).collect(), + }; + + let n = self.id_count.fetch_add(1, Ordering::SeqCst); + let jh = std::thread::Builder::new() + .name(format!("{}-{}", &self.config.name_base, n)) + .stack_size(self.config.stack_size_bytes) + .spawn(move || { + 
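+                // Comment added for clarity: pin the freshly spawned thread to
+                // the pool's core set before handing control to the user closure.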
affinity::set_thread_affinity(core_set).expect("Can not set thread affinity"); + f() + })?; + self.running_count.fetch_add(1, Ordering::SeqCst); + Ok(JoinHandle { + std_handle: Some(jh), + running_count: self.running_count.clone(), + }) + } +} diff --git a/thread-manager/src/policy.rs b/thread-manager/src/policy.rs new file mode 100644 index 00000000000000..8c5758bbfa7dce --- /dev/null +++ b/thread-manager/src/policy.rs @@ -0,0 +1,26 @@ +use serde::{Deserialize, Serialize}; + +#[derive(Default, Debug, Clone, Serialize, Deserialize)] +pub enum CoreAllocation { + ///Use OS default allocation (i.e. do not alter core affinity) + #[default] + OsDefault, + ///Pin each thread to a core in given range. Number of cores should be >= number of threads + PinnedCores { min: usize, max: usize }, + ///Pin the threads to a set of cores + DedicatedCoreSet { min: usize, max: usize }, +} + +impl CoreAllocation { + /// Converts into a vector of core IDs. OsDefault is converted to empty vector. + pub fn as_core_mask_vector(&self) -> Vec<usize> { + match *self { + CoreAllocation::PinnedCores { min, max } => (min..max).collect(), + CoreAllocation::DedicatedCoreSet { min, max } => (min..max).collect(), + CoreAllocation::OsDefault => vec![], + } + } +} + +///Applies policy to the calling thread +pub fn apply_policy(alloc: CoreAllocation, priority: u32) {} diff --git a/thread-manager/src/rayon_runtime.rs b/thread-manager/src/rayon_runtime.rs new file mode 100644 index 00000000000000..642711c12b8314 --- /dev/null +++ b/thread-manager/src/rayon_runtime.rs @@ -0,0 +1,44 @@ +use { + crate::policy::CoreAllocation, + anyhow::Ok, + serde::{Deserialize, Serialize}, +}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +#[serde(default)] +pub struct RayonConfig { + pub worker_threads: usize, + pub priority: u32, + pub stack_size_bytes: usize, + pub core_allocation: CoreAllocation, +} + +impl Default for RayonConfig { + fn default() -> Self { + Self { + core_allocation: CoreAllocation::OsDefault, + worker_threads: 4, + priority: 0, + stack_size_bytes: 2 * 1024 * 1024, + } + } +} + +#[derive(Debug)] +pub struct RayonRuntime { + pub rayon_pool: rayon::ThreadPool, + pub config: RayonConfig, +} + +impl RayonRuntime { + fn new(config: RayonConfig) -> anyhow::Result<Self> { + let policy = config.core_allocation; + let rayon_pool = rayon::ThreadPoolBuilder::new() + .num_threads(config.worker_threads) + .start_handler(move |idx| { + affinity::set_thread_affinity([1, 2, 3]).unwrap(); + }) + .build()?; + Ok(Self { rayon_pool, config }) + } +} diff --git a/thread-manager/src/runtime_manager.rs b/thread-manager/src/runtime_manager.rs new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/thread-manager/src/tokio_runtime.rs b/thread-manager/src/tokio_runtime.rs new file mode 100644 index 00000000000000..e5d2efda70b2e6 --- /dev/null +++ b/thread-manager/src/tokio_runtime.rs @@ -0,0 +1,140 @@ +use { + crate::policy::CoreAllocation, + serde::{Deserialize, Serialize}, + std::{ + future::Future, + sync::{ + atomic::{AtomicUsize, Ordering}, + Mutex, + }, + }, + thread_priority::ThreadExt, +}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +#[serde(default)] +pub struct TokioConfig { + ///number of worker threads tokio is allowed to spawn + pub worker_threads: usize, + ///max number of blocking threads tokio is allowed to spawn + pub max_blocking_threads: usize, + pub priority: u32, + pub stack_size_bytes: usize, + pub event_interval: u32, + pub core_allocation: CoreAllocation, +} + +impl Default for TokioConfig { + fn 
default() -> Self { + Self { + core_allocation: CoreAllocation::OsDefault, + worker_threads: 1, + max_blocking_threads: 1, + priority: 0, + stack_size_bytes: 2 * 1024 * 1024, + event_interval: 61, + } + } +} + +#[derive(Debug)] +pub struct TokioRuntime { + pub(crate) tokio: tokio::runtime::Runtime, + pub config: TokioConfig, +} +impl TokioRuntime { + pub(crate) fn new(name: String, cfg: TokioConfig) -> anyhow::Result<Self> { + let num_workers = if cfg.worker_threads == 0 { + affinity::get_core_num() + } else { + cfg.worker_threads + }; + let chosen_cores_mask = cfg.core_allocation.as_core_mask_vector(); + + let base_name = name.clone(); + println!( + "Assigning {:?} to runtime {}", + &chosen_cores_mask, &base_name + ); + let mut builder = match num_workers { + 1 => tokio::runtime::Builder::new_current_thread(), + _ => { + let mut builder = tokio::runtime::Builder::new_multi_thread(); + builder.worker_threads(num_workers); + builder + } + }; + let atomic_id: AtomicUsize = AtomicUsize::new(0); + builder + .event_interval(cfg.event_interval) + .thread_name_fn(move || { + let id = atomic_id.fetch_add(1, Ordering::SeqCst); + format!("{}-{}", base_name, id) + }) + .thread_stack_size(cfg.stack_size_bytes) + .enable_all() + .max_blocking_threads(cfg.max_blocking_threads); + + //keep borrow checker happy and move these things into the closure + let c = cfg.clone(); + let chosen_cores_mask = Mutex::new(chosen_cores_mask); + builder.on_thread_start(move || { + let cur_thread = std::thread::current(); + let _tid = cur_thread + .get_native_id() + .expect("Can not get thread id for newly created thread"); + let tname = cur_thread.name().unwrap(); + //println!("thread {tname} id {tid} started"); + std::thread::current() + .set_priority(thread_priority::ThreadPriority::Crossplatform( + (c.priority as u8).try_into().unwrap(), + )) + .expect("Can not set thread priority!"); + + match c.core_allocation { + CoreAllocation::PinnedCores { min: _, max: _ } => { + let mut lg = chosen_cores_mask + .lock() + .expect("Can not lock core mask mutex"); + let core = lg + .pop() + .expect("Not enough cores provided for pinned allocation"); + println!("Pinning worker {tname} to core {core}"); + affinity::set_thread_affinity([core]) + .expect("Can not set thread affinity for runtime worker"); + } + CoreAllocation::DedicatedCoreSet { min: _, max: _ } => { + let lg = chosen_cores_mask + .lock() + .expect("Can not lock core mask mutex"); + affinity::set_thread_affinity(&(*lg)) + .expect("Can not set thread affinity for runtime worker"); + } + CoreAllocation::OsDefault => {} + } + }); + Ok(TokioRuntime { + tokio: builder.build()?, + config: cfg.clone(), + }) + } + /* This is bad idea... 
+    pub fn spawn<F>(&self, fut: F)-><F as Future>::Output
+    where F: Future
+    {
+        self.tokio.spawn(fut)
+    }
+    pub fn spawn_blocking<F>(&self, fut: F)-><F as Future>::Output
+    where F: Future
+    {
+        self.spawn(fut)
+    }
+    */
+    pub fn start<F>(&self, fut: F) -> F::Output
+    where
+        F: Future,
+    {
+        // the thread that calls block_on does not need its affinity messed with here
+        self.tokio.block_on(fut)
+    }
+}

From 256194479b19e111ad134dfa9bc40a464d6a1cfb Mon Sep 17 00:00:00 2001
From: Alex Pyattaev <alex.pyattaev@anza.xyz>
Date: Tue, 3 Dec 2024 14:30:46 +0000
Subject: [PATCH 2/9] organizing policy code

---
 Cargo.lock | 2 +-
 thread-manager/README.md | 14 +++++++---
 .../examples/core_contention_basics.rs | 3 +--
 thread-manager/src/lib.rs | 27 +++++++++++--------
 thread-manager/src/native_thread_runtime.rs | 6 +++--
 thread-manager/src/policy.rs | 2 +-
 thread-manager/src/rayon_runtime.rs | 7 ++---
 7 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 5acec2a9028929..8c3a02713a6af8 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2773,7 +2773,7 @@ dependencies = [
 "futures-core",
 "futures-sink",
 "futures-util",
-"http",
+"http 0.2.12",
 "indexmap 2.7.0",
 "slab",
 "tokio",
diff --git a/thread-manager/README.md b/thread-manager/README.md
index 7fa25ffc1571f6..f12861621686e1 100644
--- a/thread-manager/README.md
+++ b/thread-manager/README.md
@@ -1,14 +1,20 @@
 # thread-manager
-Balances machine resources between multiple Tokio runtimes
+Balances machine resources between multiple threaded runtimes. The purpose is to manage thread contention between different parts of the code that may
+benefit from a diverse set of management options. For example, we may want to have cores 1-4 handling networking via Tokio, core 5 handling file IO via Tokio, cores 9-16 allocated for the Rayon thread pool, and cores 6-8 available for general use by std::thread. This will minimize contention for CPU caches and context switches that would occur if Rayon were entirely unaware it was running side-by-side with tokio, and each were to spawn as many threads as there are cores.
 
 # Supported threading models
 ## Tokio
 Multiple tokio runtimes can be created, and each may be assigned its own pool of CPU cores to run on.
-The number of worker and blocking threads is configurable.
+The number of worker and blocking threads is configurable, as are thread priorities for the pool.
 
 ## Native
-Native threads can be spawned from managed pools; this allows them to inherit a particular affinity from the pool, as well as to
+Native threads (std::thread) can be spawned from managed pools; this allows them to inherit a particular affinity from the pool, as well as to
 control the total number of threads made in every pool.
 
 ## Rayon
-Rayon already manages thread pools; all thread_manager does on top is to enforce affinity and priority for rayon threads.
+Rayon already manages thread pools well enough, all thread_manager does on top is to enforce affinity and priority for rayon threads. Normally one would only ever have one rayon pool, but for priority allocations one may want to spawn many rayon pools.
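+
+# Example
+A minimal usage sketch, assuming a JSON config shaped like the files under `examples/` and that `serde_json` is available to the caller (here it is only a dev-dependency):
+
+```rust
+use agave_thread_manager::*;
+
+fn main() -> anyhow::Result<()> {
+    // One of the configs shipped in examples/; it maps the name "axum1".
+    let file = std::fs::File::open("examples/core_contention_dedicated_set.json")?;
+    let cfg: RuntimeManagerConfig = serde_json::from_reader(file)?;
+    let manager = RuntimeManager::new(cfg)?;
+
+    // Fetch the runtime mapped to "axum1" and block on a future there.
+    let tokio_rt = manager.get_tokio("axum1").expect("runtime named axum1");
+    tokio_rt.start(async {
+        // async workload runs on the pinned runtime
+    });
+    Ok(())
+}
+```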
+ +# Limitations + + * Thread pools can only be created at process startup + * Once thread pool is created, its policy can not be modified at runtime diff --git a/thread-manager/examples/core_contention_basics.rs b/thread-manager/examples/core_contention_basics.rs index ed40dd6918de85..ff1bf7c9fc4279 100644 --- a/thread-manager/examples/core_contention_basics.rs +++ b/thread-manager/examples/core_contention_basics.rs @@ -35,8 +35,7 @@ async fn axum_main(port: u16) { } } } -use affinity::*; -use agave_thread_manager::*; +use {affinity::*, agave_thread_manager::*}; fn main() -> anyhow::Result<()> { println!( diff --git a/thread-manager/src/lib.rs b/thread-manager/src/lib.rs index 08dd6510579821..9f58c70d580e55 100644 --- a/thread-manager/src/lib.rs +++ b/thread-manager/src/lib.rs @@ -14,13 +14,15 @@ use { mod native_thread_runtime; mod policy; -mod tokio_runtime; mod rayon_runtime; +mod tokio_runtime; -pub use rayon_runtime::{RayonConfig, RayonRuntime} -pub use native_thread_runtime::{NativeConfig, NativeThreadRuntime}; -pub use policy::CoreAllocation; -pub use tokio_runtime::{TokioConfig, TokioRuntime}; +pub use { + native_thread_runtime::{NativeConfig, NativeThreadRuntime}, + policy::CoreAllocation, + rayon_runtime::{RayonConfig, RayonRuntime}, + tokio_runtime::{TokioConfig, TokioRuntime}, +}; pub type ConstString = Box<str>; #[derive(Default, Debug)] @@ -93,11 +95,13 @@ impl RuntimeManager { for (name, cfg) in config.tokio_configs.iter() { let tokiort = TokioRuntime::new(name.clone(), cfg.clone())?; - core_allocations.insert(name.clone().into_boxed_str(), cfg.core_allocation.as_core_mask_vector()); - manager.tokio_runtimes.insert( + core_allocations.insert( name.clone().into_boxed_str(), - tokiort + cfg.core_allocation.as_core_mask_vector(), ); + manager + .tokio_runtimes + .insert(name.clone().into_boxed_str(), tokiort); } Ok(manager) } @@ -105,9 +109,10 @@ impl RuntimeManager { #[cfg(test)] mod tests { - use std::collections::HashMap; - - use crate::{CoreAllocation, NativeConfig, RuntimeManager, RuntimeManagerConfig}; + use { + crate::{CoreAllocation, NativeConfig, RuntimeManager, RuntimeManagerConfig}, + std::collections::HashMap, + }; #[test] fn process_affinity() { diff --git a/thread-manager/src/native_thread_runtime.rs b/thread-manager/src/native_thread_runtime.rs index 7c405d942d24bb..fad1457347d0f9 100644 --- a/thread-manager/src/native_thread_runtime.rs +++ b/thread-manager/src/native_thread_runtime.rs @@ -2,8 +2,10 @@ use { crate::policy::CoreAllocation, anyhow::bail, serde::{Deserialize, Serialize}, - std::sync::atomic::{AtomicUsize, Ordering}, - std::sync::Arc, + std::sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, + }, }; #[derive(Clone, Debug, Serialize, Deserialize)] diff --git a/thread-manager/src/policy.rs b/thread-manager/src/policy.rs index 8c5758bbfa7dce..357b7f2b2a5b40 100644 --- a/thread-manager/src/policy.rs +++ b/thread-manager/src/policy.rs @@ -23,4 +23,4 @@ impl CoreAllocation { } ///Applies policy to the calling thread -pub fn apply_policy(alloc: CoreAllocation, priority: u32) {} +pub fn apply_policy(alloc: &CoreAllocation, priority: u32) {} diff --git a/thread-manager/src/rayon_runtime.rs b/thread-manager/src/rayon_runtime.rs index 642711c12b8314..bc050be58e8f6b 100644 --- a/thread-manager/src/rayon_runtime.rs +++ b/thread-manager/src/rayon_runtime.rs @@ -1,5 +1,5 @@ use { - crate::policy::CoreAllocation, + crate::policy::{apply_policy, CoreAllocation}, anyhow::Ok, serde::{Deserialize, Serialize}, }; @@ -32,11 +32,12 @@ pub struct RayonRuntime { impl 
RayonRuntime {
     fn new(config: RayonConfig) -> anyhow::Result<Self> {
-        let policy = config.core_allocation;
+        let policy = config.core_allocation.clone();
+        let priority = config.priority;
         let rayon_pool = rayon::ThreadPoolBuilder::new()
             .num_threads(config.worker_threads)
             .start_handler(move |idx| {
-                affinity::set_thread_affinity([1, 2, 3]).unwrap();
+                apply_policy(&policy, priority);
             })
             .build()?;
         Ok(Self { rayon_pool, config })

From bfaf7e219bd52f5e412f80884c8b47f0d68a7f96 Mon Sep 17 00:00:00 2001
From: Alex Pyattaev <alex.pyattaev@anza.xyz>
Date: Tue, 3 Dec 2024 16:02:27 +0000
Subject: [PATCH 3/9] minimal viable version of the thread manager

---
 thread-manager/Cargo.toml | 3 +
 thread-manager/README.md | 8 +-
 .../examples/core_contention_sweep.rs | 36 ++++----
 thread-manager/src/lib.rs | 92 +++++++++++--------
 thread-manager/src/native_thread_runtime.rs | 18 ++--
 thread-manager/src/policy.rs | 38 +++++++-
 thread-manager/src/rayon_runtime.rs | 10 +-
 thread-manager/src/tokio_runtime.rs | 35 +------
 8 files changed, 140 insertions(+), 100 deletions(-)

diff --git a/thread-manager/Cargo.toml b/thread-manager/Cargo.toml
index 69d40c4601dbf3..f127842053da57 100644
--- a/thread-manager/Cargo.toml
+++ b/thread-manager/Cargo.toml
@@ -1,5 +1,7 @@
 [package]
 name = "agave-thread-manager"
+description = "Thread pool manager for agave"
+
 version = { workspace = true }
 authors = { workspace = true }
 repository = { workspace = true }
@@ -7,6 +9,7 @@ homepage = { workspace = true }
 license = { workspace = true }
 edition = { workspace = true }
 
+publish = false
 
 [dependencies]
 affinity = "0.1.2"
diff --git a/thread-manager/README.md b/thread-manager/README.md
index f12861621686e1..5a75d1dfef28bb 100644
--- a/thread-manager/README.md
+++ b/thread-manager/README.md
@@ -1,5 +1,5 @@
 # thread-manager
-Balances machine resources between multiple threaded runtimes. The purpose is to manage thread contention between different parts of the code that may
+benefit from a diverse set of management options. For example, we may want to have cores 1-4 handling networking via Tokio, core 5 handling file IO via Tokio, cores 9-16 allocated for the Rayon thread pool, and cores 6-8 available for general use by std::thread. This will minimize contention for CPU caches and context switches that would occur if Rayon were entirely unaware it was running side-by-side with tokio, and each were to spawn as many threads as there are cores.
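+
+A configuration sketch for a split like the one above (names are illustrative; `DedicatedCoreSet` ranges are min-inclusive/max-exclusive, as in the bundled example configs):
+
+```json
+{
+  "tokio_configs": {
+    "net": { "worker_threads": 4, "core_allocation": { "DedicatedCoreSet": { "min": 1, "max": 5 } } },
+    "file_io": { "worker_threads": 1, "core_allocation": { "DedicatedCoreSet": { "min": 5, "max": 6 } } }
+  },
+  "tokio_runtime_mapping": { "networking": "net", "disk": "file_io" },
+  "rayon_configs": {
+    "compute": { "worker_threads": 8, "core_allocation": { "DedicatedCoreSet": { "min": 9, "max": 17 } } }
+  },
+  "rayon_runtime_mapping": { "compute": "compute" },
+  "default_core_allocation": { "DedicatedCoreSet": { "min": 6, "max": 9 } }
+}
+```
+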
# Supported threading models @@ -18,3 +18,9 @@ Rayon already manages thread pools well enough, all thread_manager does on top i * Thread pools can only be created at process startup * Once thread pool is created, its policy can not be modified at runtime + +# TODO: + + * support tracing + * proper error handling everywhere + * even more tests diff --git a/thread-manager/examples/core_contention_sweep.rs b/thread-manager/examples/core_contention_sweep.rs index 53706a09a344dd..f160ddf3886d4e 100644 --- a/thread-manager/examples/core_contention_sweep.rs +++ b/thread-manager/examples/core_contention_sweep.rs @@ -38,12 +38,12 @@ async fn axum_main(port: u16) { } use agave_thread_manager::*; fn make_config_shared(cc: usize) -> RuntimeManagerConfig { - let mut tokio_cfg_1 = TokioConfig::default(); - tokio_cfg_1.core_allocation = CoreAllocation::DedicatedCoreSet { min: 0, max: cc }; - tokio_cfg_1.worker_threads = cc; - let mut tokio_cfg_2 = TokioConfig::default(); - tokio_cfg_2.core_allocation = CoreAllocation::DedicatedCoreSet { min: 0, max: cc }; - tokio_cfg_2.worker_threads = cc; + let tokio_cfg_1 = TokioConfig { + core_allocation: CoreAllocation::DedicatedCoreSet { min: 0, max: cc }, + worker_threads: cc, + ..Default::default() + }; + let tokio_cfg_2 = tokio_cfg_1.clone(); RuntimeManagerConfig { tokio_configs: HashMap::from([ ("tokio1".into(), tokio_cfg_1), @@ -57,18 +57,22 @@ fn make_config_shared(cc: usize) -> RuntimeManagerConfig { } } fn make_config_dedicated(cc: usize) -> RuntimeManagerConfig { - let mut tokio_cfg_1 = TokioConfig::default(); - tokio_cfg_1.core_allocation = CoreAllocation::DedicatedCoreSet { - min: 0, - max: cc / 2, + let tokio_cfg_1 = TokioConfig { + core_allocation: CoreAllocation::DedicatedCoreSet { + min: 0, + max: cc / 2, + }, + worker_threads: cc / 2, + ..Default::default() }; - tokio_cfg_1.worker_threads = cc / 2; - let mut tokio_cfg_2 = TokioConfig::default(); - tokio_cfg_2.core_allocation = CoreAllocation::DedicatedCoreSet { - min: cc / 2, - max: cc, + let tokio_cfg_2 = TokioConfig { + core_allocation: CoreAllocation::DedicatedCoreSet { + min: cc / 2, + max: cc, + }, + worker_threads: cc / 2, + ..Default::default() }; - tokio_cfg_2.worker_threads = cc / 2; RuntimeManagerConfig { tokio_configs: HashMap::from([ ("tokio1".into(), tokio_cfg_1), diff --git a/thread-manager/src/lib.rs b/thread-manager/src/lib.rs index 9f58c70d580e55..87e1393e4e630b 100644 --- a/thread-manager/src/lib.rs +++ b/thread-manager/src/lib.rs @@ -1,21 +1,13 @@ use { - affinity::*, anyhow::Ok, serde::{Deserialize, Serialize}, - std::{ - collections::HashMap, - sync::{ - atomic::{AtomicUsize, Ordering}, - Mutex, - }, - }, - thread_priority::*, + std::collections::HashMap, }; -mod native_thread_runtime; -mod policy; -mod rayon_runtime; -mod tokio_runtime; +pub mod native_thread_runtime; +pub mod policy; +pub mod rayon_runtime; +pub mod tokio_runtime; pub use { native_thread_runtime::{NativeConfig, NativeThreadRuntime}, @@ -32,15 +24,23 @@ pub struct RuntimeManager { pub native_thread_runtimes: HashMap<ConstString, NativeThreadRuntime>, pub native_runtime_mapping: HashMap<ConstString, ConstString>, + + pub rayon_runtimes: HashMap<ConstString, RayonRuntime>, + pub rayon_runtime_mapping: HashMap<ConstString, ConstString>, } #[derive(Default, Clone, Debug, Serialize, Deserialize)] #[serde(default)] pub struct RuntimeManagerConfig { + pub native_configs: HashMap<String, NativeConfig>, + pub native_runtime_mapping: HashMap<String, String>, + + pub rayon_configs: HashMap<String, RayonConfig>, + pub 
rayon_runtime_mapping: HashMap<String, String>, + pub tokio_configs: HashMap<String, TokioConfig>, pub tokio_runtime_mapping: HashMap<String, String>, - pub native_runtime_mapping: HashMap<String, String>, - pub native_configs: HashMap<String, NativeConfig>, + pub default_core_allocation: CoreAllocation, } @@ -49,6 +49,10 @@ impl RuntimeManager { let n = self.native_runtime_mapping.get(name)?; self.native_thread_runtimes.get(n) } + pub fn get_rayon(&self, name: &str) -> Option<&RayonRuntime> { + let n = self.rayon_runtime_mapping.get(name)?; + self.rayon_runtimes.get(n) + } pub fn get_tokio(&self, name: &str) -> Option<&TokioRuntime> { let n = self.tokio_runtime_mapping.get(name)?; self.tokio_runtimes.get(n) @@ -62,7 +66,7 @@ impl RuntimeManager { } }; - if let Err(e) = set_thread_affinity(&chosen_cores_mask) { + if let Err(e) = affinity::set_thread_affinity(&chosen_cores_mask) { anyhow::bail!(e.to_string()) } Ok(chosen_cores_mask) @@ -84,6 +88,11 @@ impl RuntimeManager { .native_runtime_mapping .insert(k.clone().into_boxed_str(), v.clone().into_boxed_str()); } + for (k, v) in config.rayon_runtime_mapping.iter() { + manager + .rayon_runtime_mapping + .insert(k.clone().into_boxed_str(), v.clone().into_boxed_str()); + } for (name, cfg) in config.native_configs.iter() { let nrt = NativeThreadRuntime::new(cfg.clone()); @@ -91,6 +100,12 @@ impl RuntimeManager { .native_thread_runtimes .insert(name.clone().into_boxed_str(), nrt); } + for (name, cfg) in config.rayon_configs.iter() { + let rrt = RayonRuntime::new(cfg.clone())?; + manager + .rayon_runtimes + .insert(name.clone().into_boxed_str(), rrt); + } for (name, cfg) in config.tokio_configs.iter() { let tokiort = TokioRuntime::new(name.clone(), cfg.clone())?; @@ -110,7 +125,7 @@ impl RuntimeManager { #[cfg(test)] mod tests { use { - crate::{CoreAllocation, NativeConfig, RuntimeManager, RuntimeManagerConfig}, + crate::{CoreAllocation, NativeConfig, RayonConfig, RuntimeManager, RuntimeManagerConfig}, std::collections::HashMap, }; @@ -170,8 +185,19 @@ mod tests { ..Default::default() }, )]), + rayon_configs: HashMap::from([( + "rayon1".to_owned(), + RayonConfig { + core_allocation: CoreAllocation::DedicatedCoreSet { min: 1, max: 4 }, + worker_threads: 3, + priority: 0, + ..Default::default() + }, + )]), default_core_allocation: CoreAllocation::DedicatedCoreSet { min: 4, max: 8 }, native_runtime_mapping: HashMap::from([("test".to_owned(), "pool1".to_owned())]), + + rayon_runtime_mapping: HashMap::from([("test".to_owned(), "rayon1".to_owned())]), ..Default::default() }; @@ -184,25 +210,19 @@ mod tests { assert_eq!(aff, [0, 1, 2, 3], "Managed thread allocation should be 0-3"); }) .unwrap(); + let rrt = rtm.get_rayon("test").unwrap(); - let rayon_pool = rayon::ThreadPoolBuilder::new() - .num_threads(3) - .start_handler(|idx| { - affinity::set_thread_affinity([1, 2, 3]).unwrap(); - }) - /*.spawn_handler(|thread| { - let mut b = std::thread::Builder::new(); - if let Some(name) = thread.name() { - b = b.name(name.to_owned()); - } - if let Some(stack_size) = thread.stack_size() { - b = b.stack_size(stack_size); - } - b.spawn(|| thread.run())?; - Ok(()) - })*/ - .build() - .unwrap(); + /*.spawn_handler(|thread| { + let mut b = std::thread::Builder::new(); + if let Some(name) = thread.name() { + b = b.name(name.to_owned()); + } + if let Some(stack_size) = thread.stack_size() { + b = b.stack_size(stack_size); + } + b.spawn(|| thread.run())?; + Ok(()) + })*/ let t = std::thread::spawn(|| { let aff = affinity::get_thread_affinity().unwrap(); @@ -218,7 +238,7 
@@ mod tests { }); tt.join().unwrap(); }); - let _rr = rayon_pool.broadcast(|ctx| { + let _rr = rrt.rayon_pool.broadcast(|ctx| { let aff = affinity::get_thread_affinity().unwrap(); println!("Rayon thread {} reporting", ctx.index()); assert_eq!( diff --git a/thread-manager/src/native_thread_runtime.rs b/thread-manager/src/native_thread_runtime.rs index fad1457347d0f9..a8ce5da516e819 100644 --- a/thread-manager/src/native_thread_runtime.rs +++ b/thread-manager/src/native_thread_runtime.rs @@ -1,10 +1,10 @@ use { - crate::policy::CoreAllocation, + crate::policy::{apply_policy, CoreAllocation}, anyhow::bail, serde::{Deserialize, Serialize}, std::sync::{ atomic::{AtomicUsize, Ordering}, - Arc, + Arc, Mutex, }, }; @@ -13,7 +13,7 @@ use { pub struct NativeConfig { pub core_allocation: CoreAllocation, pub max_threads: usize, - pub priority: usize, + pub priority: u8, pub name_base: String, pub stack_size_bytes: usize, } @@ -97,20 +97,16 @@ impl NativeThreadRuntime { if spawned >= self.config.max_threads { bail!("All allowed threads in this pool are already spawned"); } - let core_set: Vec<_> = match self.config.core_allocation { - CoreAllocation::PinnedCores { min: _, max: _ } => { - todo!("Need to store pinning mask somewhere"); - } - CoreAllocation::DedicatedCoreSet { min, max } => (min..max).collect(), - CoreAllocation::OsDefault => (0..affinity::get_core_num()).collect(), - }; + let core_alloc = self.config.core_allocation.clone(); + let priority = self.config.priority; + let chosen_cores_mask = Mutex::new(self.config.core_allocation.as_core_mask_vector()); let n = self.id_count.fetch_add(1, Ordering::SeqCst); let jh = std::thread::Builder::new() .name(format!("{}-{}", &self.config.name_base, n)) .stack_size(self.config.stack_size_bytes) .spawn(move || { - affinity::set_thread_affinity(core_set).expect("Can not set thread affinity"); + apply_policy(&core_alloc, priority, &chosen_cores_mask); f() })?; self.running_count.fetch_add(1, Ordering::SeqCst); diff --git a/thread-manager/src/policy.rs b/thread-manager/src/policy.rs index 357b7f2b2a5b40..aa5b3e9aef6a6c 100644 --- a/thread-manager/src/policy.rs +++ b/thread-manager/src/policy.rs @@ -1,4 +1,7 @@ -use serde::{Deserialize, Serialize}; +use { + serde::{Deserialize, Serialize}, + thread_priority::ThreadExt, +}; #[derive(Default, Debug, Clone, Serialize, Deserialize)] pub enum CoreAllocation { @@ -23,4 +26,35 @@ impl CoreAllocation { } ///Applies policy to the calling thread -pub fn apply_policy(alloc: &CoreAllocation, priority: u32) {} +pub fn apply_policy( + alloc: &CoreAllocation, + priority: u8, + chosen_cores_mask: &std::sync::Mutex<Vec<usize>>, +) { + std::thread::current() + .set_priority(thread_priority::ThreadPriority::Crossplatform( + (priority).try_into().unwrap(), + )) + .expect("Can not set thread priority!"); + + match alloc { + CoreAllocation::PinnedCores { min: _, max: _ } => { + let mut lg = chosen_cores_mask + .lock() + .expect("Can not lock core mask mutex"); + let core = lg + .pop() + .expect("Not enough cores provided for pinned allocation"); + affinity::set_thread_affinity([core]) + .expect("Can not set thread affinity for runtime worker"); + } + CoreAllocation::DedicatedCoreSet { min: _, max: _ } => { + let lg = chosen_cores_mask + .lock() + .expect("Can not lock core mask mutex"); + affinity::set_thread_affinity(&(*lg)) + .expect("Can not set thread affinity for runtime worker"); + } + CoreAllocation::OsDefault => {} + } +} diff --git a/thread-manager/src/rayon_runtime.rs b/thread-manager/src/rayon_runtime.rs index 
bc050be58e8f6b..242cf9f2458f6a 100644 --- a/thread-manager/src/rayon_runtime.rs +++ b/thread-manager/src/rayon_runtime.rs @@ -2,13 +2,14 @@ use { crate::policy::{apply_policy, CoreAllocation}, anyhow::Ok, serde::{Deserialize, Serialize}, + std::sync::Mutex, }; #[derive(Clone, Debug, Serialize, Deserialize)] #[serde(default)] pub struct RayonConfig { pub worker_threads: usize, - pub priority: u32, + pub priority: u8, pub stack_size_bytes: usize, pub core_allocation: CoreAllocation, } @@ -31,13 +32,14 @@ pub struct RayonRuntime { } impl RayonRuntime { - fn new(config: RayonConfig) -> anyhow::Result<Self> { + pub fn new(config: RayonConfig) -> anyhow::Result<Self> { let policy = config.core_allocation.clone(); + let chosen_cores_mask = Mutex::new(policy.as_core_mask_vector()); let priority = config.priority; let rayon_pool = rayon::ThreadPoolBuilder::new() .num_threads(config.worker_threads) - .start_handler(move |idx| { - apply_policy(&policy, priority); + .start_handler(move |_idx| { + apply_policy(&policy, priority, &chosen_cores_mask); }) .build()?; Ok(Self { rayon_pool, config }) diff --git a/thread-manager/src/tokio_runtime.rs b/thread-manager/src/tokio_runtime.rs index e5d2efda70b2e6..ac8eb3d38f1dc3 100644 --- a/thread-manager/src/tokio_runtime.rs +++ b/thread-manager/src/tokio_runtime.rs @@ -1,5 +1,5 @@ use { - crate::policy::CoreAllocation, + crate::policy::{apply_policy, CoreAllocation}, serde::{Deserialize, Serialize}, std::{ future::Future, @@ -18,7 +18,7 @@ pub struct TokioConfig { pub worker_threads: usize, ///max number of blocking threads tokio is allowed to spawn pub max_blocking_threads: usize, - pub priority: u32, + pub priority: u8, pub stack_size_bytes: usize, pub event_interval: u32, pub core_allocation: CoreAllocation, @@ -83,35 +83,10 @@ impl TokioRuntime { let _tid = cur_thread .get_native_id() .expect("Can not get thread id for newly created thread"); - let tname = cur_thread.name().unwrap(); + // todo - tracing + //let tname = cur_thread.name().unwrap(); //println!("thread {tname} id {tid} started"); - std::thread::current() - .set_priority(thread_priority::ThreadPriority::Crossplatform( - (c.priority as u8).try_into().unwrap(), - )) - .expect("Can not set thread priority!"); - - match c.core_allocation { - CoreAllocation::PinnedCores { min: _, max: _ } => { - let mut lg = chosen_cores_mask - .lock() - .expect("Can not lock core mask mutex"); - let core = lg - .pop() - .expect("Not enough cores provided for pinned allocation"); - println!("Pinning worker {tname} to core {core}"); - affinity::set_thread_affinity([core]) - .expect("Can not set thread affinity for runtime worker"); - } - CoreAllocation::DedicatedCoreSet { min: _, max: _ } => { - let lg = chosen_cores_mask - .lock() - .expect("Can not lock core mask mutex"); - affinity::set_thread_affinity(&(*lg)) - .expect("Can not set thread affinity for runtime worker"); - } - CoreAllocation::OsDefault => {} - } + apply_policy(&c.core_allocation, c.priority, &chosen_cores_mask); }); Ok(TokioRuntime { tokio: builder.build()?, From 6e98c228be897184de812cf14050869ab5ff24fa Mon Sep 17 00:00:00 2001 From: Alex Pyattaev <alex.pyattaev@anza.xyz> Date: Sat, 7 Dec 2024 23:08:44 +0000 Subject: [PATCH 4/9] metrics for thread manager --- Cargo.lock | 3 + thread-manager/Cargo.toml | 7 ++- thread-manager/README.md | 18 ++++-- .../examples/core_contention_basics.rs | 8 +-- thread-manager/src/lib.rs | 62 +++++++------------ thread-manager/src/native_thread_runtime.rs | 21 ++++--- thread-manager/src/policy.rs | 12 ++-- 
thread-manager/src/rayon_runtime.rs | 9 ++- thread-manager/src/tokio_runtime.rs | 55 ++++++++++++++-- 9 files changed, 126 insertions(+), 69 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8c3a02713a6af8..512798bb685981 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -251,9 +251,12 @@ dependencies = [ "affinity", "anyhow", "axum 0.7.9", + "log", + "num_cpus", "rayon", "serde", "serde_json", + "solana-metrics", "thread-priority", "tokio", ] diff --git a/thread-manager/Cargo.toml b/thread-manager/Cargo.toml index f127842053da57..f7d021b263a4f6 100644 --- a/thread-manager/Cargo.toml +++ b/thread-manager/Cargo.toml @@ -12,12 +12,17 @@ edition = { workspace = true } publish = false [dependencies] -affinity = "0.1.2" anyhow = { workspace = true } +log = { workspace = true } serde = { workspace = true, features = ["derive"] } thread-priority = "1.2.0" tokio = { workspace = true, features = ["time", "rt-multi-thread"] } rayon = { workspace = true } +num_cpus.workspace = true +solana-metrics.workspace = true + +[target.'cfg(target_os = "linux")'.dependencies] +affinity = "0.1.2" [dev-dependencies] axum = "0.7.9" diff --git a/thread-manager/README.md b/thread-manager/README.md index 5a75d1dfef28bb..f9e2cbe402a37e 100644 --- a/thread-manager/README.md +++ b/thread-manager/README.md @@ -1,6 +1,11 @@ # thread-manager -Balances machine resources between multiple threaded runtimes. The purpose is to manage thread contention between different parts of the code that may -benefit from a diverse set of management options. For example, we may want to have cores 1-4 handling networking via Tokio, core 5 handling file IO via Tokio, cores 9-16 hallocated for Rayon thread pool, and cores 6-8 available for general use by std::thread. This will minimize contention for CPU caches and context switches that would occur if Rayon was entirely unaware it was running side-by-side with tokio, and each was to spawn as many threads as there are cores. +Balances machine resources between multiple threaded runtimes. The purpose is to manage thread contention +between different parts of the code that may +benefit from a diverse set of management options. For example, we may want to have cores 1-4 handling +networking via Tokio, core 5 handling file IO via Tokio, cores 9-16 allocated for the Rayon thread pool, +and cores 6-8 available for general use by std::thread. This will minimize contention for CPU caches +and context switches that would occur if Rayon was entirely unaware it was running side-by-side with +Tokio, and each was to spawn as many threads as there are cores. # Supported threading models ## Tokio Multiple tokio runtimes can be created, and each may be assigned its own pool of Number of worker and blocking threads is configurable, as are thread priorities for the pool. ## Native -Native threads (std::thread) can be spawned from managed pools, this allows them to inheirt a particular affinity from the pool, as well as to +Native threads (std::thread) can be spawned from managed pools; this allows them to inherit a particular +affinity from the pool, as well as to control the total number of threads made in every pool. ## Rayon -Rayon already manages thread pools well enough, all thread_manager does on top is enforce affinity and priority for rayon threads. Normally one would only ever have one rayon pool, but for priority allocations one may want to spawn many rayon pools.
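To make the core split described in this README concrete, here is a minimal sketch (not part of the patch) of how such a layout could be expressed with the crate's config types. The pool names "net" and "compute" are invented for illustration, and the name-to-runtime mapping entries are left out:

```rust
use {agave_thread_manager::*, std::collections::HashMap};

// Hypothetical layout mirroring the README example: cores 1-4 for Tokio
// networking, cores 9-16 for a Rayon pool, cores 6-8 for plain std::thread.
fn example_config() -> RuntimeManagerConfig {
    RuntimeManagerConfig {
        tokio_configs: HashMap::from([(
            "net".to_owned(),
            TokioConfig {
                worker_threads: 4,
                // DedicatedCoreSet { min, max } covers cores min..max (max exclusive)
                core_allocation: CoreAllocation::DedicatedCoreSet { min: 1, max: 5 },
                ..Default::default()
            },
        )]),
        rayon_configs: HashMap::from([(
            "compute".to_owned(),
            RayonConfig {
                worker_threads: 8,
                core_allocation: CoreAllocation::DedicatedCoreSet { min: 9, max: 17 },
                ..Default::default()
            },
        )]),
        // Threads spawned outside the managed pools inherit this allocation.
        default_core_allocation: CoreAllocation::DedicatedCoreSet { min: 6, max: 9 },
        ..Default::default()
    }
}
```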
+Rayon already manages thread pools well enough; all thread_manager does on top is enforce affinity and +priority for rayon threads. Normally one would only ever have one rayon pool, but for priority allocations +one may want to spawn many rayon pools. # Limitations * Thread pools can only be created at process startup * Once thread pool is created, its policy can not be modified at runtime + * Thread affinity is not supported outside of Linux # TODO: * support tracing + * better metrics integration * proper error handling everywhere * even more tests diff --git a/thread-manager/examples/core_contention_basics.rs b/thread-manager/examples/core_contention_basics.rs index ff1bf7c9fc4279..36a750fc872f69 100644 --- a/thread-manager/examples/core_contention_basics.rs +++ b/thread-manager/examples/core_contention_basics.rs @@ -35,15 +35,9 @@ async fn axum_main(port: u16) { } } } -use {affinity::*, agave_thread_manager::*}; +use agave_thread_manager::*; fn main() -> anyhow::Result<()> { - println!( - "\tCurrent thread affinity : {:?}", - get_thread_affinity().unwrap() - ); - println!("\tTotal cores : {}", get_core_num()); - let experiments = [ "examples/core_contention_dedicated_set.json", "examples/core_contention_contending_set.json", diff --git a/thread-manager/src/lib.rs b/thread-manager/src/lib.rs index 87e1393e4e630b..ea0c554b0273f7 100644 --- a/thread-manager/src/lib.rs +++ b/thread-manager/src/lib.rs @@ -66,8 +66,10 @@ impl RuntimeManager { } }; - if let Err(e) = affinity::set_thread_affinity(&chosen_cores_mask) { - anyhow::bail!(e.to_string()) + if cfg!(target_os = "linux") { + if let Err(e) = affinity::set_thread_affinity(&chosen_cores_mask) { + anyhow::bail!(e.to_string()) + } } Ok(chosen_cores_mask) } @@ -129,6 +131,14 @@ mod tests { std::collections::HashMap, }; + fn validate_affinity(expect_cores: &[usize], error_msg: &str) { + // Nobody runs Agave on windows, and on Mac we can not set mask affinity without patching external crate + if cfg!(target_os = "linux") { + let aff = affinity::get_thread_affinity().unwrap(); + assert_eq!(aff, expect_cores, "{}", error_msg); + } + } + #[test] fn process_affinity() { let conf = RuntimeManagerConfig { @@ -151,21 +161,17 @@ let t2 = r .spawn(|| { - let aff = affinity::get_thread_affinity().unwrap(); - assert_eq!(aff, [0, 1, 2, 3], "Managed thread allocation should be 0-3"); + validate_affinity(&[0, 1, 2, 3], "Managed thread allocation should be 0-3"); }) .unwrap(); let t = std::thread::spawn(|| { - let aff = affinity::get_thread_affinity().unwrap(); - assert_eq!(aff, [4, 5, 6, 7], "Default thread allocation should be 4-7"); + validate_affinity(&[4, 5, 6, 7], "Default thread allocation should be 4-7"); let tt = std::thread::spawn(|| { - let aff = affinity::get_thread_affinity().unwrap(); - assert_eq!( - aff, - [4, 5, 6, 7], - "Nested thread allocation should still be 4-7" + validate_affinity( + &[4, 5, 6, 7], + "Nested thread allocation should still be 4-7", ); }); tt.join().unwrap(); @@ -173,6 +179,7 @@ mod tests { t.join().unwrap(); t2.join().unwrap(); } + #[test] fn rayon_affinity() { let conf = RuntimeManagerConfig { @@ -206,46 +213,25 @@ mod tests { let t2 = r .spawn(|| { - let aff = affinity::get_thread_affinity().unwrap(); - assert_eq!(aff, [0, 1, 2, 3], "Managed thread allocation should be 0-3"); + validate_affinity(&[0, 1, 2, 3], "Managed thread allocation should be 0-3"); }) .unwrap(); let rrt = rtm.get_rayon("test").unwrap(); - /*.spawn_handler(|thread| { - let mut b = std::thread::Builder::new(); - if let Some(name) = 
thread.name() { - b = b.name(name.to_owned()); - } - if let Some(stack_size) = thread.stack_size() { - b = b.stack_size(stack_size); - } - b.spawn(|| thread.run())?; - Ok(()) - })*/ - let t = std::thread::spawn(|| { - let aff = affinity::get_thread_affinity().unwrap(); - assert_eq!(aff, [4, 5, 6, 7], "Default thread allocation should be 4-7"); + validate_affinity(&[4, 5, 6, 7], "Default thread allocation should be 4-7"); let tt = std::thread::spawn(|| { - let aff = affinity::get_thread_affinity().unwrap(); - assert_eq!( - aff, - [4, 5, 6, 7], - "Nested thread allocation should still be 4-7" + validate_affinity( + &[4, 5, 6, 7], + "Nested thread allocation should still be 4-7", ); }); tt.join().unwrap(); }); let _rr = rrt.rayon_pool.broadcast(|ctx| { - let aff = affinity::get_thread_affinity().unwrap(); println!("Rayon thread {} reporting", ctx.index()); - assert_eq!( - aff, - [1, 2, 3], - "Rayon thread allocation should still be 1-3" - ); + validate_affinity(&[1, 2, 3], "Rayon thread allocation should still be 1-3"); }); t.join().unwrap(); t2.join().unwrap(); diff --git a/thread-manager/src/native_thread_runtime.rs b/thread-manager/src/native_thread_runtime.rs index a8ce5da516e819..9653e68290a3d0 100644 --- a/thread-manager/src/native_thread_runtime.rs +++ b/thread-manager/src/native_thread_runtime.rs @@ -2,6 +2,7 @@ use { crate::policy::{apply_policy, CoreAllocation}, anyhow::bail, serde::{Deserialize, Serialize}, + solana_metrics::datapoint_info, std::sync::{ atomic::{AtomicUsize, Ordering}, Arc, Mutex, @@ -44,18 +45,17 @@ pub struct JoinHandle<T> { impl<T> JoinHandle<T> { fn join_inner(&mut self) -> Result<T, Box<dyn core::any::Any + Send + 'static>> { - let r = match self.std_handle.take() { + match self.std_handle.take() { Some(jh) => { - let r = jh.join(); - self.running_count.fetch_sub(1, Ordering::SeqCst); - r + let result = jh.join(); + let rc = self.running_count.fetch_sub(1, Ordering::Relaxed); + datapoint_info!("thread-manager-native", ("threads-running", rc, i64),); + result } None => { panic!("Thread already joined"); } - }; - dbg!(self.std_handle.is_some()); - r + } } pub fn join(mut self) -> Result<T, Box<dyn core::any::Any + Send + 'static>> { @@ -93,7 +93,7 @@ impl NativeThreadRuntime { F: Send + 'static, T: Send + 'static, { - let spawned = self.running_count.load(Ordering::SeqCst); + let spawned = self.running_count.load(Ordering::Relaxed); if spawned >= self.config.max_threads { bail!("All allowed threads in this pool are already spawned"); } @@ -101,7 +101,7 @@ impl NativeThreadRuntime { let core_alloc = self.config.core_allocation.clone(); let priority = self.config.priority; let chosen_cores_mask = Mutex::new(self.config.core_allocation.as_core_mask_vector()); - let n = self.id_count.fetch_add(1, Ordering::SeqCst); + let n = self.id_count.fetch_add(1, Ordering::Relaxed); let jh = std::thread::Builder::new() .name(format!("{}-{}", &self.config.name_base, n)) .stack_size(self.config.stack_size_bytes) @@ -109,7 +109,8 @@ impl NativeThreadRuntime { apply_policy(&core_alloc, priority, &chosen_cores_mask); f() })?; - self.running_count.fetch_add(1, Ordering::SeqCst); + let rc = self.running_count.fetch_add(1, Ordering::Relaxed); + datapoint_info!("thread-manager-native", ("threads-running", rc as i64, i64),); Ok(JoinHandle { std_handle: Some(jh), running_count: self.running_count.clone(), diff --git a/thread-manager/src/policy.rs b/thread-manager/src/policy.rs index aa5b3e9aef6a6c..15576e9ce372f0 100644 --- a/thread-manager/src/policy.rs +++ b/thread-manager/src/policy.rs 
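The policy.rs hunk below is easier to review with the core-mask semantics spelled out: `apply_policy` receives the pool's remaining cores behind a `Mutex<Vec<usize>>`, and the two non-default allocation modes consume it differently. A standalone sketch of just that selection step (an illustration under those assumptions, not the patch's code verbatim):

```rust
use std::sync::Mutex;

// PinnedCores gives each new thread its own core by popping the shared mask
// (and, like the real code, panics once more threads start than cores were
// provided); DedicatedCoreSet gives every thread the full set.
fn cores_for_next_thread(pinned: bool, mask: &Mutex<Vec<usize>>) -> Vec<usize> {
    let mut guard = mask.lock().expect("Can not lock core mask mutex");
    if pinned {
        vec![guard
            .pop()
            .expect("Not enough cores provided for pinned allocation")]
    } else {
        guard.clone()
    }
}
```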
@@ -45,15 +45,19 @@ pub fn apply_policy( let core = lg .pop() .expect("Not enough cores provided for pinned allocation"); - affinity::set_thread_affinity([core]) - .expect("Can not set thread affinity for runtime worker"); + if cfg!(target_os = "linux") { + affinity::set_thread_affinity([core]) + .expect("Can not set thread affinity for runtime worker"); + } } CoreAllocation::DedicatedCoreSet { min: _, max: _ } => { let lg = chosen_cores_mask .lock() .expect("Can not lock core mask mutex"); - affinity::set_thread_affinity(&(*lg)) - .expect("Can not set thread affinity for runtime worker"); + if cfg!(target_os = "linux") { + affinity::set_thread_affinity(&(*lg)) + .expect("Can not set thread affinity for runtime worker"); + } } CoreAllocation::OsDefault => {} } diff --git a/thread-manager/src/rayon_runtime.rs b/thread-manager/src/rayon_runtime.rs index 242cf9f2458f6a..f1a106a4453657 100644 --- a/thread-manager/src/rayon_runtime.rs +++ b/thread-manager/src/rayon_runtime.rs @@ -2,7 +2,11 @@ use { crate::policy::{apply_policy, CoreAllocation}, anyhow::Ok, serde::{Deserialize, Serialize}, - std::sync::Mutex, + solana_metrics::datapoint_info, + std::sync::{ + atomic::{AtomicI64, Ordering}, + Mutex, + }, }; #[derive(Clone, Debug, Serialize, Deserialize)] @@ -36,9 +40,12 @@ impl RayonRuntime { let policy = config.core_allocation.clone(); let chosen_cores_mask = Mutex::new(policy.as_core_mask_vector()); let priority = config.priority; + let spawned_threads = AtomicI64::new(0); let rayon_pool = rayon::ThreadPoolBuilder::new() .num_threads(config.worker_threads) .start_handler(move |_idx| { + let rc = spawned_threads.fetch_add(1, Ordering::Relaxed); + datapoint_info!("thread-manager-rayon", ("threads-spawned", rc, i64),); apply_policy(&policy, priority, &chosen_cores_mask); }) .build()?; diff --git a/thread-manager/src/tokio_runtime.rs b/thread-manager/src/tokio_runtime.rs index ac8eb3d38f1dc3..63da2b2f0ebc4b 100644 --- a/thread-manager/src/tokio_runtime.rs +++ b/thread-manager/src/tokio_runtime.rs @@ -1,11 +1,12 @@ use { crate::policy::{apply_policy, CoreAllocation}, serde::{Deserialize, Serialize}, + solana_metrics::datapoint_info, std::{ future::Future, sync::{ - atomic::{AtomicUsize, Ordering}, - Mutex, + atomic::{AtomicI64, AtomicUsize, Ordering}, + Arc, Mutex, }, }, thread_priority::ThreadExt, @@ -37,15 +38,46 @@ impl Default for TokioConfig { } } +#[derive(Debug)] +pub struct ThreadCounters { + pub namespace: &'static str, + pub parked_threads_cnt: AtomicI64, + pub active_threads_cnt: AtomicI64, +} + +impl ThreadCounters { + pub fn on_park(&self) { + let parked = self.parked_threads_cnt.fetch_add(1, Ordering::Relaxed); + let active = self.active_threads_cnt.fetch_sub(1, Ordering::Relaxed); + datapoint_info!( + self.namespace, + ("threads_parked", parked, i64), + ("threads_active", active, i64), + ); + } + + pub fn on_unpark(&self) { + let parked = self.parked_threads_cnt.fetch_sub(1, Ordering::Relaxed); + let active = self.active_threads_cnt.fetch_add(1, Ordering::Relaxed); + datapoint_info!( + self.namespace, + ("threads_parked", parked, i64), + ("threads_active", active, i64), + ); + } +} + #[derive(Debug)] pub struct TokioRuntime { pub(crate) tokio: tokio::runtime::Runtime, pub config: TokioConfig, + pub counters: Arc<ThreadCounters>, } + impl TokioRuntime { pub(crate) fn new(name: String, cfg: TokioConfig) -> anyhow::Result<Self> { let num_workers = if cfg.worker_threads == 0 { - affinity::get_core_num() + num_cpus::get() } else { cfg.worker_threads }; @@ -65,12 +97,26 @@ impl TokioRuntime 
{ } }; let atomic_id: AtomicUsize = AtomicUsize::new(0); + + let counters = Arc::new(ThreadCounters { + namespace: format!("thread-manager-tokio-{}", &base_name).leak(), // no workaround, metrics crate will only consume 'static str + parked_threads_cnt: AtomicI64::new(0), + active_threads_cnt: AtomicI64::new(0), + }); + let counters_clone1 = counters.clone(); + let counters_clone2 = counters.clone(); builder .event_interval(cfg.event_interval) .thread_name_fn(move || { - let id = atomic_id.fetch_add(1, Ordering::SeqCst); + let id = atomic_id.fetch_add(1, Ordering::Relaxed); format!("{}-{}", base_name, id) }) + .on_thread_park(move || { + counters_clone1.on_park(); + }) + .on_thread_unpark(move || { + counters_clone2.on_unpark(); + }) .thread_stack_size(cfg.stack_size_bytes) .enable_all() .max_blocking_threads(cfg.max_blocking_threads); @@ -91,6 +137,7 @@ impl TokioRuntime { Ok(TokioRuntime { tokio: builder.build()?, config: cfg.clone(), + counters, }) } /* This is bad idea... From 673894d48bb45d2e0aca912fdd8fdc39761fe975 Mon Sep 17 00:00:00 2001 From: Alex Pyattaev <alex.pyattaev@anza.xyz> Date: Sun, 8 Dec 2024 06:05:05 +0000 Subject: [PATCH 5/9] fix macos build --- thread-manager/Cargo.toml | 6 +++--- thread-manager/src/lib.rs | 25 ++++++++----------------- thread-manager/src/policy.rs | 18 ++++++++++-------- 3 files changed, 21 insertions(+), 28 deletions(-) diff --git a/thread-manager/Cargo.toml b/thread-manager/Cargo.toml index f7d021b263a4f6..66cf3c8d5e1600 100644 --- a/thread-manager/Cargo.toml +++ b/thread-manager/Cargo.toml @@ -14,12 +14,12 @@ publish = false [dependencies] anyhow = { workspace = true } log = { workspace = true } +num_cpus ={ workspace = true } +rayon = { workspace = true } serde = { workspace = true, features = ["derive"] } +solana-metrics ={ workspace = true } thread-priority = "1.2.0" tokio = { workspace = true, features = ["time", "rt-multi-thread"] } -rayon = { workspace = true } -num_cpus.workspace = true -solana-metrics.workspace = true [target.'cfg(target_os = "linux")'.dependencies] affinity = "0.1.2" diff --git a/thread-manager/src/lib.rs b/thread-manager/src/lib.rs index ea0c554b0273f7..b6a88374a42825 100644 --- a/thread-manager/src/lib.rs +++ b/thread-manager/src/lib.rs @@ -58,19 +58,9 @@ impl RuntimeManager { self.tokio_runtimes.get(n) } pub fn set_process_affinity(config: &RuntimeManagerConfig) -> anyhow::Result<Vec<usize>> { - let chosen_cores_mask: Vec<usize> = { - match config.default_core_allocation { - CoreAllocation::PinnedCores { min, max } => (min..max).collect(), - CoreAllocation::DedicatedCoreSet { min, max } => (min..max).collect(), - CoreAllocation::OsDefault => vec![], - } - }; + let chosen_cores_mask = config.default_core_allocation.as_core_mask_vector(); - if cfg!(target_os = "linux") { - if let Err(e) = affinity::set_thread_affinity(&chosen_cores_mask) { - anyhow::bail!(e.to_string()) - } - } + crate::policy::set_thread_affinity(&chosen_cores_mask); Ok(chosen_cores_mask) } @@ -131,13 +121,14 @@ mod tests { std::collections::HashMap, }; + // Nobody runs Agave on windows, and on Mac we can not set mask affinity without patching external crate + #[cfg(target_os = "linux")] fn validate_affinity(expect_cores: &[usize], error_msg: &str) { - // Nobody runs Agave on windows, and on Mac we can not set mask affinity without patching external crate - if cfg!(target_os = "linux") { - let aff = affinity::get_thread_affinity().unwrap(); - assert_eq!(aff, expect_cores, "{}", error_msg); - } + let aff = affinity::get_thread_affinity().unwrap(); + 
assert_eq!(aff, expect_cores, "{}", error_msg); } + #[cfg(not(target_os = "linux"))] + fn validate_affinity(_expect_cores: &[usize], _error_msg: &str) {} #[test] fn process_affinity() { diff --git a/thread-manager/src/policy.rs b/thread-manager/src/policy.rs index 15576e9ce372f0..828745d80372cd 100644 --- a/thread-manager/src/policy.rs +++ b/thread-manager/src/policy.rs @@ -25,6 +25,14 @@ impl CoreAllocation { } } +#[cfg(target_os = "linux")] +pub fn set_thread_affinity(cores: &[usize]) { + affinity::set_thread_affinity(cores).expect("Can not set thread affinity for runtime worker"); +} + +#[cfg(not(target_os = "linux"))] +pub fn set_thread_affinity(_cores: &[usize]) {} + ///Applies policy to the calling thread pub fn apply_policy( alloc: &CoreAllocation, @@ -45,19 +53,13 @@ pub fn apply_policy( let core = lg .pop() .expect("Not enough cores provided for pinned allocation"); - if cfg!(target_os = "linux") { - affinity::set_thread_affinity([core]) - .expect("Can not set thread affinity for runtime worker"); - } + set_thread_affinity(&[core]); } CoreAllocation::DedicatedCoreSet { min: _, max: _ } => { let lg = chosen_cores_mask .lock() .expect("Can not lock core mask mutex"); - if cfg!(target_os = "linux") { - affinity::set_thread_affinity(&(*lg)) - .expect("Can not set thread affinity for runtime worker"); - } + set_thread_affinity(&lg); } CoreAllocation::OsDefault => {} } From 268bffd3be18bed11fc8b04c7300cad21583e102 Mon Sep 17 00:00:00 2001 From: Alex Pyattaev <alex.pyattaev@anza.xyz> Date: Wed, 11 Dec 2024 22:20:10 +0000 Subject: [PATCH 6/9] updates to match the needs of the integration process --- thread-manager/Cargo.toml | 5 +- .../examples/core_contention_basics.rs | 13 +-- .../core_contention_contending_set.json | 31 ------- .../core_contention_contending_set.toml | 13 +++ .../core_contention_dedicated_set.json | 31 ------- .../core_contention_dedicated_set.toml | 13 +++ .../core_contention_single_runtime.json | 20 ----- .../examples/core_contention_sweep.rs | 10 +-- thread-manager/src/lib.rs | 80 +++++++++++++------ thread-manager/src/native_thread_runtime.rs | 12 +-- thread-manager/src/rayon_runtime.rs | 3 +- thread-manager/src/tokio_runtime.rs | 36 ++------- 12 files changed, 111 insertions(+), 156 deletions(-) delete mode 100644 thread-manager/examples/core_contention_contending_set.json create mode 100644 thread-manager/examples/core_contention_contending_set.toml delete mode 100644 thread-manager/examples/core_contention_dedicated_set.json create mode 100644 thread-manager/examples/core_contention_dedicated_set.toml delete mode 100644 thread-manager/examples/core_contention_single_runtime.json diff --git a/thread-manager/Cargo.toml b/thread-manager/Cargo.toml index 66cf3c8d5e1600..265eb3aafd97fb 100644 --- a/thread-manager/Cargo.toml +++ b/thread-manager/Cargo.toml @@ -14,10 +14,10 @@ publish = false [dependencies] anyhow = { workspace = true } log = { workspace = true } -num_cpus ={ workspace = true } +num_cpus = { workspace = true } rayon = { workspace = true } serde = { workspace = true, features = ["derive"] } -solana-metrics ={ workspace = true } +solana-metrics = { workspace = true } thread-priority = "1.2.0" tokio = { workspace = true, features = ["time", "rt-multi-thread"] } @@ -27,3 +27,4 @@ affinity = "0.1.2" [dev-dependencies] axum = "0.7.9" serde_json = { workspace = true } +toml = { workspace = true } diff --git a/thread-manager/examples/core_contention_basics.rs b/thread-manager/examples/core_contention_basics.rs index 36a750fc872f69..219712df060a68 100644 --- 
a/thread-manager/examples/core_contention_basics.rs +++ b/thread-manager/examples/core_contention_basics.rs @@ -1,6 +1,6 @@ use std::{ future::IntoFuture, - io::Write, + io::{Read, Write}, net::{IpAddr, Ipv4Addr, SocketAddr}, path::PathBuf, time::Duration, @@ -48,11 +48,12 @@ fn main() -> anyhow::Result<()> { println!("Running {exp}"); let mut conffile = PathBuf::from(env!("CARGO_MANIFEST_DIR")); conffile.push(exp); - let conffile = std::fs::File::open(conffile)?; - let cfg: RuntimeManagerConfig = serde_json::from_reader(conffile)?; + let mut buf = String::new(); + std::fs::File::open(conffile)?.read_to_string(&mut buf)?; + let cfg: RuntimeManagerConfig = toml::from_str(&buf)?; //println!("Loaded config {}", serde_json::to_string_pretty(&cfg)?); - let rtm = RuntimeManager::new(cfg).unwrap(); + let rtm = ThreadManager::new(cfg).unwrap(); let tok1 = rtm .get_tokio("axum1") .expect("Expecting runtime named axum1"); @@ -63,10 +64,10 @@ fn main() -> anyhow::Result<()> { let wrk_cores: Vec<_> = (32..64).collect(); let results = std::thread::scope(|s| { s.spawn(|| { - tok1.start(axum_main(8888)); + tok1.tokio.block_on(axum_main(8888)); }); s.spawn(|| { - tok2.start(axum_main(8889)); + tok2.tokio.block_on(axum_main(8889)); }); let jh = s.spawn(|| run_wrk(&[8888, 8889], &wrk_cores, wrk_cores.len(), 1000).unwrap()); jh.join().expect("WRK crashed!") diff --git a/thread-manager/examples/core_contention_contending_set.json b/thread-manager/examples/core_contention_contending_set.json deleted file mode 100644 index 1225cc8e494b0f..00000000000000 --- a/thread-manager/examples/core_contention_contending_set.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "tokio_configs": { - "tokio1": { - "worker_threads": 8, - "max_blocking_threads": 1, - "priority": 0, - "core_allocation": { - "DedicatedCoreSet": { - "min": 0, - "max": 8 - } - } - }, - "tokio2": { - "worker_threads": 8, - "max_blocking_threads": 1, - "priority": 0, - "core_allocation": { - "DedicatedCoreSet": { - "min": 0, - "max": 8 - } - } - } - }, - "tokio_runtime_mapping": { - "axum2": "tokio2", - "axum1": "tokio1" - }, - "native_configs": {} -} diff --git a/thread-manager/examples/core_contention_contending_set.toml b/thread-manager/examples/core_contention_contending_set.toml new file mode 100644 index 00000000000000..e383987a5a432c --- /dev/null +++ b/thread-manager/examples/core_contention_contending_set.toml @@ -0,0 +1,13 @@ +[native_configs] + +[rayon_configs] + +[tokio_configs.axum1] +worker_threads = 8 +max_blocking_threads = 1 +core_allocation.DedicatedCoreSet = { min = 0, max = 8 } + +[tokio_configs.axum2] +worker_threads = 8 +max_blocking_threads = 1 +core_allocation.DedicatedCoreSet = { min = 0, max = 8 } diff --git a/thread-manager/examples/core_contention_dedicated_set.json b/thread-manager/examples/core_contention_dedicated_set.json deleted file mode 100644 index 4e9c76170cf7cf..00000000000000 --- a/thread-manager/examples/core_contention_dedicated_set.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "tokio_configs": { - "tokio1": { - "worker_threads": 4, - "max_blocking_threads": 1, - "priority": 0, - "core_allocation": { - "DedicatedCoreSet": { - "min": 0, - "max": 4 - } - } - }, - "tokio2": { - "worker_threads": 4, - "max_blocking_threads": 1, - "priority": 0, - "core_allocation": { - "DedicatedCoreSet": { - "min": 4, - "max": 8 - } - } - } - }, - "tokio_runtime_mapping": { - "axum2": "tokio2", - "axum1": "tokio1" - }, - "native_configs": {} -} diff --git a/thread-manager/examples/core_contention_dedicated_set.toml 
b/thread-manager/examples/core_contention_dedicated_set.toml new file mode 100644 index 00000000000000..a82af7d9f5fd47 --- /dev/null +++ b/thread-manager/examples/core_contention_dedicated_set.toml @@ -0,0 +1,13 @@ +[native_configs] + +[rayon_configs] + +[tokio_configs.axum1] +worker_threads = 4 +max_blocking_threads = 1 +core_allocation.DedicatedCoreSet = { min = 0, max = 4 } + +[tokio_configs.axum2] +worker_threads = 4 +max_blocking_threads = 1 +core_allocation.DedicatedCoreSet = { min = 4, max = 8 } diff --git a/thread-manager/examples/core_contention_single_runtime.json b/thread-manager/examples/core_contention_single_runtime.json deleted file mode 100644 index 42d743a188cc35..00000000000000 --- a/thread-manager/examples/core_contention_single_runtime.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "tokio_configs": { - "tokio1": { - "worker_threads": 8, - "max_blocking_threads": 1, - "priority": 0, - "core_allocation": { - "DedicatedCoreSet": { - "min": 0, - "max": 8 - } - } - } - }, - "tokio_runtime_mapping": { - "axum2": "tokio1", - "axum1": "tokio1" - }, - "native_configs": {} -} diff --git a/thread-manager/examples/core_contention_sweep.rs b/thread-manager/examples/core_contention_sweep.rs index f160ddf3886d4e..51ba4c08e714bd 100644 --- a/thread-manager/examples/core_contention_sweep.rs +++ b/thread-manager/examples/core_contention_sweep.rs @@ -112,7 +112,7 @@ fn main() -> anyhow::Result<()> { println!("Running {core_cnt} cores under {regime:?}"); let (tok1, tok2) = match regime { Regime::Shared => { - rtm = RuntimeManager::new(make_config_shared(core_cnt)).unwrap(); + rtm = ThreadManager::new(make_config_shared(core_cnt)).unwrap(); ( rtm.get_tokio("axum1") .expect("Expecting runtime named axum1"), @@ -121,7 +121,7 @@ fn main() -> anyhow::Result<()> { ) } Regime::Dedicated => { - rtm = RuntimeManager::new(make_config_dedicated(core_cnt)).unwrap(); + rtm = ThreadManager::new(make_config_dedicated(core_cnt)).unwrap(); ( rtm.get_tokio("axum1") .expect("Expecting runtime named axum1"), @@ -130,7 +130,7 @@ fn main() -> anyhow::Result<()> { ) } Regime::Single => { - rtm = RuntimeManager::new(make_config_shared(core_cnt)).unwrap(); + rtm = ThreadManager::new(make_config_shared(core_cnt)).unwrap(); ( rtm.get_tokio("axum1") .expect("Expecting runtime named axum1"), @@ -143,7 +143,7 @@ fn main() -> anyhow::Result<()> { let wrk_cores: Vec<_> = (32..64).collect(); let results = std::thread::scope(|s| { s.spawn(|| { - tok1.start(axum_main(8888)); + tok1.tokio.spawn(axum_main(8888)); }); let jh = match regime { Regime::Single => s.spawn(|| { @@ -151,7 +151,7 @@ fn main() -> anyhow::Result<()> { }), _ => { s.spawn(|| { - tok2.start(axum_main(8889)); + tok2.tokio.spawn(axum_main(8889)); }); s.spawn(|| { run_wrk(&[8888, 8889], &wrk_cores, wrk_cores.len(), 1000).unwrap() diff --git a/thread-manager/src/lib.rs b/thread-manager/src/lib.rs index b6a88374a42825..c348ef525baa05 100644 --- a/thread-manager/src/lib.rs +++ b/thread-manager/src/lib.rs @@ -10,7 +10,7 @@ pub mod rayon_runtime; pub mod tokio_runtime; pub use { - native_thread_runtime::{NativeConfig, NativeThreadRuntime}, + native_thread_runtime::{JoinHandle, NativeConfig, NativeThreadRuntime}, policy::CoreAllocation, rayon_runtime::{RayonConfig, RayonRuntime}, tokio_runtime::{TokioConfig, TokioRuntime}, @@ -18,7 +18,7 @@ pub use { pub type ConstString = Box<str>; #[derive(Default, Debug)] -pub struct RuntimeManager { +pub struct ThreadManager { pub tokio_runtimes: HashMap<ConstString, TokioRuntime>, pub tokio_runtime_mapping: HashMap<ConstString, 
ConstString>, @@ -44,7 +44,7 @@ pub struct RuntimeManagerConfig { pub default_core_allocation: CoreAllocation, } -impl RuntimeManager { +impl ThreadManager { pub fn get_native(&self, name: &str) -> Option<&NativeThreadRuntime> { let n = self.native_runtime_mapping.get(name)?; self.native_thread_runtimes.get(n) @@ -64,36 +64,50 @@ impl RuntimeManager { Ok(chosen_cores_mask) } - pub fn new(config: RuntimeManagerConfig) -> anyhow::Result<Self> { - let mut core_allocations = HashMap::<ConstString, Vec<usize>>::new(); - Self::set_process_affinity(&config)?; - let mut manager = Self::default(); + /// Populates mappings with copies of config names, overrides as appropriate + fn populate_mappings(&mut self, config: &RuntimeManagerConfig) { + //TODO: this should probably be cleaned up with a macro at some point... - //TODO: this should probably be cleaned up at some point... - for (k, v) in config.tokio_runtime_mapping.iter() { - manager - .tokio_runtime_mapping - .insert(k.clone().into_boxed_str(), v.clone().into_boxed_str()); + for name in config.native_configs.keys() { + self.native_runtime_mapping + .insert(name.clone().into_boxed_str(), name.clone().into_boxed_str()); } for (k, v) in config.native_runtime_mapping.iter() { - manager - .native_runtime_mapping + self.native_runtime_mapping .insert(k.clone().into_boxed_str(), v.clone().into_boxed_str()); } - for (k, v) in config.rayon_runtime_mapping.iter() { - manager - .rayon_runtime_mapping + + for name in config.tokio_configs.keys() { + self.tokio_runtime_mapping + .insert(name.clone().into_boxed_str(), name.clone().into_boxed_str()); + } + for (k, v) in config.tokio_runtime_mapping.iter() { + self.tokio_runtime_mapping .insert(k.clone().into_boxed_str(), v.clone().into_boxed_str()); } + for name in config.rayon_configs.keys() { + self.rayon_runtime_mapping + .insert(name.clone().into_boxed_str(), name.clone().into_boxed_str()); + } + for (k, v) in config.rayon_runtime_mapping.iter() { + self.rayon_runtime_mapping + .insert(k.clone().into_boxed_str(), v.clone().into_boxed_str()); + } + } + pub fn new(config: RuntimeManagerConfig) -> anyhow::Result<Self> { + let mut core_allocations = HashMap::<ConstString, Vec<usize>>::new(); + Self::set_process_affinity(&config)?; + let mut manager = Self::default(); + manager.populate_mappings(&config); for (name, cfg) in config.native_configs.iter() { - let nrt = NativeThreadRuntime::new(cfg.clone()); + let nrt = NativeThreadRuntime::new(name.clone(), cfg.clone()); manager .native_thread_runtimes .insert(name.clone().into_boxed_str(), nrt); } for (name, cfg) in config.rayon_configs.iter() { - let rrt = RayonRuntime::new(cfg.clone())?; + let rrt = RayonRuntime::new(name.clone(), cfg.clone())?; manager .rayon_runtimes .insert(name.clone().into_boxed_str(), rrt); @@ -117,10 +131,30 @@ impl RuntimeManager { #[cfg(test)] mod tests { use { - crate::{CoreAllocation, NativeConfig, RayonConfig, RuntimeManager, RuntimeManagerConfig}, - std::collections::HashMap, + crate::{CoreAllocation, NativeConfig, RayonConfig, RuntimeManagerConfig, ThreadManager}, + std::{collections::HashMap, io::Read}, }; + #[test] + fn configtest() { + let experiments = [ + "examples/core_contention_dedicated_set.toml", + "examples/core_contention_contending_set.toml", + ]; + + for exp in experiments { + println!("Loading config {exp}"); + let mut conffile = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); + conffile.push(exp); + let mut buf = String::new(); + std::fs::File::open(conffile) + .unwrap() + .read_to_string(&mut buf) + 
.unwrap(); + let cfg: RuntimeManagerConfig = toml::from_str(&buf).unwrap(); + println!("{:?}", cfg); + } + } // Nobody runs Agave on windows, and on Mac we can not set mask affinity without patching external crate #[cfg(target_os = "linux")] fn validate_affinity(expect_cores: &[usize], error_msg: &str) { @@ -147,7 +181,7 @@ mod tests { ..Default::default() }; - let rtm = RuntimeManager::new(conf).unwrap(); + let rtm = ThreadManager::new(conf).unwrap(); let r = rtm.get_native("test").unwrap(); let t2 = r @@ -199,7 +233,7 @@ mod tests { ..Default::default() }; - let rtm = RuntimeManager::new(conf).unwrap(); + let rtm = ThreadManager::new(conf).unwrap(); let r = rtm.get_native("test").unwrap(); let t2 = r diff --git a/thread-manager/src/native_thread_runtime.rs b/thread-manager/src/native_thread_runtime.rs index 9653e68290a3d0..6e2925508d1e99 100644 --- a/thread-manager/src/native_thread_runtime.rs +++ b/thread-manager/src/native_thread_runtime.rs @@ -15,7 +15,6 @@ pub struct NativeConfig { pub core_allocation: CoreAllocation, pub max_threads: usize, pub priority: u8, - pub name_base: String, pub stack_size_bytes: usize, } @@ -26,7 +25,6 @@ impl Default for NativeConfig { max_threads: 10, priority: 0, stack_size_bytes: 2 * 1024 * 1024, - name_base: "thread".to_owned(), } } } @@ -36,6 +34,7 @@ pub struct NativeThreadRuntime { pub id_count: AtomicUsize, pub running_count: Arc<AtomicUsize>, pub config: NativeConfig, + pub name: String, } pub struct JoinHandle<T> { @@ -44,7 +43,7 @@ pub struct JoinHandle<T> { } impl<T> JoinHandle<T> { - fn join_inner(&mut self) -> Result<T, Box<dyn core::any::Any + Send + 'static>> { + fn join_inner(&mut self) -> std::thread::Result<T> { match self.std_handle.take() { Some(jh) => { let result = jh.join(); @@ -58,7 +57,7 @@ impl<T> JoinHandle<T> { } } - pub fn join(mut self) -> Result<T, Box<dyn core::any::Any + Send + 'static>> { + pub fn join(mut self) -> std::thread::Result<T> { self.join_inner() } @@ -80,11 +79,12 @@ impl<T> Drop for JoinHandle<T> { } impl NativeThreadRuntime { - pub fn new(cfg: NativeConfig) -> Self { + pub fn new(name: String, cfg: NativeConfig) -> Self { Self { id_count: AtomicUsize::new(0), running_count: Arc::new(AtomicUsize::new(0)), config: cfg, + name, } } pub fn spawn<F, T>(&self, f: F) -> anyhow::Result<JoinHandle<T>> @@ -103,7 +103,7 @@ impl NativeThreadRuntime { let chosen_cores_mask = Mutex::new(self.config.core_allocation.as_core_mask_vector()); let n = self.id_count.fetch_add(1, Ordering::Relaxed); let jh = std::thread::Builder::new() - .name(format!("{}-{}", &self.config.name_base, n)) + .name(format!("{}-{}", &self.name, n)) .stack_size(self.config.stack_size_bytes) .spawn(move || { apply_policy(&core_alloc, priority, &chosen_cores_mask); diff --git a/thread-manager/src/rayon_runtime.rs b/thread-manager/src/rayon_runtime.rs index f1a106a4453657..b731bd83051bcb 100644 --- a/thread-manager/src/rayon_runtime.rs +++ b/thread-manager/src/rayon_runtime.rs @@ -36,13 +36,14 @@ pub struct RayonRuntime { } impl RayonRuntime { - pub fn new(config: RayonConfig) -> anyhow::Result<Self> { + pub fn new(name: String, config: RayonConfig) -> anyhow::Result<Self> { let policy = config.core_allocation.clone(); let chosen_cores_mask = Mutex::new(policy.as_core_mask_vector()); let priority = config.priority; let spawned_threads = AtomicI64::new(0); let rayon_pool = rayon::ThreadPoolBuilder::new() .num_threads(config.worker_threads) + .thread_name(move |i| format!("{}_{}", &name, i)) .start_handler(move |_idx| { let rc = 
spawned_threads.fetch_add(1, Ordering::Relaxed); datapoint_info!("thread-manager-rayon", ("threads-spawned", rc, i64),); diff --git a/thread-manager/src/tokio_runtime.rs b/thread-manager/src/tokio_runtime.rs index 63da2b2f0ebc4b..3e0682b8c46bd3 100644 --- a/thread-manager/src/tokio_runtime.rs +++ b/thread-manager/src/tokio_runtime.rs @@ -2,12 +2,9 @@ use { crate::policy::{apply_policy, CoreAllocation}, serde::{Deserialize, Serialize}, solana_metrics::datapoint_info, - std::{ - future::Future, - sync::{ - atomic::{AtomicI64, AtomicUsize, Ordering}, - Arc, Mutex, - }, + std::sync::{ + atomic::{AtomicI64, AtomicUsize, Ordering}, + Arc, Mutex, }, thread_priority::ThreadExt, }; @@ -69,13 +66,13 @@ impl ThreadCounters { #[derive(Debug)] pub struct TokioRuntime { - pub(crate) tokio: tokio::runtime::Runtime, + pub tokio: tokio::runtime::Runtime, pub config: TokioConfig, pub counters: Arc<ThreadCounters>, } impl TokioRuntime { - pub(crate) fn new(name: String, cfg: TokioConfig) -> anyhow::Result<Self> { + pub fn new(name: String, cfg: TokioConfig) -> anyhow::Result<Self> { let num_workers = if cfg.worker_threads == 0 { num_cpus::get() } else { @@ -84,10 +81,6 @@ impl TokioRuntime { let chosen_cores_mask = cfg.core_allocation.as_core_mask_vector(); let base_name = name.clone(); - println!( - "Assigning {:?} to runtime {}", - &chosen_cores_mask, &base_name - ); let mut builder = match num_workers { 1 => tokio::runtime::Builder::new_current_thread(), _ => { @@ -140,23 +133,4 @@ impl TokioRuntime { counters, }) } - /* This is bad idea... - pub fn spawn<F>(&self, fut: F)-><F as Future>::Output - where F: Future - { - self.tokio.spawn(fut) - } - pub fn spawn_blocking<F>(&self, fut: F)-><F as Future>::Output - where F: Future - { - self.spawn(fut) - } - */ - pub fn start<F>(&self, fut: F) -> F::Output - where - F: Future, - { - // the thread that calls block_on does not need its affinity messed with here - self.tokio.block_on(fut) - } } From 3c83c09de3d2e33813928907f778d361e12953bc Mon Sep 17 00:00:00 2001 From: Alex Pyattaev <alex.pyattaev@anza.xyz> Date: Sun, 15 Dec 2024 23:20:47 +0000 Subject: [PATCH 7/9] use proper logging --- Cargo.lock | 1 + thread-manager/README.md | 7 ++ thread-manager/src/lib.rs | 71 ++++++--------------- thread-manager/src/native_thread_runtime.rs | 3 +- 4 files changed, 30 insertions(+), 52 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 512798bb685981..7c33681a993e58 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -259,6 +259,7 @@ dependencies = [ "solana-metrics", "thread-priority", "tokio", + "toml 0.8.12", ] [[package]] diff --git a/thread-manager/README.md b/thread-manager/README.md index f9e2cbe402a37e..61f81f314f2ea6 100644 --- a/thread-manager/README.md +++ b/thread-manager/README.md @@ -34,3 +34,10 @@ one may want to spawn many rayon pools. * better metrics integration * proper error handling everywhere * even more tests + + +# Examples +All examples need wrk for workload generation. Please install it before running. 
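The examples listed below all share one setup pattern: read a TOML file into a `RuntimeManagerConfig`, build the manager, then fetch runtimes by the names used in the config. A minimal sketch of that flow, assuming the `toml` dev-dependency added earlier in this series ("axum1" matches the runtime name used in the example configs):

```rust
use {agave_thread_manager::*, std::io::Read};

fn load_manager(path: &str) -> anyhow::Result<ThreadManager> {
    // Read and deserialize the TOML config.
    let mut buf = String::new();
    std::fs::File::open(path)?.read_to_string(&mut buf)?;
    let cfg: RuntimeManagerConfig = toml::from_str(&buf)?;
    ThreadManager::new(cfg)
}

fn main() -> anyhow::Result<()> {
    let manager = load_manager("examples/core_contention_dedicated_set.toml")?;
    let runtime = manager
        .get_tokio("axum1")
        .expect("Expecting runtime named axum1");
    // Drive work on the managed runtime; affinity and priority are already applied.
    runtime.tokio.block_on(async { /* serve, benchmark, etc. */ });
    Ok(())
}
```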
+ + * core_contention_basics will demonstrate why core contention is bad, and how thread configs can help + * core_contention_sweep will sweep across a range of core counts to show how benefits scale with core counts diff --git a/thread-manager/src/lib.rs b/thread-manager/src/lib.rs index c348ef525baa05..c439432cb20bd0 100644 --- a/thread-manager/src/lib.rs +++ b/thread-manager/src/lib.rs @@ -46,16 +46,16 @@ pub struct RuntimeManagerConfig { impl ThreadManager { pub fn get_native(&self, name: &str) -> Option<&NativeThreadRuntime> { - let n = self.native_runtime_mapping.get(name)?; - self.native_thread_runtimes.get(n) + let name = self.native_runtime_mapping.get(name)?; + self.native_thread_runtimes.get(name) } pub fn get_rayon(&self, name: &str) -> Option<&RayonRuntime> { - let n = self.rayon_runtime_mapping.get(name)?; - self.rayon_runtimes.get(n) + let name = self.rayon_runtime_mapping.get(name)?; + self.rayon_runtimes.get(name) } pub fn get_tokio(&self, name: &str) -> Option<&TokioRuntime> { - let n = self.tokio_runtime_mapping.get(name)?; - self.tokio_runtimes.get(n) + let name = self.tokio_runtime_mapping.get(name)?; + self.tokio_runtimes.get(name) } pub fn set_process_affinity(config: &RuntimeManagerConfig) -> anyhow::Result<Vec<usize>> { let chosen_cores_mask = config.default_core_allocation.as_core_mask_vector(); @@ -158,8 +158,8 @@ mod tests { // Nobody runs Agave on windows, and on Mac we can not set mask affinity without patching external crate #[cfg(target_os = "linux")] fn validate_affinity(expect_cores: &[usize], error_msg: &str) { - let aff = affinity::get_thread_affinity().unwrap(); - assert_eq!(aff, expect_cores, "{}", error_msg); + let affinity = affinity::get_thread_affinity().unwrap(); + assert_eq!(affinity, expect_cores, "{}", error_msg); } #[cfg(not(target_os = "linux"))] fn validate_affinity(_expect_cores: &[usize], _error_msg: &str) {} @@ -181,44 +181,35 @@ mod tests { ..Default::default() }; - let rtm = ThreadManager::new(conf).unwrap(); - let r = rtm.get_native("test").unwrap(); + let manager = ThreadManager::new(conf).unwrap(); + let runtime = manager.get_native("test").unwrap(); - let t2 = r + let thread1 = runtime .spawn(|| { validate_affinity(&[0, 1, 2, 3], "Managed thread allocation should be 0-3"); }) .unwrap(); - let t = std::thread::spawn(|| { + let thread2 = std::thread::spawn(|| { validate_affinity(&[4, 5, 6, 7], "Default thread allocation should be 4-7"); - let tt = std::thread::spawn(|| { + let inner_thread = std::thread::spawn(|| { validate_affinity( &[4, 5, 6, 7], "Nested thread allocation should still be 4-7", ); }); - tt.join().unwrap(); + inner_thread.join().unwrap(); }); - t.join().unwrap(); - t2.join().unwrap(); + thread1.join().unwrap(); + thread2.join().unwrap(); } #[test] fn rayon_affinity() { let conf = RuntimeManagerConfig { - native_configs: HashMap::from([( - "pool1".to_owned(), - NativeConfig { - core_allocation: CoreAllocation::DedicatedCoreSet { min: 0, max: 4 }, - max_threads: 5, - priority: 0, - ..Default::default() - }, - )]), rayon_configs: HashMap::from([( - "rayon1".to_owned(), + "test".to_owned(), RayonConfig { core_allocation: CoreAllocation::DedicatedCoreSet { min: 1, max: 4 }, worker_threads: 3, @@ -227,38 +218,16 @@ mod tests { }, )]), default_core_allocation: CoreAllocation::DedicatedCoreSet { min: 4, max: 8 }, - native_runtime_mapping: HashMap::from([("test".to_owned(), "pool1".to_owned())]), - rayon_runtime_mapping: HashMap::from([("test".to_owned(), "rayon1".to_owned())]), ..Default::default() }; - let rtm = 
ThreadManager::new(conf).unwrap(); - let r = rtm.get_native("test").unwrap(); + let manager = ThreadManager::new(conf).unwrap(); + let rayon_runtime = manager.get_rayon("test").unwrap(); - let t2 = r - .spawn(|| { - validate_affinity(&[0, 1, 2, 3], "Managed thread allocation should be 0-3"); - }) - .unwrap(); - let rrt = rtm.get_rayon("test").unwrap(); - - let t = std::thread::spawn(|| { - validate_affinity(&[4, 5, 6, 7], "Default thread allocation should be 4-7"); - - let tt = std::thread::spawn(|| { - validate_affinity( - &[4, 5, 6, 7], - "Nested thread allocation should still be 4-7", - ); - }); - tt.join().unwrap(); - }); - let _rr = rrt.rayon_pool.broadcast(|ctx| { + let _rr = rayon_runtime.rayon_pool.broadcast(|ctx| { println!("Rayon thread {} reporting", ctx.index()); validate_affinity(&[1, 2, 3], "Rayon thread allocation should still be 1-3"); }); - t.join().unwrap(); - t2.join().unwrap(); } } diff --git a/thread-manager/src/native_thread_runtime.rs b/thread-manager/src/native_thread_runtime.rs index 6e2925508d1e99..f99db65ae9fe5c 100644 --- a/thread-manager/src/native_thread_runtime.rs +++ b/thread-manager/src/native_thread_runtime.rs @@ -1,6 +1,7 @@ use { crate::policy::{apply_policy, CoreAllocation}, anyhow::bail, + log::error, serde::{Deserialize, Serialize}, solana_metrics::datapoint_info, std::sync::{ @@ -72,7 +73,7 @@ impl<T> JoinHandle<T> { impl<T> Drop for JoinHandle<T> { fn drop(&mut self) { if self.std_handle.is_some() { - println!("Attempting to drop a Join Handle of a running thread will leak thread IDs, please join your managed threads!"); + error!("Attempting to drop a Join Handle of a running thread will leak thread IDs, please join your managed threads!"); self.join_inner().expect("Child thread panicked"); } } From 4e1d728c56e3b997992f36dc6994e0ed09d8294d Mon Sep 17 00:00:00 2001 From: Alex Pyattaev <alex.pyattaev@anza.xyz> Date: Tue, 17 Dec 2024 20:07:37 +0000 Subject: [PATCH 8/9] periodic sampling of metrics to avoid flood, fixup examples --- Cargo.lock | 5 +- thread-manager/Cargo.toml | 1 + .../examples/core_contention_basics.rs | 63 +++++---- .../examples/core_contention_sweep.rs | 122 ++++++++++-------- thread-manager/src/policy.rs | 21 ++- thread-manager/src/tokio_runtime.rs | 82 +++++++----- 6 files changed, 173 insertions(+), 121 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7c33681a993e58..b70a22fb5addc4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -251,6 +251,7 @@ dependencies = [ "affinity", "anyhow", "axum 0.7.9", + "env_logger", "log", "num_cpus", "rayon", @@ -461,9 +462,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.1" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a30da5c5f2d5e72842e00bcb57657162cdabef0931f40e2deb9b4140440cecd" +checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "anyhow" diff --git a/thread-manager/Cargo.toml b/thread-manager/Cargo.toml index 265eb3aafd97fb..0c760371ddfe0e 100644 --- a/thread-manager/Cargo.toml +++ b/thread-manager/Cargo.toml @@ -26,5 +26,6 @@ affinity = "0.1.2" [dev-dependencies] axum = "0.7.9" +env_logger = { workspace = true } serde_json = { workspace = true } toml = { workspace = true } diff --git a/thread-manager/examples/core_contention_basics.rs b/thread-manager/examples/core_contention_basics.rs index 219712df060a68..ea481a707893b8 100644 --- a/thread-manager/examples/core_contention_basics.rs +++ b/thread-manager/examples/core_contention_basics.rs @@ -1,9 +1,13 @@ -use std::{ - 
future::IntoFuture, - io::{Read, Write}, - net::{IpAddr, Ipv4Addr, SocketAddr}, - path::PathBuf, - time::Duration, +use { + agave_thread_manager::*, + log::{debug, info}, + std::{ + future::IntoFuture, + io::{Read, Write}, + net::{IpAddr, Ipv4Addr, SocketAddr}, + path::PathBuf, + time::Duration, + }, }; async fn axum_main(port: u16) { @@ -31,47 +35,50 @@ async fn axum_main(port: u16) { match timeout { Ok(v) => v.unwrap(), Err(_) => { - println!("Terminating server on port {port}"); + info!("Terminating server on port {port}"); } } } -use agave_thread_manager::*; fn main() -> anyhow::Result<()> { + env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init(); let experiments = [ - "examples/core_contention_dedicated_set.json", - "examples/core_contention_contending_set.json", + "examples/core_contention_dedicated_set.toml", + "examples/core_contention_contending_set.toml", ]; for exp in experiments { - println!("==================="); - println!("Running {exp}"); - let mut conffile = PathBuf::from(env!("CARGO_MANIFEST_DIR")); - conffile.push(exp); + info!("==================="); + info!("Running {exp}"); + let mut conf_file = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + conf_file.push(exp); let mut buf = String::new(); - std::fs::File::open(conffile)?.read_to_string(&mut buf)?; + std::fs::File::open(conf_file)?.read_to_string(&mut buf)?; let cfg: RuntimeManagerConfig = toml::from_str(&buf)?; - //println!("Loaded config {}", serde_json::to_string_pretty(&cfg)?); - let rtm = ThreadManager::new(cfg).unwrap(); - let tok1 = rtm + let manager = ThreadManager::new(cfg).unwrap(); + let tokio1 = manager .get_tokio("axum1") .expect("Expecting runtime named axum1"); - let tok2 = rtm + tokio1.start_metrics_sampling(Duration::from_secs(1)); + let tokio2 = manager .get_tokio("axum2") .expect("Expecting runtime named axum2"); + tokio2.start_metrics_sampling(Duration::from_secs(1)); let wrk_cores: Vec<_> = (32..64).collect(); - let results = std::thread::scope(|s| { - s.spawn(|| { - tok1.tokio.block_on(axum_main(8888)); + let results = std::thread::scope(|scope| { + scope.spawn(|| { + tokio1.tokio.block_on(axum_main(8888)); }); - s.spawn(|| { - tok2.tokio.block_on(axum_main(8889)); + scope.spawn(|| { + tokio2.tokio.block_on(axum_main(8889)); }); - let jh = s.spawn(|| run_wrk(&[8888, 8889], &wrk_cores, wrk_cores.len(), 1000).unwrap()); - jh.join().expect("WRK crashed!") + let join_handle = + scope.spawn(|| run_wrk(&[8888, 8889], &wrk_cores, wrk_cores.len(), 1000).unwrap()); + join_handle.join().expect("WRK crashed!") }); + //print out the results of the bench run println!("Results are: {:?}", results); } Ok(()) @@ -112,7 +119,7 @@ fn run_wrk( let mut all_latencies = vec![]; let mut all_rps = vec![]; for (out, port) in outs.zip(ports.iter()) { - println!("========================="); + debug!("========================="); std::io::stdout().write_all(&out.stderr)?; let res = str::from_utf8(&out.stdout)?; let mut res = res.lines().last().unwrap().split(' '); @@ -122,7 +129,7 @@ fn run_wrk( let requests: usize = res.next().unwrap().parse()?; let rps = requests as f32 / 10.0; - println!("WRK results for port {port}: {latency:?} {rps}"); + debug!("WRK results for port {port}: {latency:?} {rps}"); all_latencies.push(Duration::from_micros(latency_us)); all_rps.push(rps); } diff --git a/thread-manager/examples/core_contention_sweep.rs b/thread-manager/examples/core_contention_sweep.rs index 51ba4c08e714bd..e466b3bae05086 100644 --- a/thread-manager/examples/core_contention_sweep.rs +++ 
b/thread-manager/examples/core_contention_sweep.rs @@ -1,15 +1,18 @@ -use std::{ - collections::HashMap, - future::IntoFuture, - io::Write, - net::{IpAddr, Ipv4Addr, SocketAddr}, - path::PathBuf, - time::Duration, +use { + agave_thread_manager::*, + log::{debug, info}, + std::{ + collections::HashMap, + future::IntoFuture, + io::Write, + net::{IpAddr, Ipv4Addr, SocketAddr}, + path::PathBuf, + time::Duration, + }, }; async fn axum_main(port: u16) { use axum::{routing::get, Router}; - // basic handler that responds with a static string async fn root() -> &'static str { tokio::time::sleep(Duration::from_millis(1)).await; @@ -24,6 +27,7 @@ async fn axum_main(port: u16) { tokio::net::TcpListener::bind(SocketAddr::new(IpAddr::V4(Ipv4Addr::UNSPECIFIED), port)) .await .unwrap(); + info!("Server on port {port} ready"); let timeout = tokio::time::timeout( Duration::from_secs(11), axum::serve(listener, app).into_future(), @@ -32,11 +36,10 @@ async fn axum_main(port: u16) { match timeout { Ok(v) => v.unwrap(), Err(_) => { - println!("Terminating server on port {port}"); + info!("Terminating server on port {port}"); } } } -use agave_thread_manager::*; fn make_config_shared(cc: usize) -> RuntimeManagerConfig { let tokio_cfg_1 = TokioConfig { core_allocation: CoreAllocation::DedicatedCoreSet { min: 0, max: cc }, @@ -46,41 +49,33 @@ fn make_config_shared(cc: usize) -> RuntimeManagerConfig { let tokio_cfg_2 = tokio_cfg_1.clone(); RuntimeManagerConfig { tokio_configs: HashMap::from([ - ("tokio1".into(), tokio_cfg_1), - ("tokio2".into(), tokio_cfg_2), - ]), - tokio_runtime_mapping: HashMap::from([ - ("axum1".into(), "tokio1".into()), - ("axum2".into(), "tokio2".into()), + ("axum1".into(), tokio_cfg_1), + ("axum2".into(), tokio_cfg_2), ]), ..Default::default() } } -fn make_config_dedicated(cc: usize) -> RuntimeManagerConfig { +fn make_config_dedicated(core_count: usize) -> RuntimeManagerConfig { let tokio_cfg_1 = TokioConfig { core_allocation: CoreAllocation::DedicatedCoreSet { min: 0, - max: cc / 2, + max: core_count / 2, }, - worker_threads: cc / 2, + worker_threads: core_count / 2, ..Default::default() }; let tokio_cfg_2 = TokioConfig { core_allocation: CoreAllocation::DedicatedCoreSet { - min: cc / 2, - max: cc, + min: core_count / 2, + max: core_count, }, - worker_threads: cc / 2, + worker_threads: core_count / 2, ..Default::default() }; RuntimeManagerConfig { tokio_configs: HashMap::from([ - ("tokio1".into(), tokio_cfg_1), - ("tokio2".into(), tokio_cfg_2), - ]), - tokio_runtime_mapping: HashMap::from([ - ("axum1".into(), "tokio1".into()), - ("axum2".into(), "tokio2".into()), + ("axum1".into(), tokio_cfg_1), + ("axum2".into(), tokio_cfg_2), ]), ..Default::default() } @@ -93,7 +88,7 @@ enum Regime { Single, } impl Regime { - const VALUES: [Self; 3] = [Self::Shared, Self::Dedicated, Self::Single]; + const VALUES: [Self; 3] = [Self::Dedicated, Self::Shared, Self::Single]; } #[derive(Debug, Default, serde::Serialize)] @@ -103,72 +98,84 @@ struct Results { } fn main() -> anyhow::Result<()> { + env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init(); let mut all_results: HashMap<String, Results> = HashMap::new(); for regime in Regime::VALUES { - let mut res = Results::default(); - for core_cnt in [2, 4, 8, 16] { - let rtm; - println!("==================="); - println!("Running {core_cnt} cores under {regime:?}"); - let (tok1, tok2) = match regime { + let mut results = Results::default(); + for core_count in [2, 4, 8, 16] { + let manager; + info!("==================="); + 
info!("Running {core_count} cores under {regime:?}"); + let (tokio1, tokio2) = match regime { Regime::Shared => { - rtm = ThreadManager::new(make_config_shared(core_cnt)).unwrap(); + manager = ThreadManager::new(make_config_shared(core_count)).unwrap(); ( - rtm.get_tokio("axum1") + manager + .get_tokio("axum1") .expect("Expecting runtime named axum1"), - rtm.get_tokio("axum2") + manager + .get_tokio("axum2") .expect("Expecting runtime named axum2"), ) } Regime::Dedicated => { - rtm = ThreadManager::new(make_config_dedicated(core_cnt)).unwrap(); + manager = ThreadManager::new(make_config_dedicated(core_count)).unwrap(); ( - rtm.get_tokio("axum1") + manager + .get_tokio("axum1") .expect("Expecting runtime named axum1"), - rtm.get_tokio("axum2") + manager + .get_tokio("axum2") .expect("Expecting runtime named axum2"), ) } Regime::Single => { - rtm = ThreadManager::new(make_config_shared(core_cnt)).unwrap(); + manager = ThreadManager::new(make_config_shared(core_count)).unwrap(); ( - rtm.get_tokio("axum1") + manager + .get_tokio("axum1") .expect("Expecting runtime named axum1"), - rtm.get_tokio("axum2") + manager + .get_tokio("axum2") .expect("Expecting runtime named axum2"), ) } }; let wrk_cores: Vec<_> = (32..64).collect(); - let results = std::thread::scope(|s| { + let measurement = std::thread::scope(|s| { s.spawn(|| { - tok1.tokio.spawn(axum_main(8888)); + tokio1.start_metrics_sampling(Duration::from_secs(1)); + tokio1.tokio.block_on(axum_main(8888)); }); let jh = match regime { Regime::Single => s.spawn(|| { - run_wrk(&[8888, 8888], &wrk_cores, wrk_cores.len(), 1000).unwrap() + run_wrk(&[8888, 8888], &wrk_cores, wrk_cores.len(), 3000).unwrap() }), _ => { s.spawn(|| { - tok2.tokio.spawn(axum_main(8889)); + tokio2.start_metrics_sampling(Duration::from_secs(1)); + tokio2.tokio.block_on(axum_main(8889)); }); s.spawn(|| { - run_wrk(&[8888, 8889], &wrk_cores, wrk_cores.len(), 1000).unwrap() + run_wrk(&[8888, 8889], &wrk_cores, wrk_cores.len(), 3000).unwrap() }) } }; jh.join().expect("WRK crashed!") }); - println!("Results are: {:?}", results); - res.latencies_s.push( - results.0.iter().map(|a| a.as_secs_f32()).sum::<f32>() / results.0.len() as f32, + info!("Results are: {:?}", measurement); + results.latencies_s.push( + measurement.0.iter().map(|a| a.as_secs_f32()).sum::<f32>() + / measurement.0.len() as f32, ); - res.rps.push(results.1.iter().sum()); + results.rps.push(measurement.1.iter().sum()); } - all_results.insert(format!("{regime:?}"), res); + all_results.insert(format!("{regime:?}"), results); std::thread::sleep(Duration::from_secs(3)); } + + //print the resulting measurements so they can be e.g. 
     println!("{}", serde_json::to_string_pretty(&all_results)?);
     Ok(())
@@ -180,6 +187,9 @@ fn run_wrk(
     threads: usize,
     connections: usize,
 ) -> anyhow::Result<(Vec<Duration>, Vec<f32>)> {
+    // Sleep a bit to let the axum servers start
+    std::thread::sleep(Duration::from_millis(500));
+
     let mut script = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
     script.push("examples/report.lua");
     let cpus: Vec<String> = cpus.iter().map(|c| c.to_string()).collect();
@@ -209,7 +219,7 @@ fn run_wrk(
     let mut all_latencies = vec![];
     let mut all_rps = vec![];
     for (out, port) in outs.zip(ports.iter()) {
-        println!("=========================");
+        debug!("=========================");
         std::io::stdout().write_all(&out.stderr)?;
         let res = str::from_utf8(&out.stdout)?;
         let mut res = res.lines().last().unwrap().split(' ');
@@ -219,7 +229,7 @@ fn run_wrk(
         let requests: usize = res.next().unwrap().parse()?;
         let rps = requests as f32 / 10.0;
-        println!("WRK results for port {port}: {latency:?} {rps}");
+        debug!("WRK results for port {port}: {latency:?} {rps}");
         all_latencies.push(Duration::from_micros(latency_us));
         all_rps.push(rps);
     }
diff --git a/thread-manager/src/policy.rs b/thread-manager/src/policy.rs
index 828745d80372cd..cd975884459c1f 100644
--- a/thread-manager/src/policy.rs
+++ b/thread-manager/src/policy.rs
@@ -1,8 +1,11 @@
 use {
     serde::{Deserialize, Serialize},
+    std::sync::OnceLock,
     thread_priority::ThreadExt,
 };
 
+static CORE_COUNT: OnceLock<usize> = OnceLock::new();
+
 #[derive(Default, Debug, Clone, Serialize, Deserialize)]
 pub enum CoreAllocation {
     ///Use OS default allocation (i.e. do not alter core affinity)
@@ -17,17 +20,31 @@ pub enum CoreAllocation {
 impl CoreAllocation {
-    /// Converts into a vector of core IDs. OsDefault is converted to empty vector.
+    /// Converts into a vector of core IDs. OsDefault is converted to a list of all available cores.
     pub fn as_core_mask_vector(&self) -> Vec<usize> {
+        let core_count = CORE_COUNT.get_or_init(num_cpus::get);
         match *self {
             CoreAllocation::PinnedCores { min, max } => (min..max).collect(),
             CoreAllocation::DedicatedCoreSet { min, max } => (min..max).collect(),
-            CoreAllocation::OsDefault => vec![],
+            CoreAllocation::OsDefault => Vec::from_iter(0..*core_count),
         }
     }
 }
 
 #[cfg(target_os = "linux")]
 pub fn set_thread_affinity(cores: &[usize]) {
-    affinity::set_thread_affinity(cores).expect("Can not set thread affinity for runtime worker");
+    assert!(
+        !cores.is_empty(),
+        "Cannot call set_thread_affinity with an empty core mask"
+    );
+    if let Err(e) = affinity::set_thread_affinity(cores) {
+        let thread = std::thread::current();
+        let msg = format!(
+            "Cannot set core affinity {:?} for thread {:?} named {:?}, error {e}",
+            cores,
+            thread.id(),
+            thread.name()
+        );
+        panic!("{}", msg);
+    }
 }
 
 #[cfg(not(target_os = "linux"))]
diff --git a/thread-manager/src/tokio_runtime.rs b/thread-manager/src/tokio_runtime.rs
index 3e0682b8c46bd3..b8563f9ae11348 100644
--- a/thread-manager/src/tokio_runtime.rs
+++ b/thread-manager/src/tokio_runtime.rs
@@ -2,9 +2,12 @@ use {
     crate::policy::{apply_policy, CoreAllocation},
     serde::{Deserialize, Serialize},
     solana_metrics::datapoint_info,
-    std::sync::{
-        atomic::{AtomicI64, AtomicUsize, Ordering},
-        Arc, Mutex,
+    std::{
+        sync::{
+            atomic::{AtomicI64, AtomicUsize, Ordering},
+            Arc, Mutex,
+        },
+        time::Duration,
     },
     thread_priority::ThreadExt,
 };
@@ -35,35 +38,6 @@ impl Default for TokioConfig {
     }
 }
 
-#[derive(Debug)]
-pub struct ThreadCounters {
-    pub namespace: &'static str,
-    pub parked_threads_cnt: AtomicI64,
-    pub active_threads_cnt: AtomicI64,
-}
-
-impl ThreadCounters {
-    pub fn on_park(&self) {
-        let parked = self.parked_threads_cnt.fetch_add(1, Ordering::Relaxed);
-        let active = self.active_threads_cnt.fetch_sub(1, Ordering::Relaxed);
-        datapoint_info!(
-            self.namespace,
-            ("threads_parked", parked, i64),
-            ("threads_active", active, i64),
-        );
-    }
-
-    pub fn on_unpark(&self) {
-        let parked = self.parked_threads_cnt.fetch_sub(1, Ordering::Relaxed);
-        let active = self.active_threads_cnt.fetch_add(1, Ordering::Relaxed);
-        datapoint_info!(
-            self.namespace,
-            ("threads_parked", parked, i64),
-            ("threads_active", active, i64),
-        );
-    }
-}
-
 #[derive(Debug)]
 pub struct TokioRuntime {
     pub tokio: tokio::runtime::Runtime,
@@ -72,6 +46,12 @@ pub struct TokioRuntime {
 }
 
 impl TokioRuntime {
+    /// Starts the metrics sampling task on the runtime to monitor how many workers are busy doing useful work.
+    pub fn start_metrics_sampling(&self, period: Duration) {
+        let counters = self.counters.clone();
+        self.tokio.spawn(metrics_sampler(counters, period));
+    }
+
     pub fn new(name: String, cfg: TokioConfig) -> anyhow::Result<Self> {
         let num_workers = if cfg.worker_threads == 0 {
             num_cpus::get()
@@ -94,7 +74,9 @@ impl TokioRuntime {
         let counters = Arc::new(ThreadCounters {
             namespace: format!("thread-manager-tokio-{}", &base_name).leak(), // no workaround: the metrics crate only accepts &'static str
             parked_threads_cnt: AtomicI64::new(0),
-            active_threads_cnt: AtomicI64::new(0),
+            active_threads_cnt: AtomicI64::new(
+                (num_workers.wrapping_add(cfg.max_blocking_threads)) as i64,
+            ),
         });
         let counters_clone1 = counters.clone();
         let counters_clone2 = counters.clone();
@@ -134,3 +116,37 @@ impl TokioRuntime {
         })
     }
 }
+
+/// Internal counters to keep track of worker pool utilization
+#[derive(Debug)]
+pub struct ThreadCounters {
+    pub namespace: &'static str,
+    pub parked_threads_cnt: AtomicI64,
+    pub active_threads_cnt: AtomicI64,
+}
+
+impl ThreadCounters {
+    pub fn on_park(&self) {
+        self.parked_threads_cnt.fetch_add(1, Ordering::Relaxed);
+        self.active_threads_cnt.fetch_sub(1, Ordering::Relaxed);
+    }
+
+    pub fn on_unpark(&self) {
+        self.parked_threads_cnt.fetch_sub(1, Ordering::Relaxed);
+        self.active_threads_cnt.fetch_add(1, Ordering::Relaxed);
+    }
+}
+
+async fn metrics_sampler(counters: Arc<ThreadCounters>, period: Duration) {
+    let mut interval = tokio::time::interval(period);
+    loop {
+        interval.tick().await;
+        let parked = counters.parked_threads_cnt.load(Ordering::Relaxed);
+        let active = counters.active_threads_cnt.load(Ordering::Relaxed);
+        datapoint_info!(
+            counters.namespace,
+            ("threads_parked", parked, i64),
+            ("threads_active", active, i64),
+        );
+    }
+}

From c93751c5e5b8e9e6171802a68214cec587003c42 Mon Sep 17 00:00:00 2001
From: Alex Pyattaev <alex.pyattaev@anza.xyz>
Date: Thu, 19 Dec 2024 14:00:02 +0000
Subject: [PATCH 9/9] improved UX with interior refcounting

---
 .../examples/core_contention_basics.rs      |   2 +-
 .../examples/core_contention_sweep.rs       |   8 +-
 thread-manager/src/lib.rs                   | 142 ++++++++++++------
 thread-manager/src/native_thread_runtime.rs |  51 +++++--
 thread-manager/src/rayon_runtime.rs         |  37 ++++-
 thread-manager/src/tokio_runtime.rs         |  20 ++-
 6 files changed, 191 insertions(+), 69 deletions(-)

diff --git a/thread-manager/examples/core_contention_basics.rs b/thread-manager/examples/core_contention_basics.rs
index ea481a707893b8..d23b5e16d49644 100644
--- a/thread-manager/examples/core_contention_basics.rs
+++ b/thread-manager/examples/core_contention_basics.rs
@@ -54,7 +54,7 @@ fn main() -> anyhow::Result<()> {
         conf_file.push(exp);
         let mut buf = String::new();
         std::fs::File::open(conf_file)?.read_to_string(&mut buf)?;
-        let cfg: RuntimeManagerConfig = toml::from_str(&buf)?;
+        let cfg: ThreadManagerConfig = toml::from_str(&buf)?;
 
         let manager = ThreadManager::new(cfg).unwrap();
         let tokio1 = manager
diff --git a/thread-manager/examples/core_contention_sweep.rs b/thread-manager/examples/core_contention_sweep.rs
index e466b3bae05086..5edd213a42839e 100644
--- a/thread-manager/examples/core_contention_sweep.rs
+++ b/thread-manager/examples/core_contention_sweep.rs
@@ -40,14 +40,14 @@ async fn axum_main(port: u16) {
         }
     }
 }
-fn make_config_shared(cc: usize) -> RuntimeManagerConfig {
+fn make_config_shared(cc: usize) -> ThreadManagerConfig {
     let tokio_cfg_1 = TokioConfig {
         core_allocation: CoreAllocation::DedicatedCoreSet { min: 0, max: cc },
         worker_threads: cc,
         ..Default::default()
     };
     let tokio_cfg_2 = tokio_cfg_1.clone();
-    RuntimeManagerConfig {
+    ThreadManagerConfig {
         tokio_configs: HashMap::from([
             ("axum1".into(), tokio_cfg_1),
             ("axum2".into(), tokio_cfg_2),
@@ -55,7 +55,7 @@ fn make_config_shared(cc: usize) -> RuntimeManagerConfig {
         ..Default::default()
     }
 }
-fn make_config_dedicated(core_count: usize) -> RuntimeManagerConfig {
+fn make_config_dedicated(core_count: usize) -> ThreadManagerConfig {
     let tokio_cfg_1 = TokioConfig {
         core_allocation: CoreAllocation::DedicatedCoreSet {
             min: 0,
@@ -72,7 +72,7 @@ fn make_config_dedicated(core_count: usize) -> RuntimeManagerConfig {
         worker_threads: core_count / 2,
         ..Default::default()
     };
-    RuntimeManagerConfig {
+    ThreadManagerConfig {
         tokio_configs: HashMap::from([
             ("axum1".into(), tokio_cfg_1),
             ("axum2".into(), tokio_cfg_2),
diff --git a/thread-manager/src/lib.rs b/thread-manager/src/lib.rs
index c439432cb20bd0..e852f00995aed5 100644
--- a/thread-manager/src/lib.rs
+++ b/thread-manager/src/lib.rs
@@ -1,7 +1,7 @@
 use {
     anyhow::Ok,
     serde::{Deserialize, Serialize},
-    std::collections::HashMap,
+    std::{collections::HashMap, ops::Deref, sync::Arc},
 };
 
 pub mod native_thread_runtime;
@@ -18,7 +18,7 @@ pub use {
 pub type ConstString = Box<str>;
 
 #[derive(Default, Debug)]
-pub struct ThreadManager {
+pub struct ThreadManagerInner {
     pub tokio_runtimes: HashMap<ConstString, TokioRuntime>,
     pub tokio_runtime_mapping: HashMap<ConstString, ConstString>,
@@ -28,44 +28,9 @@ pub struct ThreadManager {
     pub rayon_runtimes: HashMap<ConstString, RayonRuntime>,
     pub rayon_runtime_mapping: HashMap<ConstString, ConstString>,
 }
-
-#[derive(Default, Clone, Debug, Serialize, Deserialize)]
-#[serde(default)]
-pub struct RuntimeManagerConfig {
-    pub native_configs: HashMap<String, NativeConfig>,
-    pub native_runtime_mapping: HashMap<String, String>,
-
-    pub rayon_configs: HashMap<String, RayonConfig>,
-    pub rayon_runtime_mapping: HashMap<String, String>,
-
-    pub tokio_configs: HashMap<String, TokioConfig>,
-    pub tokio_runtime_mapping: HashMap<String, String>,
-
-    pub default_core_allocation: CoreAllocation,
-}
-
-impl ThreadManager {
-    pub fn get_native(&self, name: &str) -> Option<&NativeThreadRuntime> {
-        let name = self.native_runtime_mapping.get(name)?;
-        self.native_thread_runtimes.get(name)
-    }
-    pub fn get_rayon(&self, name: &str) -> Option<&RayonRuntime> {
-        let name = self.rayon_runtime_mapping.get(name)?;
-        self.rayon_runtimes.get(name)
-    }
-    pub fn get_tokio(&self, name: &str) -> Option<&TokioRuntime> {
-        let name = self.tokio_runtime_mapping.get(name)?;
-        self.tokio_runtimes.get(name)
-    }
-    pub fn set_process_affinity(config: &RuntimeManagerConfig) -> anyhow::Result<Vec<usize>> {
-        let chosen_cores_mask = config.default_core_allocation.as_core_mask_vector();
-
-        crate::policy::set_thread_affinity(&chosen_cores_mask);
-        Ok(chosen_cores_mask)
-    }
-
+impl ThreadManagerInner {
     /// Populates mappings with copies of config names, overrides as appropriate
-    fn populate_mappings(&mut self, config: &RuntimeManagerConfig) {
+    fn populate_mappings(&mut self, config: &ThreadManagerConfig) {
         //TODO: this should probably be cleaned up with a macro at some point...
 
         for name in config.native_configs.keys() {
@@ -95,10 +60,93 @@ impl ThreadManager {
                 .insert(k.clone().into_boxed_str(), v.clone().into_boxed_str());
         }
     }
-    pub fn new(config: RuntimeManagerConfig) -> anyhow::Result<Self> {
+}
+
+#[derive(Default, Debug, Clone)]
+pub struct ThreadManager {
+    inner: Arc<ThreadManagerInner>,
+}
+impl Deref for ThreadManager {
+    type Target = ThreadManagerInner;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+#[derive(Clone, Debug, Serialize, Deserialize)]
+#[serde(default)]
+pub struct ThreadManagerConfig {
+    pub native_configs: HashMap<String, NativeConfig>,
+    pub native_runtime_mapping: HashMap<String, String>,
+
+    pub rayon_configs: HashMap<String, RayonConfig>,
+    pub rayon_runtime_mapping: HashMap<String, String>,
+
+    pub tokio_configs: HashMap<String, TokioConfig>,
+    pub tokio_runtime_mapping: HashMap<String, String>,
+
+    pub default_core_allocation: CoreAllocation,
+}
+
+impl Default for ThreadManagerConfig {
+    fn default() -> Self {
+        Self {
+            native_configs: HashMap::from([("default".to_owned(), NativeConfig::default())]),
+            native_runtime_mapping: HashMap::new(),
+            rayon_configs: HashMap::from([("default".to_owned(), RayonConfig::default())]),
+            rayon_runtime_mapping: HashMap::new(),
+            tokio_configs: HashMap::from([("default".to_owned(), TokioConfig::default())]),
+            tokio_runtime_mapping: HashMap::new(),
+            default_core_allocation: CoreAllocation::OsDefault,
+        }
+    }
+}
+
+impl ThreadManager {
+    /// Looks up a runtime by the given name. If not found, falls back to the runtime named "default". Returns None if that also fails.
+    fn lookup<'a, T>(
+        &'a self,
+        name: &str,
+        mapping: &HashMap<ConstString, ConstString>,
+        runtimes: &'a HashMap<ConstString, T>,
+    ) -> Option<&'a T> {
+        match mapping.get(name) {
+            Some(n) => runtimes.get(n),
+            None => match mapping.get("default") {
+                Some(n) => runtimes.get(n),
+                None => None,
+            },
+        }
+    }
+
+    pub fn get_native(&self, name: &str) -> Option<&NativeThreadRuntime> {
+        self.lookup(
+            name,
+            &self.native_runtime_mapping,
+            &self.native_thread_runtimes,
+        )
+    }
+
+    pub fn get_rayon(&self, name: &str) -> Option<&RayonRuntime> {
+        self.lookup(name, &self.rayon_runtime_mapping, &self.rayon_runtimes)
+    }
+
+    pub fn get_tokio(&self, name: &str) -> Option<&TokioRuntime> {
+        self.lookup(name, &self.tokio_runtime_mapping, &self.tokio_runtimes)
+    }
+
+    pub fn set_process_affinity(config: &ThreadManagerConfig) -> anyhow::Result<Vec<usize>> {
+        let chosen_cores_mask = config.default_core_allocation.as_core_mask_vector();
+
+        crate::policy::set_thread_affinity(&chosen_cores_mask);
+        Ok(chosen_cores_mask)
+    }
+
+    pub fn new(config: ThreadManagerConfig) -> anyhow::Result<Self> {
         let mut core_allocations = HashMap::<ConstString, Vec<usize>>::new();
         Self::set_process_affinity(&config)?;
-        let mut manager = Self::default();
+        let mut manager = ThreadManagerInner::default();
         manager.populate_mappings(&config);
         for (name, cfg) in config.native_configs.iter() {
             let nrt = NativeThreadRuntime::new(name.clone(), cfg.clone());
@@ -124,14 +172,16 @@ impl ThreadManager {
             .tokio_runtimes
             .insert(name.clone().into_boxed_str(), tokiort);
         }
-        Ok(manager)
+        Ok(Self {
+            inner: Arc::new(manager),
+        })
     }
 }
 
 #[cfg(test)]
 mod tests {
     use {
-        crate::{CoreAllocation, NativeConfig, RayonConfig, RuntimeManagerConfig, ThreadManager},
+        crate::{CoreAllocation, NativeConfig, RayonConfig, ThreadManager, ThreadManagerConfig},
         std::{collections::HashMap, io::Read},
     };
@@ -151,7 +201,7 @@ mod tests {
             .unwrap()
             .read_to_string(&mut buf)
             .unwrap();
-        let cfg: RuntimeManagerConfig = toml::from_str(&buf).unwrap();
+        let cfg: ThreadManagerConfig = toml::from_str(&buf).unwrap();
         println!("{:?}", cfg);
         }
     }
@@ -166,7 +216,7 @@ mod tests {
     #[test]
     fn process_affinity() {
-        let conf = RuntimeManagerConfig {
+        let conf = ThreadManagerConfig {
             native_configs: HashMap::from([(
                 "pool1".to_owned(),
                 NativeConfig {
@@ -207,7 +257,7 @@ mod tests {
     #[test]
     fn rayon_affinity() {
-        let conf = RuntimeManagerConfig {
+        let conf = ThreadManagerConfig {
             rayon_configs: HashMap::from([(
                 "test".to_owned(),
                 RayonConfig {
diff --git a/thread-manager/src/native_thread_runtime.rs b/thread-manager/src/native_thread_runtime.rs
index f99db65ae9fe5c..cb01eeff8ae3bd 100644
--- a/thread-manager/src/native_thread_runtime.rs
+++ b/thread-manager/src/native_thread_runtime.rs
@@ -4,9 +4,12 @@ use {
     log::error,
     serde::{Deserialize, Serialize},
     solana_metrics::datapoint_info,
-    std::sync::{
-        atomic::{AtomicUsize, Ordering},
-        Arc, Mutex,
+    std::{
+        ops::Deref,
+        sync::{
+            atomic::{AtomicUsize, Ordering},
+            Arc, Mutex,
+        },
     },
 };
@@ -23,7 +26,7 @@ impl Default for NativeConfig {
     fn default() -> Self {
         Self {
             core_allocation: CoreAllocation::OsDefault,
-            max_threads: 10,
+            max_threads: 16,
             priority: 0,
             stack_size_bytes: 2 * 1024 * 1024,
         }
@@ -31,13 +34,26 @@ impl Default for NativeConfig {
 }
 
 #[derive(Debug)]
-pub struct NativeThreadRuntime {
+pub struct NativeThreadRuntimeInner {
     pub id_count: AtomicUsize,
     pub running_count: Arc<AtomicUsize>,
     pub config: NativeConfig,
     pub name: String,
 }
 
+#[derive(Debug, Clone)]
+pub struct NativeThreadRuntime {
+    inner: Arc<NativeThreadRuntimeInner>,
+}
+
+impl Deref for NativeThreadRuntime {
+    type Target = NativeThreadRuntimeInner;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
 pub struct JoinHandle<T> {
     std_handle: Option<std::thread::JoinHandle<T>>,
     running_count: Arc<AtomicUsize>,
@@ -82,13 +98,27 @@ impl<T> Drop for JoinHandle<T> {
 impl NativeThreadRuntime {
     pub fn new(name: String, cfg: NativeConfig) -> Self {
         Self {
-            id_count: AtomicUsize::new(0),
-            running_count: Arc::new(AtomicUsize::new(0)),
-            config: cfg,
-            name,
+            inner: Arc::new(NativeThreadRuntimeInner {
+                id_count: AtomicUsize::new(0),
+                running_count: Arc::new(AtomicUsize::new(0)),
+                config: cfg,
+                name,
+            }),
         }
     }
 
+    pub fn spawn<F, T>(&self, f: F) -> anyhow::Result<JoinHandle<T>>
+    where
+        F: FnOnce() -> T,
+        F: Send + 'static,
+        T: Send + 'static,
+    {
+        let n = self.id_count.fetch_add(1, Ordering::Relaxed);
+        let name = format!("{}-{}", &self.name, n);
+        self.spawn_named(name, f)
+    }
+
+    pub fn spawn_named<F, T>(&self, name: String, f: F) -> anyhow::Result<JoinHandle<T>>
     where
         F: FnOnce() -> T,
         F: Send + 'static,
@@ -102,9 +132,8 @@ impl NativeThreadRuntime {
         let core_alloc = self.config.core_allocation.clone();
         let priority = self.config.priority;
         let chosen_cores_mask = Mutex::new(self.config.core_allocation.as_core_mask_vector());
-        let n = self.id_count.fetch_add(1, Ordering::Relaxed);
         let jh = std::thread::Builder::new()
-            .name(format!("{}-{}", &self.name, n))
+            .name(name)
             .stack_size(self.config.stack_size_bytes)
             .spawn(move || {
                 apply_policy(&core_alloc, priority, &chosen_cores_mask);
diff --git a/thread-manager/src/rayon_runtime.rs b/thread-manager/src/rayon_runtime.rs
index b731bd83051bcb..a6d3a29962b2b7 100644
--- a/thread-manager/src/rayon_runtime.rs
+++ b/thread-manager/src/rayon_runtime.rs
@@ -3,9 +3,12 @@ use {
     anyhow::Ok,
     serde::{Deserialize, Serialize},
     solana_metrics::datapoint_info,
-    std::sync::{
-        atomic::{AtomicI64, Ordering},
-        Mutex,
+    std::{
+        ops::Deref,
+        sync::{
+            atomic::{AtomicI64, Ordering},
+            Arc, Mutex,
+        },
     },
 };
@@ -22,7 +25,7 @@ impl Default for RayonConfig {
     fn default() -> Self {
         Self {
             core_allocation: CoreAllocation::OsDefault,
-            worker_threads: 4,
+            worker_threads: 16,
             priority: 0,
             stack_size_bytes: 2 * 1024 * 1024,
         }
@@ -30,10 +33,30 @@ impl Default for RayonConfig {
 }
 
 #[derive(Debug)]
-pub struct RayonRuntime {
+pub struct RayonRuntimeInner {
     pub rayon_pool: rayon::ThreadPool,
     pub config: RayonConfig,
 }
+impl Deref for RayonRuntimeInner {
+    type Target = rayon::ThreadPool;
+
+    fn deref(&self) -> &Self::Target {
+        &self.rayon_pool
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct RayonRuntime {
+    inner: Arc<RayonRuntimeInner>,
+}
+
+impl Deref for RayonRuntime {
+    type Target = RayonRuntimeInner;
+
+    fn deref(&self) -> &Self::Target {
+        self.inner.deref()
+    }
+}
 
 impl RayonRuntime {
     pub fn new(name: String, config: RayonConfig) -> anyhow::Result<Self> {
@@ -50,6 +73,8 @@ impl RayonRuntime {
                 apply_policy(&policy, priority, &chosen_cores_mask);
             })
             .build()?;
-        Ok(Self { rayon_pool, config })
+        Ok(Self {
+            inner: Arc::new(RayonRuntimeInner { rayon_pool, config }),
+        })
     }
 }
diff --git a/thread-manager/src/tokio_runtime.rs b/thread-manager/src/tokio_runtime.rs
index b8563f9ae11348..363d4140c43f27 100644
--- a/thread-manager/src/tokio_runtime.rs
+++ b/thread-manager/src/tokio_runtime.rs
@@ -3,6 +3,7 @@ use {
     serde::{Deserialize, Serialize},
     solana_metrics::datapoint_info,
     std::{
+        ops::Deref,
         sync::{
             atomic::{AtomicI64, AtomicUsize, Ordering},
             Arc, Mutex,
@@ -29,7 +30,7 @@ impl Default for TokioConfig {
     fn default() -> Self {
         Self {
             core_allocation: CoreAllocation::OsDefault,
-            worker_threads: 1,
+            worker_threads: 8,
             max_blocking_threads: 1,
             priority: 0,
             stack_size_bytes: 2 * 1024 * 1024,
@@ -45,6 +46,14 @@ pub struct TokioRuntime {
     pub counters: Arc<ThreadCounters>,
 }
 
+impl Deref for TokioRuntime {
+    type Target = tokio::runtime::Runtime;
+
+    fn deref(&self) -> &Self::Target {
+        &self.tokio
+    }
+}
+
 impl TokioRuntime {
     /// Starts the metrics sampling task on the runtime to monitor how many workers are busy doing useful work.
     pub fn start_metrics_sampling(&self, period: Duration) {
@@ -115,6 +124,15 @@ impl TokioRuntime {
             counters,
         })
     }
+
+    /// Makes a test runtime with 2 worker threads; intended only for unit tests.
+    pub fn new_for_tests() -> Self {
+        let cfg = TokioConfig {
+            worker_threads: 2,
+            ..Default::default()
+        };
+        TokioRuntime::new("solNetTest".to_owned(), cfg).unwrap()
+    }
 }
 
 /// Internal counters to keep track of worker pool utilization
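
To make the net effect of PATCH 9/9 concrete, here is a minimal usage sketch. It is an editorial illustration, not part of the patch, and the lookup name "svcIO" is made up: ThreadManager is now a cheap, clonable handle around Arc<ThreadManagerInner>, ThreadManagerConfig::default() installs a "default" runtime of each kind, and lookups for unmapped names fall back to that "default" entry instead of returning None.

use agave_thread_manager::{ThreadManager, ThreadManagerConfig};

fn demo() -> anyhow::Result<()> {
    // The default config already contains "default" native, rayon and tokio runtimes.
    let manager = ThreadManager::new(ThreadManagerConfig::default())?;

    // Clones share the same runtimes through the inner Arc, so the handle can be
    // passed freely to whichever component needs to schedule work.
    let handle = manager.clone();

    // "svcIO" is not mapped anywhere, so the lookup falls back to "default".
    let tokio = handle
        .get_tokio("svcIO")
        .expect("default tokio runtime should exist");

    // TokioRuntime now derefs to tokio::runtime::Runtime, so block_on works directly.
    let answer = tokio.block_on(async { 40 + 2 });
    assert_eq!(answer, 42);
    Ok(())
}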
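
The native pool gets the same interior-refcounting treatment. The sketch below is likewise editorial: the pool name "compute" is invented, and the join() call assumes the crate's JoinHandle wrapper forwards to std::thread::JoinHandle::join, which is how the wrapper shown above is built. It contrasts spawn, which auto-numbers threads as "<pool name>-<n>", with the new spawn_named, which lets the caller pick the thread name.

use agave_thread_manager::{ThreadManager, ThreadManagerConfig};

fn native_demo() -> anyhow::Result<()> {
    let manager = ThreadManager::new(ThreadManagerConfig::default())?;
    // "compute" is unmapped, so this also resolves to the "default" native pool.
    let pool = manager
        .get_native("compute")
        .expect("default native pool should exist");

    // spawn() derives the thread name automatically...
    let auto = pool.spawn(|| 2 + 2)?;
    // ...while spawn_named() picks an explicit name for logs and metrics.
    let named = pool.spawn_named("computeWorker".to_owned(), || 40 + 2)?;

    // join() is assumed to behave like std::thread::JoinHandle::join here.
    assert_eq!(auto.join().unwrap(), 4);
    assert_eq!(named.join().unwrap(), 42);
    Ok(())
}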