From 7b99029b7b5c67d276249c32b381f9adcb131109 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Thu, 12 Dec 2024 14:47:27 -0800 Subject: [PATCH 1/6] Add DTrace scripts to Nexus zone - Adds a script for tracing all transactions run by Nexus, including their overall latency and the number of statements in each one. - Move all existing scripts to a Nexus subdirectory, and then include that whole directory in the Omicron zone for Nexus itself. - Closes #7224 --- package-manifest.toml | 1 + .../{ => nexus}/aggregate-query-latency.d | 0 tools/dtrace/{ => nexus}/slowest-queries.d | 2 +- tools/dtrace/{ => nexus}/trace-db-queries.d | 0 tools/dtrace/nexus/trace-transactions.d | 59 +++++++++++++++++++ 5 files changed, 61 insertions(+), 1 deletion(-) rename tools/dtrace/{ => nexus}/aggregate-query-latency.d (100%) rename tools/dtrace/{ => nexus}/slowest-queries.d (99%) rename tools/dtrace/{ => nexus}/trace-db-queries.d (100%) create mode 100755 tools/dtrace/nexus/trace-transactions.d diff --git a/package-manifest.toml b/package-manifest.toml index 809c1ce6ca..2e7f489ae8 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -125,6 +125,7 @@ source.paths = [ { from = "smf/nexus/{{rack-topology}}", to = "/var/svc/manifest/site/nexus" }, { from = "out/console-assets", to = "/var/nexus/static" }, { from = "schema/crdb", to = "/var/nexus/schema/crdb" }, + { from = "tools/dtrace/nexus", to = "/var/nexus/dtrace" }, ] output.type = "zone" setup_hint = """ diff --git a/tools/dtrace/aggregate-query-latency.d b/tools/dtrace/nexus/aggregate-query-latency.d similarity index 100% rename from tools/dtrace/aggregate-query-latency.d rename to tools/dtrace/nexus/aggregate-query-latency.d diff --git a/tools/dtrace/slowest-queries.d b/tools/dtrace/nexus/slowest-queries.d similarity index 99% rename from tools/dtrace/slowest-queries.d rename to tools/dtrace/nexus/slowest-queries.d index 76e22de22f..08b40d4d79 100755 --- a/tools/dtrace/slowest-queries.d +++ b/tools/dtrace/nexus/slowest-queries.d @@ -35,7 +35,7 @@ diesel_db$target:::query-done query[this->conn_id] = NULL; } -tick-5s +tick-10s { printf("\n%Y\n", walltimestamp); trunc(@, 5); diff --git a/tools/dtrace/trace-db-queries.d b/tools/dtrace/nexus/trace-db-queries.d similarity index 100% rename from tools/dtrace/trace-db-queries.d rename to tools/dtrace/nexus/trace-db-queries.d diff --git a/tools/dtrace/nexus/trace-transactions.d b/tools/dtrace/nexus/trace-transactions.d new file mode 100755 index 0000000000..beffb3b322 --- /dev/null +++ b/tools/dtrace/nexus/trace-transactions.d @@ -0,0 +1,59 @@ +#!/usr/sbin/dtrace -qs + +/* Trace all transactions to the control plane database with their latency */ + +dtrace:::BEGIN +{ + printf("Tracing all database transactions for nexus PID %d, use Ctrl-C to exit\n", $target); +} + +/* + * Record the start and number of statements for each transaction. + * + * We're only recording the initial transactions, which are not nested. + */ +diesel_db$target:::transaction-start +/arg1 == 0/ +{ + this->key = copyinstr(arg0); + ts[this->key] = timestamp; + n_statements[this->key] = 0; +} + +/* + * When a query runs in the context of a transaction (on the same connection), + * bump the statement counter. + */ +diesel_db$target:::query-start +/ts[copyinstr(arg1)]/ +{ + n_statements[copyinstr(arg1)] += 1 +} + +/* + * As transactions complete, print the number of statements we ran and the + * duration. + * + * NOTE: The depth, arg1, is intentionally 1 here. When we first enter a + * non-nested transaction, the depth is 0, because there is no outer + * transaction. However, when the `transaction-done` probe fires, we are still + * in a transaction, in which case the depth is currently 1. It's decremented + * _after_ the transaction itself is committed or rolled back. + */ +diesel_db$targe:::transaction-done +/arg1 == 1 && ts[copyinstr(arg0)]/ +{ + this->key = copyinstr(arg0); + this->conn_id = json(this->key, "ok"); + this->latency = (timestamp - ts[this->key]) / 1000; + this->n_statements = n_statements[this->key]; + printf( + "%s %d statements on connection %s (%d us)\n", + arg2 ? "COMMIT" : "ROLLBACK", + n_statements[this->key], + this->conn_id, + this->latency + ); + ts[this->key] = 0; + n_statements[this->key] = 0; +} From c371852c5bb592db05db817d51e8bca9e57f6006 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Fri, 13 Dec 2024 19:04:26 +0000 Subject: [PATCH 2/6] Update diesel-dtrace, fix transaction depth predicate in probe --- Cargo.lock | 2 +- tools/dtrace/nexus/trace-transactions.d | 10 ++-------- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 04cea43c75..01767c9d4f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11007,7 +11007,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "proc-macro2", "quote", "syn 2.0.87", diff --git a/tools/dtrace/nexus/trace-transactions.d b/tools/dtrace/nexus/trace-transactions.d index beffb3b322..004efed814 100755 --- a/tools/dtrace/nexus/trace-transactions.d +++ b/tools/dtrace/nexus/trace-transactions.d @@ -33,15 +33,9 @@ diesel_db$target:::query-start /* * As transactions complete, print the number of statements we ran and the * duration. - * - * NOTE: The depth, arg1, is intentionally 1 here. When we first enter a - * non-nested transaction, the depth is 0, because there is no outer - * transaction. However, when the `transaction-done` probe fires, we are still - * in a transaction, in which case the depth is currently 1. It's decremented - * _after_ the transaction itself is committed or rolled back. */ -diesel_db$targe:::transaction-done -/arg1 == 1 && ts[copyinstr(arg0)]/ +diesel_db$target:::transaction-done +/arg1 == 0 && ts[copyinstr(arg0)]/ { this->key = copyinstr(arg0); this->conn_id = json(this->key, "ok"); From b94e9bf9bc9d0fbfbc6450a5b52882b2781689e8 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Fri, 13 Dec 2024 19:09:19 +0000 Subject: [PATCH 3/6] hakariiiiii --- Cargo.lock | 1 + workspace-hack/Cargo.toml | 2 ++ 2 files changed, 3 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 01767c9d4f..784c010274 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7396,6 +7396,7 @@ dependencies = [ "getrandom", "group", "hashbrown 0.15.1", + "heck 0.4.1", "hex", "hickory-proto", "hmac", diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 31677ed8c1..678170b25e 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -63,6 +63,7 @@ generic-array = { version = "0.14.7", default-features = false, features = ["mor getrandom = { version = "0.2.15", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.15.1" } +heck = { version = "0.4.1" } hex = { version = "0.4.3", features = ["serde"] } hickory-proto = { version = "0.24.1", features = ["text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } @@ -182,6 +183,7 @@ generic-array = { version = "0.14.7", default-features = false, features = ["mor getrandom = { version = "0.2.15", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.15.1" } +heck = { version = "0.4.1" } hex = { version = "0.4.3", features = ["serde"] } hickory-proto = { version = "0.24.1", features = ["text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } From 9853e1b4d54c9bebd96c028c7429f35447bd3a45 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Fri, 13 Dec 2024 22:13:35 +0000 Subject: [PATCH 4/6] Use Nexus-specific transaction start / done probes --- tools/dtrace/nexus/trace-transactions.d | 26 +++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/tools/dtrace/nexus/trace-transactions.d b/tools/dtrace/nexus/trace-transactions.d index 004efed814..baf7818f72 100755 --- a/tools/dtrace/nexus/trace-transactions.d +++ b/tools/dtrace/nexus/trace-transactions.d @@ -10,14 +10,26 @@ dtrace:::BEGIN /* * Record the start and number of statements for each transaction. * - * We're only recording the initial transactions, which are not nested. + * Note that we're using the Nexus-provided transaction start / done probes. + * This lets us associate the other data we might collect (number of statements, + * ustacks, etc) with the Nexus code itself. Although there are transaction + * start / done probes in `diesel-dtrace`, the existing way we run transactions + * with `async-bb8-diesel` involves spawning a future to run the transactions on + * a blocking thread-pool. That spawning makes it impossible to associate the + * context in which the `diesel-dtrace` probes fire with the Nexus code that + * actually spawned the transaction itself. */ -diesel_db$target:::transaction-start -/arg1 == 0/ +nexus_db_queries$target:::transaction-start { this->key = copyinstr(arg0); + transaction_names[this->key] = copyinstr(arg1); ts[this->key] = timestamp; n_statements[this->key] = 0; + printf( + "Started transaction '%s' on conn %s\n", + transaction_names[this->key], + json(this->key, "ok") + ); } /* @@ -34,20 +46,22 @@ diesel_db$target:::query-start * As transactions complete, print the number of statements we ran and the * duration. */ -diesel_db$target:::transaction-done -/arg1 == 0 && ts[copyinstr(arg0)]/ +nexus_db_queries$target:::transaction-done +/ts[copyinstr(arg0)]/ { this->key = copyinstr(arg0); this->conn_id = json(this->key, "ok"); this->latency = (timestamp - ts[this->key]) / 1000; this->n_statements = n_statements[this->key]; printf( - "%s %d statements on connection %s (%d us)\n", + "%s %d statement(s) in transaction '%s' on connection %s (%d us)\n", arg2 ? "COMMIT" : "ROLLBACK", n_statements[this->key], + transaction_names[this->key], this->conn_id, this->latency ); ts[this->key] = 0; n_statements[this->key] = 0; + transaction_names[this->key] = 0; } From 5e90bacead3a505c81098c0d0e39254506261af3 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Fri, 13 Dec 2024 23:03:24 +0000 Subject: [PATCH 5/6] Move D scripts to /opt --- package-manifest.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package-manifest.toml b/package-manifest.toml index 2e7f489ae8..376c87d9c1 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -125,7 +125,7 @@ source.paths = [ { from = "smf/nexus/{{rack-topology}}", to = "/var/svc/manifest/site/nexus" }, { from = "out/console-assets", to = "/var/nexus/static" }, { from = "schema/crdb", to = "/var/nexus/schema/crdb" }, - { from = "tools/dtrace/nexus", to = "/var/nexus/dtrace" }, + { from = "tools/dtrace/nexus", to = "/opt/oxide/omicron-nexus/dtrace" }, ] output.type = "zone" setup_hint = """ From 2d0edd7fe91c7e13881879dccf13399351d475d3 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Fri, 13 Dec 2024 23:50:05 +0000 Subject: [PATCH 6/6] Alan has spoken --- package-manifest.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package-manifest.toml b/package-manifest.toml index 376c87d9c1..83b1ba8168 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -125,7 +125,7 @@ source.paths = [ { from = "smf/nexus/{{rack-topology}}", to = "/var/svc/manifest/site/nexus" }, { from = "out/console-assets", to = "/var/nexus/static" }, { from = "schema/crdb", to = "/var/nexus/schema/crdb" }, - { from = "tools/dtrace/nexus", to = "/opt/oxide/omicron-nexus/dtrace" }, + { from = "tools/dtrace/nexus", to = "/opt/oxide/dtrace/nexus" }, ] output.type = "zone" setup_hint = """