Skip to content

Commit

Permalink
Fix dtrace system level scripts (#1560)
Browse files Browse the repository at this point in the history
While debugging system level issues on London, we found the dtrace
scripts did not handle multiple session IDs correctly and were not
showing correct results.
 
To fix this, I refactored the get-ds-state and get-lr-state scripts to
correctly handle a PID with multiple sessions, and reduced the time it
takes to print a system summary.

There is now a single dtrace sub-script that gathers the output from all
processes; we then sort it and display one line for
each session in post-processing.  This speeds up run time overall.

Sample output:
```
root@oxz_switch0:~# pilot host exec -c 'hostname && /opt/oxide/crucible_dtrace/get-lr-state.sh' 8-9 
 8  BRM44220011        ok: BRM44220011
oxz_propolis-server_7f810e9c 19793 e1a38744 0 0 0 0 0 0
oxz_propolis-server_68932d85 19836 29d41aea 0 0 0 0 0 0
oxz_propolis-server_68932d85 19836 ccb3ce4c 0 0 0 0 0 0
oxz_propolis-server_501befcb 20559 7ef8a2b6 0 0 0 0 0 0
 9  BRM44220005        ok: BRM44220005
oxz_propolis-server_3354b7e3 20062 8f594433 0 0 0 0 0 0
oxz_propolis-server_54f5c356 21042 ce250057 0 0 0 0 0 0
oxz_propolis-server_0e091218 22706 405c57b7 0 0 0 0 0 0
oxz_propolis-server_057ef9d1 22713 f60d7a47 0 0 0 0 0 0
oxz_propolis-server_52e30c4d 22777 a9649821 0 0 0 0 0 0
oxz_propolis-server_52e30c4d 22777 fe4ae0b9 0 0 0 0 0 0

root@oxz_switch0:~# pilot host exec -c 'hostname && /opt/oxide/crucible_dtrace/get-ds-state.sh' 8-9                                                                   
 8  BRM44220011        ok: BRM44220011
oxz_propolis-server_7f810e9c 19793 e1a38744            active            active            active
oxz_propolis-server_68932d85 19836 29d41aea            active            active            active
oxz_propolis-server_68932d85 19836 ccb3ce4c            active            active            active
oxz_propolis-server_501befcb 20559 7ef8a2b6            active            active            active
 9  BRM44220005        ok: BRM44220005
oxz_propolis-server_3354b7e3 20062 8f594433            active            active            active
oxz_propolis-server_54f5c356 21042 ce250057            active            active            active
oxz_propolis-server_0e091218 22706 405c57b7            active            active            active
oxz_propolis-server_057ef9d1 22713 f60d7a47            active            active            active
oxz_propolis-server_52e30c4d 22777 a9649821            active            active            active
oxz_propolis-server_52e30c4d 22777 fe4ae0b9            active            active            active
root@oxz_switch0:~# 
```

---------

Co-authored-by: Alan Hanson <[email protected]>
  • Loading branch information
leftwo and Alan Hanson authored Nov 15, 2024
1 parent 515725c commit 4dd82c6
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 15 deletions.
23 changes: 23 additions & 0 deletions tools/dtrace/get-ds-state.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
 * Print a status line for every firing of the crucible upstairs
 * up-status probe, in any process that matches the provider glob.
 * Exit after 5 seconds.
 *
 * Each line contains: the PID, the first 8 characters of the session ID,
 * and the ds_state value for downstairs 0, 1 and 2.
 */
#pragma D option quiet
#pragma D option strsize=1k

crucible_upstairs*:::up-status
{
	/*
	 * Use clause-local (this->) variables rather than globals.  This
	 * probe can fire at the same time in several processes, and a
	 * global would be shared between all of those firings.
	 *
	 * The JSON status string is copied in once and reused for every
	 * field lookup below.
	 */
	this->status = copyinstr(arg1);
	this->sesh = json(this->status, "ok.session_id");

	printf("%6d %8s %17s %17s %17s\n",
	    pid,
	    substr(this->sesh, 0, 8),
	    json(this->status, "ok.ds_state[0]"),
	    json(this->status, "ok.ds_state[1]"),
	    json(this->status, "ok.ds_state[2]"));
}

/* Stop collecting after five seconds. */
tick-5s
{
	exit(0);
}
21 changes: 14 additions & 7 deletions tools/dtrace/get-ds-state.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
#!/bin/bash
#
# This script will display the downstairs states for any propolis zones it
# finds running on a system.
# Visit every zone whose name contains "propolis".
for zzz in $(zoneadm list | grep propolis); do
# Print the zone name without a newline so the dtrace output that follows
# lands on the same line.
echo -n "$zzz "
# Look up the PID of propolis-server inside that zone.
# NOTE(review): $ppid is used unquoted below, so if pgrep ever returns more
# than one PID it would split into extra dtrace arguments -- confirm.
ppid=$(zlogin "$zzz" pgrep propolis-server)
# Print the pid and the three ds_state values from the first up-status
# firing, then exit(0); only a single firing is reported per zone.
dtrace -xstrsize=1k -p $ppid -q -n 'crucible_upstairs*:::up-status { printf("%6d %17s %17s %17s", pid, json(copyinstr(arg1), "ok.ds_state[0]"), json(copyinstr(arg1), "ok.ds_state[1]"), json(copyinstr(arg1), "ok.ds_state[2]")); exit(0); }'
done
# This script will display the downstairs states for each pid/session
# it finds running on a system.
#
# Each output line is the zone name followed by the line printed by
# get-ds-state.d (pid, session, and the three downstairs states).

# Gather state on all running propolis servers, drop duplicate and blank
# lines, and stream the result straight into the loop.  A pipe is used
# instead of a fixed file name under /tmp, because a predictable path in a
# world-writable directory is unsafe for a script that is run as root.
dtrace -s /opt/oxide/crucible_dtrace/get-ds-state.d | sort -n | uniq | awk 'NF' |
  while read -r p; do
    # read (with the default IFS) has already stripped the leading padding
    # produced by the "%6d" format, so the PID is everything up to the
    # first space.
    pid=${p%% *}
    # Find the zone that process is running in and prepend it to the line.
    zone=$(ps -o zone -p "$pid" | tail -1 | cut -c 1-28)
    printf '%s %s\n' "$zone" "$p"
  done
26 changes: 26 additions & 0 deletions tools/dtrace/get-lr-state.d
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
 * Print a live repair status line for every firing of the crucible
 * upstairs up-status probe, in any process that matches the provider glob.
 * Exit after 5 seconds.
 *
 * Each line contains: the PID, the first 8 characters of the session ID,
 * the ds_live_repair_completed value for downstairs 0, 1 and 2, and then
 * the ds_live_repair_aborted value for downstairs 0, 1 and 2.
 */
#pragma D option quiet
#pragma D option strsize=1k

crucible_upstairs*:::up-status
{
	/*
	 * Use clause-local (this->) variables rather than globals.  This
	 * probe can fire at the same time in several processes, and a
	 * global would be shared between all of those firings.
	 *
	 * The JSON status string is copied in once and reused for every
	 * field lookup below.
	 */
	this->status = copyinstr(arg1);
	this->sesh = json(this->status, "ok.session_id");

	printf("%6d %8s %s %s %s %s %s %s\n",
	    pid,
	    substr(this->sesh, 0, 8),
	    json(this->status, "ok.ds_live_repair_completed[0]"),
	    json(this->status, "ok.ds_live_repair_completed[1]"),
	    json(this->status, "ok.ds_live_repair_completed[2]"),
	    json(this->status, "ok.ds_live_repair_aborted[0]"),
	    json(this->status, "ok.ds_live_repair_aborted[1]"),
	    json(this->status, "ok.ds_live_repair_aborted[2]"));
}

/* Stop collecting after five seconds. */
tick-5s
{
	exit(0);
}
23 changes: 15 additions & 8 deletions tools/dtrace/get-lr-state.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
#!/bin/bash
#
# This script will log into every propolis zone it finds and get the
# DTrace live repair counters from propolis-server in each zone.
# Visit every zone whose name contains "propolis".
for zzz in $(zoneadm list | grep propolis); do
# Print the zone name without a newline so the dtrace output that follows
# lands on the same line.
echo -n "$zzz "
# Look up the PID of propolis-server inside that zone.
# NOTE(review): $ppid is used unquoted below, so if pgrep ever returns more
# than one PID it would split into extra dtrace arguments -- confirm.
ppid=$(zlogin "$zzz" pgrep propolis-server)
# Print the pid, the three ds_live_repair_completed values and the three
# ds_live_repair_aborted values from the first up-status firing, then
# exit(0); only a single firing is reported per zone.
dtrace -xstrsize=1k -p $ppid -q -n 'crucible_upstairs*:::up-status { printf("%6d %s %s %s %s %s %s", pid, json(copyinstr(arg1), "ok.ds_live_repair_completed[0]"), json(copyinstr(arg1), "ok.ds_live_repair_completed[1]"), json(copyinstr(arg1), "ok.ds_live_repair_completed[2]"), json(copyinstr(arg1), "ok.ds_live_repair_aborted[0]"), json(copyinstr(arg1), "ok.ds_live_repair_aborted[1]"), json(copyinstr(arg1), "ok.ds_live_repair_aborted[2]")); exit(0); }'
done

# This script will display the downstairs live repair counters for each
# pid/session it finds running on a system.
#
# Each output line is the zone name followed by the line printed by
# get-lr-state.d (pid, session, three completed counts, three aborted
# counts).

# Gather state on all running propolis servers, drop duplicate and blank
# lines, and stream the result straight into the loop.  A pipe is used
# instead of a fixed file name under /tmp, because a predictable path in a
# world-writable directory is unsafe for a script that is run as root.
dtrace -s /opt/oxide/crucible_dtrace/get-lr-state.d | sort -n | uniq | awk 'NF' |
  while read -r p; do
    # read (with the default IFS) has already stripped the leading padding
    # produced by the "%6d" format, so the PID is everything up to the
    # first space.
    pid=${p%% *}
    # Find the zone that process is running in and prepend it to the line.
    zone=$(ps -o zone -p "$pid" | tail -1 | cut -c 1-28)
    printf '%s %s\n' "$zone" "$p"
  done
2 changes: 2 additions & 0 deletions tools/make-dtrace.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@ tar cvf ../../out/crucible-dtrace.tar \
README.md \
all_downstairs.d \
downstairs_count.d \
get-ds-state.d \
get-ds-state.sh \
get-lr-state.d \
get-lr-state.sh \
perf-downstairs-os.d \
perf-downstairs-three.d \
Expand Down

0 comments on commit 4dd82c6

Please sign in to comment.