#!/usr/bin/perl -w
#
# wss.pl Estimate the working set size (WSS) for a process on Linux.
#
# This uses /proc/PID/clear_refs and works on older Linux kernels (2.6.22+);
# however, it comes with the warnings below. See its companion tools, wss-v1.c
# and wss-v2.c, which use the newer idle page tracking from Linux 4.3+, but
# are much slower to run (as described in those tools). The WSS measured
# here is page-based.
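#
# In essence, one measurement boils down to the following (an illustrative
# sketch only; the real code below adds timing, options, and error handling):
#
#   open my $c, '>', "/proc/PID/clear_refs"; print $c "1"; close $c;  # reset flags
#   select(undef, undef, undef, $duration);                           # sleep
#   # ...then sum the "Referenced:" kB values from /proc/PID/smaps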
#
# http://www.brendangregg.com/wss.pl
#
# USAGE: wss [options] PID duration(s)
# eg,
# wss 181 0.01 # measure PID 181 WSS for 10 milliseconds
# wss -h # full USAGE
#
# COLUMNS:
# - Est(s): Estimated WSS measurement duration: this accounts for delays
# with setting and reading pagemap data, which inflates the
# intended sleep duration.
# - RSS(MB): Resident Set Size (Mbytes). The main memory size.
# - PSS(MB): Proportional Set Size (Mbytes). Accounting for shared pages.
# - Ref(MB): Referenced (Mbytes) during the specified duration.
# This is the working set size metric.
# - Dur(s): Full duration of measurement (seconds), from the start of
# setting the page flags to the completion of reading them.
# - Slp(s): Total sleep time.
#
# I could add more columns, but that's what pmap -X is for.
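#
# For reference, the default output has this shape (values are illustrative):
#
#   Est(s)     RSS(MB)    PSS(MB)    Ref(MB)
#   0.107       403.66     400.59      28.02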
#
# WARNING: This tool uses /proc/PID/clear_refs and /proc/PID/smaps, which can
# cause slightly higher application latency while the kernel walks process page
# structures. For large processes (> 100 Gbytes) this period of higher latency
# can last over 1 second (the system time of this tool). This
# also resets the referenced flag, which might confuse the kernel as to which
# pages to reclaim, especially if swapping is active. This also activates some
# old kernel code that may not have been used in your environment before, and
# which modifies page flags: I'd guess there is a risk of an undiscovered
# kernel panic (the Linux mm community may be able to say how real this risk
# is). Test in a lab environment for your kernel versions, and consider this
# experimental: use at your own risk.
#
# About the duration columns: here's how you might think this tool should work:
#
# 1. reset referenced page flags for a process (instantaneous)
# 2. sleep for the duration
# 3. read referenced page flags (instantaneous)
#
# Here's what actually happens:
#
# 1. begin resetting page flags for a process
# 2. [... CPU time passes ...]
# 3. page flag reset completes
# 4. sleep for a duration
# 5. begin reading page flags
# 6. [... CPU time passes ...]
# 7. read complete
#
# So we get our WSS page count, but does it reflect just the sleep duration?
# No, the working set is being touched during stages 2 and 6, which inflate
# the sleep duration. Those stages for large processes (>100 Gbytes) can take
# over 500 ms of CPU time, so a 10 ms target duration can really reflect
# hundreds of milliseconds of memory changes.
#
# To inform the end user of this duration inflation, this tool provides an
# estimated duration, measuring from the midpoint of stage 2 to the midpoint of
# stage 6. For small processes, this estimated duration will likely equal the
# intended duration. But for large processes, it will show the inflated time.
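# (In the code below this is computed as $esttime = $durtime - $settime/2 -
# $readtime/2, where $settime covers stages 1-3 and $readtime covers stages
# 5-7. For example, 500 ms of resetting, a 10 ms sleep, and 500 ms of reading
# give an estimated duration of roughly 1.01 - 0.25 - 0.25 = 0.51 seconds.)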
#
# There is also an experimental, undocumented (in USAGE), and dangerous option
# for dealing with the duration inflation in a different way: --pausetarget,
# which will pause the target process (SIGSTOP) and only let it run for the
# duration of the measurements (using: SIGCONT -> measure -> SIGSTOP). Large
# processes (> 100 Gbytes) could be paused for 1 second or longer, creating
# severe application latency. This option is deactivated in the code, and
# only exists for experimentation in a lab environment.
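# If a target process is ever left stopped (e.g., this tool was killed
# mid-measurement), it can be resumed manually with: kill -CONT PID.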
#
# Copyright 2018 Netflix, Inc.
# Licensed under the Apache License, Version 2.0 (the "License")
#
# 10-Jan-2018 Brendan Gregg Created this.
use strict;
use Getopt::Long;
use Time::HiRes;
$| = 1;
sub usage {
    die <<USAGE_END;
USAGE: wss [options] PID duration(s)
    -C         # show cumulative output every duration(s)
    -s secs    # take duration(s) snapshots after secs pauses
    -d secs    # total duration of measurement (for -s or -C)
    -P steps   # profile run (cumulative), from duration(s)
    -t         # show additional timestamp columns
  eg,
    wss 181 0.01       # measure PID 181 WSS for 10 milliseconds
    wss 181 5          # measure PID 181 WSS for 5 seconds (same overhead)
    wss -C 181 5       # show PID 181 growth every 5 seconds
    wss -C -d 10 181 1 # PID 181 growth each second for 10 seconds total
    wss -s 1 181 0.01  # show a 10 ms WSS snapshot every 1 second
    wss -s 0 181 1     # measure WSS every 1 second (not cumulative)
    wss -P 10 181 0.01 # 10 step power-of-2 profile, starting with 0.01s
USAGE_END
}
### options
my $snapshot = -1;
my $totalsecs = 999999999;
my $cumulative = 0;
my $profile = 0;
my $moretimes = 0;
my $pausetarget = 0;
GetOptions(
    'snapshot|s=f' => \$snapshot,
    'duration|d=f' => \$totalsecs,
    'cumulative|C' => \$cumulative,
    'profile|P=i'  => \$profile,
    'moretimes|t'  => \$moretimes,
    'pausetarget'  => \$pausetarget,
    'help|h'       => \&usage,
) or usage();
my $pid = $ARGV[0];
my $duration = $ARGV[1];
if ($pausetarget) {
    print STDERR "--pausetarget disabled (too dangerous). See code.\n";
    exit;
    # if you comment this out, be aware you're sending SIGSTOP/SIGCONTs
    # to the target process, which will pause it, creating latency. If
    # wss.pl crashes or is SIGKILL'd, then the target process can be left
    # in SIGSTOP and will need to be SIGCONT'd manually.
}
if (@ARGV < 2 || $ARGV[0] eq "-h" || $ARGV[0] eq "--help") {
    usage();
    exit;
}
if ((!!$cumulative + ($snapshot != -1) + !!$profile) > 1) {
    print STDERR "ERROR: Can't combine -C, -s, and -P. Exiting.\n";
    exit;
}
if ($duration < 0.001) {
    print STDERR "ERROR: Duration too short. Exiting.\n";
    exit;
}
my $clear_ref = "/proc/$pid/clear_refs";
my $smaps = "/proc/$pid/smaps";
my @profilesecs = ($duration);
my $d;
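# For -P, build the list of per-step sleep durations: the first step uses the
# given duration, and each later step doubles the previous one (e.g., -P 10
# with 0.01s sleeps 0.01, 0.01, 0.02, 0.04, ... so the cumulative measured
# time doubles at each step).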
if ($profile) {
    $d = $duration;
    for (my $i = 0; $i < $profile - 1; $i++) {
        push(@profilesecs, $d);
        $d *= 2;
    }
}
if ($pausetarget) {
    shift(@profilesecs);
    push(@profilesecs, $d);
}
### headers
if ($profile) {
    printf "Watching PID $pid page references grow, profile beginning with $duration seconds, $profile steps...\n";
} elsif ($cumulative) {
    printf "Watching PID $pid page references grow, output every $duration seconds...\n";
} elsif ($snapshot != -1) {
    if ($snapshot == 0) {
        printf "Watching PID $pid page references for every $duration seconds...\n";
    } else {
        printf "Watching PID $pid page references for $duration seconds, repeating after $snapshot second pauses...\n";
    }
} else {
    printf "Watching PID $pid page references during $duration seconds...\n";
}
printf "%-7s %-7s ", "Slp(s)", "Dur(s)" if $moretimes;
printf "%-7s %10s %10s %10s\n", "Est(s)", "RSS(MB)", "PSS(MB)", "Ref(MB)";
### main
my ($rss, $pss, $referenced);
my ($ts0, $ts1, $ts2, $ts3, $ts4, $ts5);
my ($settime, $sleeptime, $readtime, $durtime, $esttime);
my $metric;
my $firstreset = 0;
$sleeptime = 0;
### cleanup
sub cleanup {
    kill -CONT, $pid;
    exit 0;
}
if ($pausetarget) {
    $SIG{INT} = 'cleanup';   # Ctrl-C
    $SIG{QUIT} = 'cleanup';  # Ctrl-\
    $SIG{TERM} = 'cleanup';  # TERM
}
$ts0 = Time::HiRes::gettimeofday();
while (1) {
    # reset referenced flags
    if (not $firstreset or $snapshot != -1 or $pausetarget) {
        kill -STOP, $pid if $pausetarget;
        open CLEAR, ">$clear_ref" or die "ERROR: can't open $clear_ref (older kernel?): $!";
        $ts1 = Time::HiRes::gettimeofday();
        print CLEAR "1";
        close CLEAR;
        $ts2 = Time::HiRes::gettimeofday();
        $settime = $ts2 - $ts1;
        $firstreset = 1;
    }
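    # Writing "1" to clear_refs asks the kernel to clear the referenced bits on
    # all of this process's pages (see the kernel's proc documentation); pages
    # touched during the sleep below get the bit set again, which is what the
    # smaps "Referenced:" values later report.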
    # pause
    my $sleep = $duration;
    if ($profile) {
        $sleep = shift @profilesecs;
        last unless defined $sleep;
    }
    kill -CONT, $pid if $pausetarget;
    $ts3 = Time::HiRes::gettimeofday();
    select(undef, undef, undef, $sleep);
    $ts4 = Time::HiRes::gettimeofday();
    kill -STOP, $pid if $pausetarget;
    # read referenced counts
    $rss = $pss = $referenced = 0;
    open SMAPS, $smaps or die "ERROR: can't open $smaps: $!";
    # slurp smaps quickly to minimize unwanted WSS growth during reading:
    my @smaps = <SMAPS>;
    $ts5 = Time::HiRes::gettimeofday();
    close SMAPS;
    kill -CONT, $pid if ($pausetarget and $snapshot != -1);
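    # Each mapping's entry in smaps contains lines such as "Rss:   1234 kB",
    # "Pss:", and "Referenced:"; sum the kB field of those lines across all
    # mappings to get the per-process totals.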
    foreach my $line (@smaps) {
        if ($line =~ /^Rss:/) {
            $metric = \$rss;
        } elsif ($line =~ /^Pss:/) {
            $metric = \$pss;
        } elsif ($line =~ /^Referenced:/) {
            $metric = \$referenced;
        } else {
            next;
        }
        # now pay the split cost, after filtering out most lines:
        my ($junk1, $kbytes, $junk2) = split ' ', $line;
        $$metric += $kbytes;
    }
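    # In cumulative and profile modes the referenced flags are only reset once
    # (before the first iteration), so $sleeptime accumulates across rounds; in
    # snapshot or pausetarget modes each round stands alone.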
    # time calculations
    if ($snapshot != -1 or $pausetarget) {
        $sleeptime = $ts4 - $ts3;
    } else {
        $sleeptime += $ts4 - $ts3;
    }
    $readtime = $ts5 - $ts4;
    $durtime = $ts5 - $ts1;
    if ($pausetarget) {
        $esttime = $ts4 - $ts3;
    } else {
        $esttime = $durtime - ($settime / 2) - ($readtime / 2);
    }
    # output
    printf "%-7.3f %-7.3f ", $sleeptime, $durtime if $moretimes;
    printf "%-7.3f %10.2f %10.2f %10.2f\n", $esttime,
        $rss / 1024, $pss / 1024, $referenced / 1024;
    # snapshot sleeps
    if ($snapshot != -1) {
        select(undef, undef, undef, $snapshot);
    } elsif (not $cumulative and not $profile) {
        last;
    }
    if ($ts5 - $ts0 >= $totalsecs) {
        last;
    }
}
kill -CONT, $pid if $pausetarget;