3 # iotop - display top disk I/O events by process.
4 # Written using DTrace (Solaris 10 3/05).
6 # This is measuring disk events that have made it past system caches.
8 # $Id: iotop 8 2007-08-06 05:55:26Z brendan $
10 # USAGE: iotop [-C] [-D|-o|-P] [-j|-Z] [-d device] [-f filename]
11 # [-m mount_point] [-t top] [interval [count]]
13 # iotop # default output, 5 second intervals
15 # -C # don't clear the screen
16 # -D # print delta times, elapsed, us
17 # -j # print project ID
18 # -o # print disk delta times, us
19 # -P # print %I/O (disk delta times)
21 # -d device # instance name to snoop (eg, dad0)
22 # -f filename # full pathname of file to snoop
23 # -m mount_point # this FS only (will skip raw events)
24 # -t top # print top number only
26 # iotop 1 # 1 second samples
27 # iotop -C # don't clear the screen
28 # iotop -P # print %I/O (time based)
29 # iotop -j # print project IDs
30 # iotop -Z # print zone IDs
31 # iotop -t 20 # print top 20 lines only
32 # iotop -C 5 12 # print 12 x 5 second samples
37 # PPID parent process ID
40 # CMD process command name
42 # MAJ device major number
43 # MIN device minor number
44 # D direction, Read or Write
45 # BYTES total size of operations, bytes
46 # ELAPSED total elapsed from request to completion, us
47 # DISKTIME total time for disk to complete request, us
48 # %I/O percent disk I/O, based on time (DISKTIME)
49 # load 1 min load average
50 # disk_r total disk read Kbytes for sample
51 # disk_w total disk write Kbytes for sample
54 # * There are two different delta times reported. -D prints the
55 # elapsed time from the disk request (strategy) to the disk completion
56 # (iodone); -o prints the time for the disk to complete that event
57 # since its last event (time between iodones), or, the time to the
58 # strategy if the disk had been idle.
59 # * The %I/O value can exceed 100%. It represents how busy a process is
60 # making the disks, in terms of a single disk. A value of 200% could
61 # mean 2 disks are busy at 100%, or 4 disks at 50%...
64 # BigAdmin: DTrace, http://www.sun.com/bigadmin/content/dtrace
65 # Solaris Dynamic Tracing Guide, http://docs.sun.com
66 # DTrace Tools, http://www.brendangregg.com/dtrace.html
68 # INSPIRATION: top(1) by William LeFebvre
70 # COPYRIGHT: Copyright (c) 2005, 2006 Brendan Gregg.
74 # The contents of this file are subject to the terms of the
75 # Common Development and Distribution License, Version 1.0 only
76 # (the "License"). You may not use this file except in compliance
79 # You can obtain a copy of the license at Docs/cddl1.txt
80 # or http://www.opensolaris.org/os/licensing.
81 # See the License for the specific language governing permissions
82 # and limitations under the License.
87 # - This can print errors while running on servers with Veritas volumes.
89 # Author: Brendan Gregg [Sydney, Australia]
91 # 15-Jul-2005 Brendan Gregg Created this.
92 # 20-Apr-2006 " " Last update.
96 ##############################
97 # --- Process Arguments ---
100 ### default variables
# Defaults: clear the screen (opt_clear=1), key the output by UID (opt_def=1),
# report bytes (opt_bytes=1), no filtering (filter=0), 5 second interval.
# count=-1 and top=0 look like "not set" values (presumably no sample limit
# and no truncation -- confirm against the code that consumes them).
# device, filename and mount start as "." which appears to be a placeholder
# for filters that are not in use.
101 opt_device=0; opt_file=0; opt_mount=0; opt_clear=1; opt_proj=0; opt_zone=0
102 opt_percent=0; opt_def=1; opt_bytes=1; filter=0; device=.; filename=.; mount=.
103 opt_top=0; opt_elapsed=0; opt_dtime=0; interval=5; count=-1; top=0
# Parse command-line flags (only part of the option loop is visible here).
#   -D, -o and -P each switch the reported statistic away from bytes
#   (opt_bytes=0); -P also turns on opt_dtime, since %I/O is computed from
#   disk delta times.
#   -j and -Z replace the default UID column (opt_def=0).
#   -d, -f, -m and -t take an argument, stored from $OPTARG.
106 while getopts CDd:f:hjm:oPt:Z name
110 D) opt_elapsed=1; opt_bytes=0 ;;
111 d) opt_device=1; device=$OPTARG ;;
112 f) opt_file=1; filename=$OPTARG ;;
113 j) opt_proj=1; opt_def=0 ;;
114 m) opt_mount=1; mount=$OPTARG ;;
115 o) opt_dtime=1; opt_bytes=0 ;;
116 P) opt_percent=1; opt_dtime=1; opt_bytes=0 ;;
117 t) opt_top=1; top=$OPTARG ;;
118 Z) opt_zone=1; opt_def=0 ;;
120 USAGE: iotop [-C] [-D|-o|-P] [-j|-Z] [-d device] [-f filename]
121 [-m mount_point] [-t top] [interval [count]]
123 -C # don't clear the screen
124 -D # print delta times, elapsed, us
125 -j # print project ID
126 -o # print disk delta times, us
127 -P # print %I/O (disk delta times)
129 -d device # instance name to snoop
130 -f filename # snoop this file only
131 -m mount_point # this FS only
132 -t top # print top number only
134 iotop # default output, 5 second samples
135 iotop 1 # 1 second samples
136 iotop -P # print %I/O (time based)
137 iotop -m / # snoop events on filesystem / only
138 iotop -t 20 # print top 20 lines only
139 iotop -C 5 12 # print 12 x 5 second samples
# Drop the parsed options so that $1 is the first positional argument.
145 shift $(( $OPTIND - 1 ))
# Positional arguments, per the usage text: [interval [count]].
# NOTE(review): inside [[ ]], ">" is a string (lexical) comparison, not a
# numeric one -- confirm this behaves as intended for non-numeric input.
148 if [[ "$1" > 0 ]]; then
151 if [[ "$1" > 0 ]]; then
# Option combination checks: -j together with -Z, and -D together with
# -o/-P (the handling code is not visible in this excerpt).
154 if (( opt_proj && opt_zone )); then
157 if (( opt_elapsed && opt_dtime )); then
# True when any of -d, -m or -f was given (presumably where filtering is
# switched on -- confirm).
160 if (( opt_device || opt_mount || opt_file )); then
# opt_clear is 1 by default, so this branch is taken unless it was turned off.
163 if (( opt_clear )); then
171 #################################
172 # --- Main Program, DTrace ---
# Run the D program inline. Each '$var' below closes the single-quoted D
# text, lets the shell expand the variable, and reopens the quote, so the
# option values chosen above become D "inline" constants.
174 /usr/sbin/dtrace -n '
176 * Command line arguments
178 inline int OPT_def = '$opt_def';
179 inline int OPT_proj = '$opt_proj';
180 inline int OPT_zone = '$opt_zone';
181 inline int OPT_clear = '$opt_clear';
182 inline int OPT_bytes = '$opt_bytes';
183 inline int OPT_elapsed = '$opt_elapsed';
184 inline int OPT_dtime = '$opt_dtime';
185 inline int OPT_percent = '$opt_percent';
186 inline int OPT_device = '$opt_device';
187 inline int OPT_mount = '$opt_mount';
188 inline int OPT_file = '$opt_file';
189 inline int OPT_top = '$opt_top';
190 inline int INTERVAL = '$interval';
191 inline int COUNTER = '$count';
192 inline int FILTER = '$filter';
193 inline int TOP = '$top';
194 inline string DEVICE = "'$device'";
195 inline string FILENAME = "'$filename'";
196 inline string MOUNT = "'$mount'";
197 inline string CLEAR = "'$clearstr'";
/* quiet: only output produced explicitly by this program is printed */
199 #pragma D option quiet
201 /* boost the following if you get "dynamic variable drops" */
202 #pragma D option dynvarsize=8m
211 /* starting values */
/* startup notice printed before the first report */
217 printf("Tracing... Please wait.\n");
221 * Check event is being traced
/*
 * this->ok records whether the event passes the filters. With FILTER
 * unset it starts at 1; with FILTER set it starts at 0 and any single
 * matching filter sets it to 1, so several filters combine as a logical OR.
 * The "cond ? assignment : 1" form acts as a conditional statement; the
 * ": 1" arm is a value with no effect.
 */
226 /* default is to trace unless filtering, */
227 this->ok = FILTER ? 0 : 1;
229 /* check each filter, */
230 (OPT_device == 1 && DEVICE == args[1]->dev_statname)? this->ok = 1 : 1;
231 (OPT_file == 1 && FILENAME == args[2]->fi_pathname) ? this->ok = 1 : 1;
232 (OPT_mount == 1 && MOUNT == args[2]->fi_mount) ? this->ok = 1 : 1;
236 * Reset last_event for disk idle -> start
237 * this prevents idle time being counted as disk time.
/* predicate: true only when no event is pending on this device */
240 /! pending[args[1]->dev_statname]/
242 /* save last disk event */
243 last_event[args[1]->dev_statname] = timestamp;
247 * Store entry details
252 /* these are used as a unique disk event key, */
253 this->dev = args[0]->b_edev;
254 this->blk = args[0]->b_blkno;
256 /* save disk event details, */
/*
 * uid, pid, ppid, execname and curpsinfo describe the thread that is
 * current when this clause fires. They are stored under the (dev, blk)
 * key and read back with the same key in the completion clause.
 */
257 start_uid[this->dev, this->blk] = uid;
258 start_pid[this->dev, this->blk] = pid;
259 start_ppid[this->dev, this->blk] = ppid;
260 start_comm[this->dev, this->blk] = execname;
261 start_time[this->dev, this->blk] = timestamp;
262 start_proj[this->dev, this->blk] = curpsinfo->pr_projid;
263 start_zone[this->dev, this->blk] = curpsinfo->pr_zoneid;
264 start_rw[this->dev, this->blk] = args[0]->b_flags & B_READ ? "R" : "W";
/* per-direction byte totals, shown as disk_r and disk_w in the report header */
265 disk_r += args[0]->b_flags & B_READ ? args[0]->b_bcount : 0;
266 disk_w += args[0]->b_flags & B_READ ? 0 : args[0]->b_bcount;
268 /* increase disk event pending count */
269 pending[args[1]->dev_statname]++;
273 * Process and Print completion
278 /* decrease disk event pending count */
279 pending[args[1]->dev_statname]--;
285 /* fetch entry values */
/* the same (dev, blk) key that was used when the details were saved */
286 this->dev = args[0]->b_edev;
287 this->blk = args[0]->b_blkno;
288 this->suid = start_uid[this->dev, this->blk];
289 this->spid = start_pid[this->dev, this->blk];
290 this->sppid = start_ppid[this->dev, this->blk];
291 this->sproj = start_proj[this->dev, this->blk];
292 this->szone = start_zone[this->dev, this->blk];
293 self->scomm = start_comm[this->dev, this->blk];
294 this->stime = start_time[this->dev, this->blk];
295 this->etime = timestamp; /* endtime */
296 this->elapsed = this->etime - this->stime;
297 self->rw = start_rw[this->dev, this->blk];
/*
 * disk time: time since the previous event recorded for this device,
 * or 0 when no previous event has been recorded
 */
298 this->dtime = last_event[args[1]->dev_statname] == 0 ? 0 :
299 timestamp - last_event[args[1]->dev_statname];
/* clear the saved entries for this key now that the event has completed */
302 start_uid[this->dev, this->blk] = 0;
303 start_pid[this->dev, this->blk] = 0;
304 start_ppid[this->dev, this->blk] = 0;
305 start_time[this->dev, this->blk] = 0;
306 start_comm[this->dev, this->blk] = 0;
307 start_zone[this->dev, this->blk] = 0;
308 start_proj[this->dev, this->blk] = 0;
309 start_rw[this->dev, this->blk] = 0;
312 * Choose statistic to track
/*
 * The option parsing clears OPT_bytes whenever a time option is chosen,
 * so one of these three is expected to assign this->value. The time
 * values are divided by 1000 (documented in the header as microseconds).
 */
314 OPT_bytes ? this->value = args[0]->b_bcount : 1;
315 OPT_elapsed ? this->value = this->elapsed / 1000 : 1;
316 OPT_dtime ? this->value = this->dtime / 1000 : 1;
/*
 * The three aggregation statements differ only in their first key:
 * UID (default), project ID or zone ID.
 */
321 OPT_def ? @out[this->suid, this->spid, this->sppid, self->scomm,
322 args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
323 self->rw] = sum(this->value) : 1;
324 OPT_proj ? @out[this->sproj, this->spid, this->sppid, self->scomm,
325 args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
326 self->rw] = sum(this->value) : 1;
327 OPT_zone ? @out[this->szone, this->spid, this->sppid, self->scomm,
328 args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
329 self->rw] = sum(this->value) : 1;
331 /* save last disk event */
332 last_event[args[1]->dev_statname] = timestamp;
339 * Prevent pending from underflowing
340 * this can happen if this program is started during disk events.
/* clamp the per-device pending count back to zero once it has gone negative */
343 /pending[args[1]->dev_statname] < 0/
345 pending[args[1]->dev_statname] = 0;
362 /* fetch 1 min load average */
/*
 * hp_avenrun[0] is handled as a fixed-point value scaled by 65536:
 * load1a is the integer part and load1b the first two decimal places.
 */
363 this->load1a = `hp_avenrun[0] / 65536;
364 this->load1b = ((`hp_avenrun[0] % 65536) * 100) / 65536;
366 /* convert counters to Kbytes */
/* summary line: clear string (if enabled), time, load and disk totals */
371 OPT_clear ? printf("%s", CLEAR) : 1;
372 printf("%Y, load: %d.%02d, disk_r: %6d KB, disk_w: %6d KB\n\n",
373 walltimestamp, this->load1a, this->load1b, disk_r, disk_w);
/* column headings: the first column and the last column depend on the options */
376 OPT_def ? printf(" UID ") : 1;
377 OPT_proj ? printf(" PROJ ") : 1;
378 OPT_zone ? printf(" ZONE ") : 1;
379 printf("%6s %6s %-16s %-7s %3s %3s %1s",
380 "PID", "PPID", "CMD", "DEVICE", "MAJ", "MIN", "D");
381 OPT_bytes ? printf(" %16s\n", "BYTES") : 1;
382 OPT_elapsed ? printf(" %16s\n", "ELAPSED") : 1;
383 OPT_dtime && ! OPT_percent ? printf(" %16s\n", "DISKTIME") : 1;
384 OPT_dtime && OPT_percent ? printf(" %6s\n", "%I/O") : 1;
386 /* truncate to top lines if needed */
387 OPT_top ? trunc(@out, TOP) : 1;
389 /* normalise to percentage if needed */
/*
 * The summed values are disk times after the / 1000 scaling
 * (microseconds per the header). Dividing by INTERVAL * 10000 is the
 * same as value / (INTERVAL * 1000000) * 100, a percentage of the
 * sample interval in seconds.
 */
390 OPT_percent ? normalize(@out, INTERVAL * 10000) : 1;
/*
 * Two row formats: a 16-wide value column and a 6-wide one, matching
 * the heading widths above. The condition selecting between them is not
 * visible in this excerpt.
 */
394 printa("%5d %6d %6d %-16s %-7s %3d %3d %1s %16@d\n", @out) :
395 printa("%5d %6d %6d %-16s %-7s %3d %3d %1s %6@d\n", @out);