diff options
Diffstat (limited to 'cddl/contrib/dtracetoolkit/iotop')
-rwxr-xr-x | cddl/contrib/dtracetoolkit/iotop | 422 |
1 files changed, 422 insertions, 0 deletions
#!/usr/bin/ksh
#
# iotop - display top disk I/O events by process.
#         Written using DTrace (Solaris 10 3/05).
#
# This is measuring disk events that have made it past system caches.
#
# $Id: iotop 8 2007-08-06 05:55:26Z brendan $
#
# USAGE:    iotop [-C] [-D|-o|-P] [-j|-Z] [-d device] [-f filename]
#                 [-m mount_point] [-t top] [interval [count]]
#
#           iotop               # default output, 5 second intervals
#
#           -C                  # don't clear the screen
#           -D                  # print delta times, elapsed, us
#           -j                  # print project ID
#           -o                  # print disk delta times, us
#           -P                  # print %I/O (disk delta times)
#           -Z                  # print zone ID
#           -d device           # instance name to snoop (eg, dad0)
#           -f filename         # full pathname of file to snoop
#           -m mount_point      # this FS only (will skip raw events)
#           -t top              # print top number only
#    eg,
#           iotop 1             # 1 second samples
#           iotop -C            # don't clear the screen
#           iotop -P            # print %I/O (time based)
#           iotop -j            # print project IDs
#           iotop -Z            # print zone IDs
#           iotop -t 20         # print top 20 lines only
#           iotop -C 5 12       # print 12 x 5 second samples
#
# FIELDS:
#           UID         user ID
#           PID         process ID
#           PPID        parent process ID
#           PROJ        project ID
#           ZONE        zone ID
#           CMD         process command name
#           DEVICE      device name
#           MAJ         device major number
#           MIN         device minor number
#           D           direction, Read or Write
#           BYTES       total size of operations, bytes
#           ELAPSED     total elapsed from request to completion, us
#           DISKTIME    total time for disk to complete request, us
#           %I/O        percent disk I/O, based on time (DISKTIME)
#           load        1 min load average
#           disk_r      total disk read Kbytes for sample
#           disk_w      total disk write Kbytes for sample
#
# NOTE:
# * There are two different delta times reported. -D prints the
#   elapsed time from the disk request (strategy) to the disk completion
#   (iodone); -o prints the time for the disk to complete that event
#   since its last event (time between iodones), or, the time to the
#   strategy if the disk had been idle.
# * The %I/O value can exceed 100%. It represents how busy a process is
#   making the disks, in terms of a single disk. A value of 200% could
#   mean 2 disks are busy at 100%, or 4 disks at 50%...
#
# SEE ALSO: iosnoop
#           BigAdmin: DTrace, http://www.sun.com/bigadmin/content/dtrace
#           Solaris Dynamic Tracing Guide, http://docs.sun.com
#           DTrace Tools, http://www.brendangregg.com/dtrace.html
#
# INSPIRATION: top(1) by William LeFebvre
#
# COPYRIGHT: Copyright (c) 2005, 2006 Brendan Gregg.
#
# CDDL HEADER START
#
#  The contents of this file are subject to the terms of the
#  Common Development and Distribution License, Version 1.0 only
#  (the "License").  You may not use this file except in compliance
#  with the License.
#
#  You can obtain a copy of the license at Docs/cddl1.txt
#  or http://www.opensolaris.org/os/licensing.
#  See the License for the specific language governing permissions
#  and limitations under the License.
#
# CDDL HEADER END
#
# KNOWN BUGS:
# - This can print errors while running on servers with Veritas volumes.
#
# Author: Brendan Gregg  [Sydney, Australia]
#
# 15-Jul-2005   Brendan Gregg   Created this.
# 20-Apr-2006      "      "     Last update.
#


##############################
# --- Process Arguments ---
#

### usage: print the help text to stderr and exit with status 1.
### The here-document body and its terminator start in column 0 on purpose,
### so the text does not depend on TAB indentation being preserved.
usage()
{
    cat <<END >&2
USAGE: iotop [-C] [-D|-o|-P] [-j|-Z] [-d device] [-f filename]
             [-m mount_point] [-t top] [interval [count]]

                -C      # don't clear the screen
                -D      # print delta times, elapsed, us
                -j      # print project ID
                -o      # print disk delta times, us
                -P      # print %I/O (disk delta times)
                -Z      # print zone ID
                -d device       # instance name to snoop
                -f filename     # snoop this file only
                -m mount_point  # this FS only
                -t top          # print top number only
   eg,
        iotop         # default output, 5 second samples
        iotop 1       # 1 second samples
        iotop -P      # print %I/O (time based)
        iotop -m /    # snoop events on filesystem / only
        iotop -t 20   # print top 20 lines only
        iotop -C 5 12 # print 12 x 5 second samples
END
    exit 1
}

### check_num label value: exit with an error unless value is all digits.
### These values are pasted into the D program as integer constants, so
### anything else would only surface later as a D compile error.
check_num()
{
    if [[ "$2" != +([0-9]) ]]; then
        echo "ERROR: $1 must be a non-negative integer, got: $2" >&2
        exit 1
    fi
}

### default variables
opt_device=0; opt_file=0; opt_mount=0; opt_clear=1; opt_proj=0; opt_zone=0
opt_percent=0; opt_def=1; opt_bytes=1; filter=0; device=.; filename=.; mount=.
opt_top=0; opt_elapsed=0; opt_dtime=0; interval=5; count=-1; top=0

### process options
while getopts CDd:f:hjm:oPt:Z name
do
    case $name in
    C)  opt_clear=0 ;;
    D)  opt_elapsed=1; opt_bytes=0 ;;
    d)  opt_device=1; device=$OPTARG ;;
    f)  opt_file=1; filename=$OPTARG ;;
    j)  opt_proj=1; opt_def=0 ;;
    m)  opt_mount=1; mount=$OPTARG ;;
    o)  opt_dtime=1; opt_bytes=0 ;;
    P)  opt_percent=1; opt_dtime=1; opt_bytes=0 ;;
    t)  opt_top=1; top=$OPTARG ;;
    Z)  opt_zone=1; opt_def=0 ;;
    h|?) usage ;;
    esac
done

shift $(( $OPTIND - 1 ))

### option logic
if (( opt_top )); then
    check_num "top" "$top"
fi

# Positional arguments: [interval [count]].  A value of 0 is not consumed
# and the default is kept; non-numeric values are rejected up front.
if [[ -n "$1" ]]; then
    check_num "interval" "$1"
fi
if [[ "$1" > 0 ]]; then
    interval=$1; shift
fi
if [[ -n "$1" ]]; then
    check_num "count" "$1"
fi
if [[ "$1" > 0 ]]; then
    count=$1; shift
fi

# -Z wins over -j, and -o/-P win over -D.
if (( opt_proj && opt_zone )); then
    opt_proj=0
fi
if (( opt_elapsed && opt_dtime )); then
    opt_elapsed=0
fi
if (( opt_device || opt_mount || opt_file )); then
    filter=1
fi
if (( opt_clear )); then
    clearstr=$(clear)
else
    clearstr=.
fi



#################################
# --- Main Program, DTrace ---
#
# The D program is one single-quoted shell word.  Each shell variable is
# spliced in by closing the single quote, expanding the variable inside
# double quotes (so whitespace or glob characters in a device, file or
# mount name cannot split the argument), and reopening the single quote.
# NOTE: a value containing a double quote or backslash still ends up
# verbatim inside a D string literal; that case is not escaped here.
# Comments inside the program must not contain a single quote character.
#
/usr/sbin/dtrace -n '
 /*
  * Command line arguments
  */
 inline int OPT_def     = '"$opt_def"';
 inline int OPT_proj    = '"$opt_proj"';
 inline int OPT_zone    = '"$opt_zone"';
 inline int OPT_clear   = '"$opt_clear"';
 inline int OPT_bytes   = '"$opt_bytes"';
 inline int OPT_elapsed = '"$opt_elapsed"';
 inline int OPT_dtime   = '"$opt_dtime"';
 inline int OPT_percent = '"$opt_percent"';
 inline int OPT_device  = '"$opt_device"';
 inline int OPT_mount   = '"$opt_mount"';
 inline int OPT_file    = '"$opt_file"';
 inline int OPT_top     = '"$opt_top"';
 inline int INTERVAL    = '"$interval"';
 inline int COUNTER     = '"$count"';
 inline int FILTER      = '"$filter"';
 inline int TOP         = '"$top"';
 inline string DEVICE   = "'"$device"'";
 inline string FILENAME = "'"$filename"'";
 inline string MOUNT    = "'"$mount"'";
 inline string CLEAR    = "'"$clearstr"'";

 #pragma D option quiet

 /* boost the following if you get "dynamic variable drops" */
 #pragma D option dynvarsize=8m

 /*
  * Print header
  */
 dtrace:::BEGIN
 {
        last_event[""] = 0;

        /* starting values */
        counts = COUNTER;
        secs = INTERVAL;
        disk_r = 0;
        disk_w = 0;

        printf("Tracing... Please wait.\n");
 }

 /*
  * Check event is being traced
  */
 io:genunix::start,
 io:genunix::done
 {
        /* default is to trace unless filtering, */
        this->ok = FILTER ? 0 : 1;

        /* check each filter, */
        (OPT_device == 1 && DEVICE == args[1]->dev_statname)? this->ok = 1 : 1;
        (OPT_file == 1 && FILENAME == args[2]->fi_pathname) ? this->ok = 1 : 1;
        (OPT_mount == 1 && MOUNT == args[2]->fi_mount) ? this->ok = 1 : 1;
 }

 /*
  * Reset last_event for disk idle -> start
  * this prevents idle time being counted as disk time.
  */
 io:genunix::start
 /! pending[args[1]->dev_statname]/
 {
        /* save last disk event */
        last_event[args[1]->dev_statname] = timestamp;
 }

 /*
  * Store entry details
  */
 io:genunix::start
 /this->ok/
 {
        /* these are used as a unique disk event key, */
        this->dev = args[0]->b_edev;
        this->blk = args[0]->b_blkno;

        /* save disk event details, */
        start_uid[this->dev, this->blk] = uid;
        start_pid[this->dev, this->blk] = pid;
        start_ppid[this->dev, this->blk] = ppid;
        start_comm[this->dev, this->blk] = execname;
        start_time[this->dev, this->blk] = timestamp;
        start_proj[this->dev, this->blk] = curpsinfo->pr_projid;
        start_zone[this->dev, this->blk] = curpsinfo->pr_zoneid;
        start_rw[this->dev, this->blk] = args[0]->b_flags & B_READ ? "R" : "W";
        disk_r += args[0]->b_flags & B_READ ? args[0]->b_bcount : 0;
        disk_w += args[0]->b_flags & B_READ ? 0 : args[0]->b_bcount;

        /* increase disk event pending count */
        pending[args[1]->dev_statname]++;
 }

 /*
  * Process and Print completion
  */
 io:genunix::done
 /this->ok/
 {
        /* decrease disk event pending count */
        pending[args[1]->dev_statname]--;

        /*
         * Process details
         */

        /* fetch entry values */
        this->dev = args[0]->b_edev;
        this->blk = args[0]->b_blkno;
        this->suid = start_uid[this->dev, this->blk];
        this->spid = start_pid[this->dev, this->blk];
        this->sppid = start_ppid[this->dev, this->blk];
        this->sproj = start_proj[this->dev, this->blk];
        this->szone = start_zone[this->dev, this->blk];
        self->scomm = start_comm[this->dev, this->blk];
        this->stime = start_time[this->dev, this->blk];
        this->etime = timestamp; /* endtime */
        this->elapsed = this->etime - this->stime;
        self->rw = start_rw[this->dev, this->blk];
        this->dtime = last_event[args[1]->dev_statname] == 0 ? 0 :
            timestamp - last_event[args[1]->dev_statname];

        /* memory cleanup */
        start_uid[this->dev, this->blk]  = 0;
        start_pid[this->dev, this->blk]  = 0;
        start_ppid[this->dev, this->blk] = 0;
        start_time[this->dev, this->blk] = 0;
        start_comm[this->dev, this->blk] = 0;
        start_zone[this->dev, this->blk] = 0;
        start_proj[this->dev, this->blk] = 0;
        start_rw[this->dev, this->blk]   = 0;

        /*
         * Choose statistic to track
         */
        OPT_bytes   ? this->value = args[0]->b_bcount    : 1;
        OPT_elapsed ? this->value = this->elapsed / 1000 : 1;
        OPT_dtime   ? this->value = this->dtime / 1000   : 1;

        /*
         * Save details
         */
        OPT_def ? @out[this->suid, this->spid, this->sppid, self->scomm,
            args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
            self->rw] = sum(this->value) : 1;
        OPT_proj ? @out[this->sproj, this->spid, this->sppid, self->scomm,
            args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
            self->rw] = sum(this->value) : 1;
        OPT_zone ? @out[this->szone, this->spid, this->sppid, self->scomm,
            args[1]->dev_statname, args[1]->dev_major, args[1]->dev_minor,
            self->rw] = sum(this->value) : 1;

        /* save last disk event */
        last_event[args[1]->dev_statname] = timestamp;

        self->scomm = 0;
        self->rw = 0;
 }

 /*
  * Prevent pending from underflowing
  * this can happen if this program is started during disk events.
  */
 io:genunix::done
 /pending[args[1]->dev_statname] < 0/
 {
        pending[args[1]->dev_statname] = 0;
 }

 /*
  * Timer
  */
 profile:::tick-1sec
 {
        secs--;
 }

 /*
  * Print Report
  */
 profile:::tick-1sec
 /secs == 0/
 {
        /* fetch 1 min load average */
        this->load1a  = `hp_avenrun[0] / 65536;
        this->load1b  = ((`hp_avenrun[0] % 65536) * 100) / 65536;

        /* convert counters to Kbytes */
        disk_r /= 1024;
        disk_w /= 1024;

        /* print status */
        OPT_clear ? printf("%s", CLEAR) : 1;
        printf("%Y,  load: %d.%02d,  disk_r: %6d KB,  disk_w: %6d KB\n\n",
            walltimestamp, this->load1a, this->load1b, disk_r, disk_w);

        /* print headers */
        OPT_def  ? printf("  UID ") : 1;
        OPT_proj ? printf(" PROJ ") : 1;
        OPT_zone ? printf(" ZONE ") : 1;
        printf("%6s %6s %-16s %-7s %3s %3s %1s",
            "PID", "PPID", "CMD", "DEVICE", "MAJ", "MIN", "D");
        OPT_bytes   ? printf(" %16s\n", "BYTES") : 1;
        OPT_elapsed ? printf(" %16s\n", "ELAPSED") : 1;
        OPT_dtime && ! OPT_percent ? printf(" %16s\n", "DISKTIME") : 1;
        OPT_dtime && OPT_percent   ? printf(" %6s\n", "%I/O") : 1;

        /* truncate to top lines if needed */
        OPT_top ? trunc(@out, TOP) : 1;

        /* normalise to percentage if needed */
        OPT_percent ? normalize(@out, INTERVAL * 10000) : 1;

        /* print data */
        ! OPT_percent ?
            printa("%5d %6d %6d %-16s %-7s %3d %3d %1s %16@d\n", @out) :
            printa("%5d %6d %6d %-16s %-7s %3d %3d %1s %6@d\n", @out);
        printf("\n");

        /* clear data */
        trunc(@out);
        disk_r = 0;
        disk_w = 0;
        secs = INTERVAL;
        counts--;
 }

 /*
  * End of program
  */
 profile:::tick-1sec
 /counts == 0/
 {
        exit(0);
 }

 /*
  * Cleanup for Ctrl-C
  */
 dtrace:::END
 {
        trunc(@out);
 }
'