diff options
Diffstat (limited to 'cddl/contrib/opensolaris/lib/libdtrace/common/dt_consume.c')
-rw-r--r-- | cddl/contrib/opensolaris/lib/libdtrace/common/dt_consume.c | 478 |
1 files changed, 334 insertions, 144 deletions
diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_consume.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_consume.c index 797e7e3..c88a92c 100644 --- a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_consume.c +++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_consume.c @@ -25,7 +25,7 @@ /* * Copyright (c) 2011, Joyent, Inc. All rights reserved. - * Copyright (c) 2011 by Delphix. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #include <stdlib.h> @@ -39,6 +39,7 @@ #include <alloca.h> #endif #include <dt_impl.h> +#include <dt_pq.h> #if !defined(sun) #include <libproc_compat.h> #endif @@ -443,17 +444,8 @@ dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last, offs += epd->dtepd_size; do { - if (offs >= buf->dtbd_size) { - /* - * We're at the end -- maybe. If the oldest - * record is non-zero, we need to wrap. - */ - if (buf->dtbd_oldest != 0) { - offs = 0; - } else { - goto out; - } - } + if (offs >= buf->dtbd_size) + goto out; next = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs); @@ -2014,26 +2006,27 @@ dt_setopt(dtrace_hdl_t *dtp, const dtrace_probedata_t *data, } static int -dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu, dtrace_bufdesc_t *buf, +dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu, + dtrace_bufdesc_t *buf, boolean_t just_one, dtrace_consume_probe_f *efunc, dtrace_consume_rec_f *rfunc, void *arg) { dtrace_epid_t id; - size_t offs, start = buf->dtbd_oldest, end = buf->dtbd_size; + size_t offs; int flow = (dtp->dt_options[DTRACEOPT_FLOWINDENT] != DTRACEOPT_UNSET); int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET); int rval, i, n; - dtrace_epid_t last = DTRACE_EPIDNONE; uint64_t tracememsize = 0; dtrace_probedata_t data; uint64_t drops; - caddr_t addr; + data.dtpda_flow = dtp->dt_flow; + data.dtpda_indent = dtp->dt_indent; + data.dtpda_prefix = dtp->dt_prefix; bzero(&data, sizeof (data)); data.dtpda_handle = dtp; data.dtpda_cpu = cpu; -again: - for (offs = start; offs < end; ) { + for (offs = buf->dtbd_oldest; offs < buf->dtbd_size; ) { dtrace_eprobedesc_t *epd; /* @@ -2068,7 +2061,8 @@ again: } if (flow) - (void) dt_flowindent(dtp, &data, last, buf, offs); + (void) dt_flowindent(dtp, &data, dtp->dt_last_epid, + buf, offs); rval = (*efunc)(&data, arg); @@ -2087,6 +2081,7 @@ again: return (dt_set_errno(dtp, EDT_BADRVAL)); for (i = 0; i < epd->dtepd_nrecs; i++) { + caddr_t addr; dtrace_recdesc_t *rec = &epd->dtepd_rec[i]; dtrace_actkind_t act = rec->dtrd_action; @@ -2458,14 +2453,16 @@ nextrec: rval = (*rfunc)(&data, NULL, arg); nextepid: offs += epd->dtepd_size; - last = id; + dtp->dt_last_epid = id; + if (just_one) { + buf->dtbd_oldest = offs; + break; + } } - if (buf->dtbd_oldest != 0 && start == buf->dtbd_oldest) { - end = buf->dtbd_oldest; - start = 0; - goto again; - } + dtp->dt_flow = data.dtpda_flow; + dtp->dt_indent = data.dtpda_indent; + dtp->dt_prefix = data.dtpda_prefix; if ((drops = buf->dtbd_drops) == 0) return (0); @@ -2478,6 +2475,130 @@ nextepid: return (dt_handle_cpudrop(dtp, cpu, DTRACEDROP_PRINCIPAL, drops)); } +/* + * Reduce memory usage by shrinking the buffer if it's no more than half full. + * Note, we need to preserve the alignment of the data at dtbd_oldest, which is + * only 4-byte aligned. + */ +static void +dt_realloc_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf, int cursize) +{ + uint64_t used = buf->dtbd_size - buf->dtbd_oldest; + if (used < cursize / 2) { + int misalign = buf->dtbd_oldest & (sizeof (uint64_t) - 1); + char *newdata = dt_alloc(dtp, used + misalign); + if (newdata == NULL) + return; + bzero(newdata, misalign); + bcopy(buf->dtbd_data + buf->dtbd_oldest, + newdata + misalign, used); + dt_free(dtp, buf->dtbd_data); + buf->dtbd_oldest = misalign; + buf->dtbd_size = used + misalign; + buf->dtbd_data = newdata; + } +} + +/* + * If the ring buffer has wrapped, the data is not in order. Rearrange it + * so that it is. Note, we need to preserve the alignment of the data at + * dtbd_oldest, which is only 4-byte aligned. + */ +static int +dt_unring_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf) +{ + int misalign; + char *newdata, *ndp; + + if (buf->dtbd_oldest == 0) + return (0); + + misalign = buf->dtbd_oldest & (sizeof (uint64_t) - 1); + newdata = ndp = dt_alloc(dtp, buf->dtbd_size + misalign); + + if (newdata == NULL) + return (-1); + + assert(0 == (buf->dtbd_size & (sizeof (uint64_t) - 1))); + + bzero(ndp, misalign); + ndp += misalign; + + bcopy(buf->dtbd_data + buf->dtbd_oldest, ndp, + buf->dtbd_size - buf->dtbd_oldest); + ndp += buf->dtbd_size - buf->dtbd_oldest; + + bcopy(buf->dtbd_data, ndp, buf->dtbd_oldest); + + dt_free(dtp, buf->dtbd_data); + buf->dtbd_oldest = 0; + buf->dtbd_data = newdata; + buf->dtbd_size += misalign; + + return (0); +} + +static void +dt_put_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf) +{ + dt_free(dtp, buf->dtbd_data); + dt_free(dtp, buf); +} + +/* + * Returns 0 on success, in which case *cbp will be filled in if we retrieved + * data, or NULL if there is no data for this CPU. + * Returns -1 on failure and sets dt_errno. + */ +static int +dt_get_buf(dtrace_hdl_t *dtp, int cpu, dtrace_bufdesc_t **bufp) +{ + dtrace_optval_t size; + dtrace_bufdesc_t *buf = dt_zalloc(dtp, sizeof (*buf)); + int error; + + if (buf == NULL) + return (-1); + + (void) dtrace_getopt(dtp, "bufsize", &size); + buf->dtbd_data = dt_alloc(dtp, size); + if (buf->dtbd_data == NULL) { + dt_free(dtp, buf); + return (-1); + } + buf->dtbd_size = size; + buf->dtbd_cpu = cpu; + +#if defined(sun) + if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) { +#else + if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) { +#endif + dt_put_buf(dtp, buf); + /* + * If we failed with ENOENT, it may be because the + * CPU was unconfigured -- this is okay. Any other + * error, however, is unexpected. + */ + if (errno == ENOENT) { + *bufp = NULL; + return (0); + } + + return (dt_set_errno(dtp, errno)); + } + + error = dt_unring_buf(dtp, buf); + if (error != 0) { + dt_put_buf(dtp, buf); + return (error); + } + dt_realloc_buf(dtp, buf, size); + + *bufp = buf; + return (0); +} + typedef struct dt_begin { dtrace_consume_probe_f *dtbgn_probefunc; dtrace_consume_rec_f *dtbgn_recfunc; @@ -2541,7 +2662,7 @@ dt_consume_begin_error(const dtrace_errdata_t *data, void *arg) } static int -dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf, +dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg) { /* @@ -2565,33 +2686,19 @@ dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf, * first pass, and that we only process ERROR enablings _not_ induced * by BEGIN enablings in the second pass. */ + dt_begin_t begin; processorid_t cpu = dtp->dt_beganon; - dtrace_bufdesc_t nbuf; -#if !defined(sun) - dtrace_bufdesc_t *pbuf; -#endif int rval, i; static int max_ncpus; - dtrace_optval_t size; + dtrace_bufdesc_t *buf; dtp->dt_beganon = -1; -#if defined(sun) - if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) { -#else - if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) { -#endif - /* - * We really don't expect this to fail, but it is at least - * technically possible for this to fail with ENOENT. In this - * case, we just drive on... - */ - if (errno == ENOENT) - return (0); - - return (dt_set_errno(dtp, errno)); - } + if (dt_get_buf(dtp, cpu, &buf) != 0) + return (-1); + if (buf == NULL) + return (0); if (!dtp->dt_stopped || buf->dtbd_cpu != dtp->dt_endedon) { /* @@ -2599,7 +2706,10 @@ dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf, * we are, we actually processed any END probes on another * CPU. We can simply consume this buffer and return. */ - return (dt_consume_cpu(dtp, fp, cpu, buf, pf, rf, arg)); + rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE, + pf, rf, arg); + dt_put_buf(dtp, buf); + return (rval); } begin.dtbgn_probefunc = pf; @@ -2616,61 +2726,41 @@ dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf, dtp->dt_errhdlr = dt_consume_begin_error; dtp->dt_errarg = &begin; - rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe, - dt_consume_begin_record, &begin); + rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE, + dt_consume_begin_probe, dt_consume_begin_record, &begin); dtp->dt_errhdlr = begin.dtbgn_errhdlr; dtp->dt_errarg = begin.dtbgn_errarg; - if (rval != 0) + if (rval != 0) { + dt_put_buf(dtp, buf); return (rval); - - /* - * Now allocate a new buffer. We'll use this to deal with every other - * CPU. - */ - bzero(&nbuf, sizeof (dtrace_bufdesc_t)); - (void) dtrace_getopt(dtp, "bufsize", &size); - if ((nbuf.dtbd_data = malloc(size)) == NULL) - return (dt_set_errno(dtp, EDT_NOMEM)); + } if (max_ncpus == 0) max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1; for (i = 0; i < max_ncpus; i++) { - nbuf.dtbd_cpu = i; - + dtrace_bufdesc_t *nbuf; if (i == cpu) continue; -#if defined(sun) - if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &nbuf) == -1) { -#else - pbuf = &nbuf; - if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &pbuf) == -1) { -#endif - /* - * If we failed with ENOENT, it may be because the - * CPU was unconfigured -- this is okay. Any other - * error, however, is unexpected. - */ - if (errno == ENOENT) - continue; - - free(nbuf.dtbd_data); - - return (dt_set_errno(dtp, errno)); + if (dt_get_buf(dtp, i, &nbuf) != 0) { + dt_put_buf(dtp, buf); + return (-1); } + if (nbuf == NULL) + continue; - if ((rval = dt_consume_cpu(dtp, fp, - i, &nbuf, pf, rf, arg)) != 0) { - free(nbuf.dtbd_data); + rval = dt_consume_cpu(dtp, fp, i, nbuf, B_FALSE, + pf, rf, arg); + dt_put_buf(dtp, nbuf); + if (rval != 0) { + dt_put_buf(dtp, buf); return (rval); } } - free(nbuf.dtbd_data); - /* * Okay -- we're done with the other buffers. Now we want to * reconsume the first buffer -- but this time we're looking for @@ -2685,8 +2775,8 @@ dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf, dtp->dt_errhdlr = dt_consume_begin_error; dtp->dt_errarg = &begin; - rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe, - dt_consume_begin_record, &begin); + rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE, + dt_consume_begin_probe, dt_consume_begin_record, &begin); dtp->dt_errhdlr = begin.dtbgn_errhdlr; dtp->dt_errarg = begin.dtbgn_errarg; @@ -2694,11 +2784,32 @@ dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf, return (rval); } +/* ARGSUSED */ +static uint64_t +dt_buf_oldest(void *elem, void *arg) +{ + dtrace_bufdesc_t *buf = elem; + size_t offs = buf->dtbd_oldest; + + while (offs < buf->dtbd_size) { + dtrace_rechdr_t *dtrh = + /* LINTED - alignment */ + (dtrace_rechdr_t *)(buf->dtbd_data + offs); + if (dtrh->dtrh_epid == DTRACE_EPIDNONE) { + offs += sizeof (dtrace_epid_t); + } else { + return (DTRACE_RECORD_LOAD_TIMESTAMP(dtrh)); + } + } + + /* There are no records left; use the time the buffer was retrieved. */ + return (buf->dtbd_timestamp); +} + int dtrace_consume(dtrace_hdl_t *dtp, FILE *fp, dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg) { - dtrace_bufdesc_t *buf = &dtp->dt_buf; dtrace_optval_t size; static int max_ncpus; int i, rval; @@ -2726,79 +2837,158 @@ dtrace_consume(dtrace_hdl_t *dtp, FILE *fp, if (rf == NULL) rf = (dtrace_consume_rec_f *)dt_nullrec; - if (buf->dtbd_data == NULL) { - (void) dtrace_getopt(dtp, "bufsize", &size); - if ((buf->dtbd_data = malloc(size)) == NULL) - return (dt_set_errno(dtp, EDT_NOMEM)); - - buf->dtbd_size = size; - } - - /* - * If we have just begun, we want to first process the CPU that - * executed the BEGIN probe (if any). - */ - if (dtp->dt_active && dtp->dt_beganon != -1) { - buf->dtbd_cpu = dtp->dt_beganon; - if ((rval = dt_consume_begin(dtp, fp, buf, pf, rf, arg)) != 0) - return (rval); - } - - for (i = 0; i < max_ncpus; i++) { - buf->dtbd_cpu = i; - + if (dtp->dt_options[DTRACEOPT_TEMPORAL] == DTRACEOPT_UNSET) { /* - * If we have stopped, we want to process the CPU on which the - * END probe was processed only _after_ we have processed - * everything else. + * The output will not be in the order it was traced. Rather, + * we will consume all of the data from each CPU's buffer in + * turn. We apply special handling for the records from BEGIN + * and END probes so that they are consumed first and last, + * respectively. + * + * If we have just begun, we want to first process the CPU that + * executed the BEGIN probe (if any). */ - if (dtp->dt_stopped && (i == dtp->dt_endedon)) - continue; + if (dtp->dt_active && dtp->dt_beganon != -1 && + (rval = dt_consume_begin(dtp, fp, pf, rf, arg)) != 0) + return (rval); + + for (i = 0; i < max_ncpus; i++) { + dtrace_bufdesc_t *buf; -#if defined(sun) - if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) { -#else - if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) { -#endif /* - * If we failed with ENOENT, it may be because the - * CPU was unconfigured -- this is okay. Any other - * error, however, is unexpected. + * If we have stopped, we want to process the CPU on + * which the END probe was processed only _after_ we + * have processed everything else. */ - if (errno == ENOENT) + if (dtp->dt_stopped && (i == dtp->dt_endedon)) continue; - return (dt_set_errno(dtp, errno)); + if (dt_get_buf(dtp, i, &buf) != 0) + return (-1); + if (buf == NULL) + continue; + + dtp->dt_flow = 0; + dtp->dt_indent = 0; + dtp->dt_prefix = NULL; + rval = dt_consume_cpu(dtp, fp, i, + buf, B_FALSE, pf, rf, arg); + dt_put_buf(dtp, buf); + if (rval != 0) + return (rval); } + if (dtp->dt_stopped) { + dtrace_bufdesc_t *buf; + + if (dt_get_buf(dtp, dtp->dt_endedon, &buf) != 0) + return (-1); + if (buf == NULL) + return (0); - if ((rval = dt_consume_cpu(dtp, fp, i, buf, pf, rf, arg)) != 0) + rval = dt_consume_cpu(dtp, fp, dtp->dt_endedon, + buf, B_FALSE, pf, rf, arg); + dt_put_buf(dtp, buf); return (rval); - } + } + } else { + /* + * The output will be in the order it was traced (or for + * speculations, when it was committed). We retrieve a buffer + * from each CPU and put it into a priority queue, which sorts + * based on the first entry in the buffer. This is sufficient + * because entries within a buffer are already sorted. + * + * We then consume records one at a time, always consuming the + * oldest record, as determined by the priority queue. When + * we reach the end of the time covered by these buffers, + * we need to stop and retrieve more records on the next pass. + * The kernel tells us the time covered by each buffer, in + * dtbd_timestamp. The first buffer's timestamp tells us the + * time covered by all buffers, as subsequently retrieved + * buffers will cover to a more recent time. + */ - if (!dtp->dt_stopped) - return (0); + uint64_t *drops = alloca(max_ncpus * sizeof (uint64_t)); + uint64_t first_timestamp = 0; + uint_t cookie = 0; + dtrace_bufdesc_t *buf; - buf->dtbd_cpu = dtp->dt_endedon; + bzero(drops, max_ncpus * sizeof (uint64_t)); + + if (dtp->dt_bufq == NULL) { + dtp->dt_bufq = dt_pq_init(dtp, max_ncpus * 2, + dt_buf_oldest, NULL); + if (dtp->dt_bufq == NULL) /* ENOMEM */ + return (-1); + } + + /* Retrieve data from each CPU. */ + (void) dtrace_getopt(dtp, "bufsize", &size); + for (i = 0; i < max_ncpus; i++) { + dtrace_bufdesc_t *buf; + + if (dt_get_buf(dtp, i, &buf) != 0) + return (-1); + if (buf != NULL) { + if (first_timestamp == 0) + first_timestamp = buf->dtbd_timestamp; + assert(buf->dtbd_timestamp >= first_timestamp); + + dt_pq_insert(dtp->dt_bufq, buf); + drops[i] = buf->dtbd_drops; + buf->dtbd_drops = 0; + } + } + + /* Consume records. */ + for (;;) { + dtrace_bufdesc_t *buf = dt_pq_pop(dtp->dt_bufq); + uint64_t timestamp; + + if (buf == NULL) + break; + + timestamp = dt_buf_oldest(buf, dtp); + assert(timestamp >= dtp->dt_last_timestamp); + dtp->dt_last_timestamp = timestamp; + + if (timestamp == buf->dtbd_timestamp) { + /* + * We've reached the end of the time covered + * by this buffer. If this is the oldest + * buffer, we must do another pass + * to retrieve more data. + */ + dt_put_buf(dtp, buf); + if (timestamp == first_timestamp && + !dtp->dt_stopped) + break; + continue; + } + + if ((rval = dt_consume_cpu(dtp, fp, + buf->dtbd_cpu, buf, B_TRUE, pf, rf, arg)) != 0) + return (rval); + dt_pq_insert(dtp->dt_bufq, buf); + } + + /* Consume drops. */ + for (i = 0; i < max_ncpus; i++) { + if (drops[i] != 0) { + int error = dt_handle_cpudrop(dtp, i, + DTRACEDROP_PRINCIPAL, drops[i]); + if (error != 0) + return (error); + } + } -#if defined(sun) - if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) { -#else - if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) { -#endif /* - * This _really_ shouldn't fail, but it is strictly speaking - * possible for this to return ENOENT if the CPU that called - * the END enabling somehow managed to become unconfigured. - * It's unclear how the user can possibly expect anything - * rational to happen in this case -- the state has been thrown - * out along with the unconfigured CPU -- so we'll just drive - * on... + * Reduce memory usage by re-allocating smaller buffers + * for the "remnants". */ - if (errno == ENOENT) - return (0); - - return (dt_set_errno(dtp, errno)); + while (buf = dt_pq_walk(dtp->dt_bufq, &cookie)) + dt_realloc_buf(dtp, buf, buf->dtbd_size); } - return (dt_consume_cpu(dtp, fp, dtp->dt_endedon, buf, pf, rf, arg)); + return (0); } |