summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authormarkj <markj@FreeBSD.org>2013-05-12 16:26:33 +0000
committermarkj <markj@FreeBSD.org>2013-05-12 16:26:33 +0000
commit8f40d761c17b416a0ae225d630d4b7eeaf84ca6a (patch)
treee007a74cb5ae3110e231820664775e3290b8831c
parent84205c5d0632cde5deac5ffc2ace27f6e36e4920 (diff)
parent73d9b47b2a7a8df111f99bb69b7879f98cd3b57c (diff)
downloadFreeBSD-src-8f40d761c17b416a0ae225d630d4b7eeaf84ca6a.zip
FreeBSD-src-8f40d761c17b416a0ae225d630d4b7eeaf84ca6a.tar.gz
Bring back part of r249367 by adding DTrace's temporal option, which allows
users to guarantee that the output of DTrace scripts will be time-ordered. This option is enabled by adding the line #pragma D option temporal to the beginning of a script, or by adding '-x temporal' to the arguments of dtrace(1). This change fixes a bug in the original port of the temporal option. This bug was causing some assertions to fail, so they had been disabled; in this revision the assertions are working properly and are enabled. The DTrace version number has been bumped from 1.9.0 to 1.9.1 to reflect the language change that's being introduced. This change corresponds to part of illumos-gate commit e5803b76927480: 3021 option for time-ordered output from dtrace(1M) Reviewed by: pfg Obtained from: illumos MFC after: 1 month
-rw-r--r--cddl/contrib/opensolaris/cmd/dtrace/dtrace.c6
-rw-r--r--cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/buffering/tst.fill1.d17
-rw-r--r--cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/pragma/tst.temporal.ksh106
-rw-r--r--cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/pragma/tst.temporal2.ksh102
-rw-r--r--cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/pragma/tst.temporal3.d48
-rw-r--r--cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/speculation/err.BufSizeVariations1.d14
-rw-r--r--cddl/contrib/opensolaris/lib/libdtrace/common/dt_consume.c478
-rw-r--r--cddl/contrib/opensolaris/lib/libdtrace/common/dt_impl.h11
-rw-r--r--cddl/contrib/opensolaris/lib/libdtrace/common/dt_open.c11
-rw-r--r--cddl/contrib/opensolaris/lib/libdtrace/common/dt_options.c63
-rw-r--r--cddl/contrib/opensolaris/lib/libdtrace/common/dt_pq.c157
-rw-r--r--cddl/contrib/opensolaris/lib/libdtrace/common/dt_pq.h51
-rw-r--r--cddl/lib/libdtrace/Makefile1
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c225
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h37
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/sys/dtrace_impl.h59
-rw-r--r--sys/cddl/dev/dtrace/dtrace_ioctl.c2
17 files changed, 1087 insertions, 301 deletions
diff --git a/cddl/contrib/opensolaris/cmd/dtrace/dtrace.c b/cddl/contrib/opensolaris/cmd/dtrace/dtrace.c
index cc7959f..a745ceb 100644
--- a/cddl/contrib/opensolaris/cmd/dtrace/dtrace.c
+++ b/cddl/contrib/opensolaris/cmd/dtrace/dtrace.c
@@ -23,8 +23,9 @@
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
#include <sys/types.h>
#include <sys/stat.h>
@@ -1409,6 +1410,7 @@ main(int argc, char *argv[])
(void) dtrace_setopt(g_dtp, "bufsize", "4m");
(void) dtrace_setopt(g_dtp, "aggsize", "4m");
#endif
+ (void) dtrace_setopt(g_dtp, "temporal", "yes");
/*
* If -G is specified, enable -xlink=dynamic and -xunodefs to permit
diff --git a/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/buffering/tst.fill1.d b/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/buffering/tst.fill1.d
index 143ed64..fffc7e3 100644
--- a/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/buffering/tst.fill1.d
+++ b/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/buffering/tst.fill1.d
@@ -23,26 +23,29 @@
* Copyright 2006 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
/*
* ASSERTION:
* Positive test for fill buffer policy.
*
* SECTION: Buffers and Buffering/fill Policy;
- * Buffers and Buffering/Buffer Sizes;
+ * Buffers and Buffering/Buffer Sizes;
* Options and Tunables/bufsize;
* Options and Tunables/bufpolicy;
* Options and Tunables/statusrate
*/
/*
- * This is a brute-force way of testing fill buffers. We assume that each
- * printf() stores 8 bytes. Because each fill buffer is per-CPU, we must
- * fill up our buffer in one series of enablings on a single CPU.
+ * This is a brute-force way of testing fill buffers. We assume that
+ * each printf() stores 16 bytes (4x 32-bit words for EPID, timestamp
+ * lo, timestamp hi, and the variable i). Because each fill buffer is
+ * per-CPU, we must fill up our buffer in one series of enablings on a
+ * single CPU.
*/
#pragma D option bufpolicy=fill
-#pragma D option bufsize=64
+#pragma D option bufsize=128
#pragma D option statusrate=10ms
#pragma D option quiet
diff --git a/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/pragma/tst.temporal.ksh b/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/pragma/tst.temporal.ksh
new file mode 100644
index 0000000..9a0aed0
--- /dev/null
+++ b/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/pragma/tst.temporal.ksh
@@ -0,0 +1,106 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2012 by Delphix. All rights reserved.
+#
+
+############################################################################
+# ASSERTION:
+# temporal option causes output to be sorted
+#
+# SECTION: Pragma
+#
+# NOTES: The temporal option has no effect on a single-CPU system, so
+# this needs to be run on a multi-CPU system to effectively test the
+# temporal option.
+#
+############################################################################
+
+if [ $# != 1 ]; then
+ echo expected one argument: '<'dtrace-path'>'
+ exit 2
+fi
+
+dtrace=$1
+file=/tmp/out.$$
+
+rm -f $file
+
+$dtrace -o $file -c 'sleep 3' -s /dev/stdin <<EOF
+ #pragma D option quiet
+ #pragma D option temporal
+
+ BEGIN
+ {
+ @lines = count();
+ printf("0 begin\n");
+ }
+
+ END
+ {
+ /* Bump @lines every time we print a line. */
+ @lines = count();
+ printf("%u end\n", timestamp);
+ @lines = count();
+ printa("99999999999999999 lines %@u\n", @lines);
+ }
+
+ profile-97hz
+ {
+ @lines = count();
+ printf("%u\n", timestamp);
+ }
+EOF
+
+status=$?
+if [ "$status" -ne 0 ]; then
+ echo $tst: dtrace failed
+ exit $status
+fi
+
+# dtrace outputs a blank line at the end, which will sort to the beginning,
+# so use head to remove the blank line.
+head -n -1 $file > $file.2
+
+sort -n $file.2 | diff $file.2 -
+status=$?
+if [ "$status" -ne 0 ]; then
+ echo $tst: output is not sorted
+ exit $status
+fi
+
+head -n 1 $file.2 | grep begin >/dev/null
+status=$?
+if [ "$status" -ne 0 ]; then
+ echo $tst: begin probe did not fire
+ exit $status
+fi
+
+tail -n 2 $file.2 | grep end >/dev/null
+status=$?
+if [ "$status" -ne 0 ]; then
+ echo $tst: end probe did not fire
+ exit $status
+fi
+
+if [ $(tail -n 1 $file.2 | cut -f3 -d ' ') -ne \
+ $(wc -l $file.2) ]; then
+ echo $tst: incorrect number of lines output
+ exit 1
+fi
+
+exit $status
diff --git a/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/pragma/tst.temporal2.ksh b/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/pragma/tst.temporal2.ksh
new file mode 100644
index 0000000..4e8d592
--- /dev/null
+++ b/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/pragma/tst.temporal2.ksh
@@ -0,0 +1,102 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source. A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2012 by Delphix. All rights reserved.
+#
+
+############################################################################
+# ASSERTION:
+# temporal option causes output to be sorted, even when some
+# buffers are empty
+#
+# SECTION: Pragma
+#
+# NOTES: The temporal option has no effect on a single-CPU system, so
+# this needs to be run on a multi-CPU system to effectively test the
+# temporal option.
+#
+############################################################################
+
+if [ $# != 1 ]; then
+ echo expected one argument: '<'dtrace-path'>'
+ exit 2
+fi
+
+dtrace=$1
+file=/tmp/out.$$
+
+rm -f $file
+
+$dtrace -o $file -s /dev/stdin <<EOF
+ #pragma D option quiet
+ #pragma D option destructive
+ #pragma D option temporal
+ #pragma D option switchrate=1000hz
+
+ /*
+ * Use two enablings of the same probe, so that cpu 0 will always
+ * record its data just a little bit before the other cpus.
+ * We don't want to use the chill() action in the same enabling
+ * that we record the timestamp, because chill() causes the
+ * timestamp to be re-read, and thus not match the timestamp
+ * which libdtrace uses to sort the records.
+ */
+
+ profile-401
+ /cpu == 0/
+ {
+ printf("%d\n", timestamp);
+ }
+
+ profile-401
+ /cpu != 0/
+ {
+ chill(1000); /* one microsecond */
+ }
+
+ profile-401
+ /cpu != 0/
+ {
+ printf("%d\n", timestamp);
+ }
+
+ tick-1s
+ /k++ == 10/
+ {
+ printf("%d\n", timestamp);
+ exit(0);
+ }
+EOF
+
+status=$?
+if [ "$status" -ne 0 ]; then
+ echo $tst: dtrace failed
+ exit $status
+fi
+
+# dtrace outputs a blank line at the end, which will sort to the beginning,
+# so use grep to remove the blank line.
+head -n -1 $file > $file.2
+
+sort -n $file.2 | diff $file.2 -
+status=$?
+if [ "$status" -ne 0 ]; then
+ echo $tst: output is not sorted
+ exit $status
+fi
+
+exit $status
diff --git a/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/pragma/tst.temporal3.d b/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/pragma/tst.temporal3.d
new file mode 100644
index 0000000..b4c0e55
--- /dev/null
+++ b/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/pragma/tst.temporal3.d
@@ -0,0 +1,48 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+/*
+ * This test excercises the "remnant" handling of the temporal option.
+ * At the end of one pass of retrieving and printing data from all CPUs,
+ * some unprocessed data will remain, because its timestamp is after the
+ * time covered by all CPUs' buffers. This unprocessed data is
+ * rearranged in a more space-efficient manner. If this is done
+ * incorrectly, an alignment error may occur. To test this, we use a
+ * high-frequency probe so that data will be recorded in subsequent
+ * CPU's buffers after the first CPU's buffer is obtained. The
+ * combination of data traced here (a 8-byte value and a 4-byte value)
+ * is effective to cause alignment problems with an incorrect
+ * implementation.
+ *
+ * This test needs to be run on a multi-CPU system to be effective.
+ */
+
+#pragma D option quiet
+#pragma D option temporal
+
+profile-4997
+{
+ printf("%u %u", 1ULL, 2);
+}
+
+tick-1
+/i++ == 10/
+{
+ exit(0);
+}
diff --git a/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/speculation/err.BufSizeVariations1.d b/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/speculation/err.BufSizeVariations1.d
index e97506e..c59ea3b 100644
--- a/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/speculation/err.BufSizeVariations1.d
+++ b/cddl/contrib/opensolaris/cmd/dtrace/test/tst/common/speculation/err.BufSizeVariations1.d
@@ -24,7 +24,10 @@
* Use is subject to license terms.
*/
-#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
/*
* ASSERTION:
@@ -35,17 +38,10 @@
*
* NOTES: This test behaves differently depending on the values
* assigned to bufsize.
- * 1. 0 > bufsize.
- * 2. 0 == bufsize.
- * 3. 0 < bufsize <= 7
- * 4. 8 <= bufsize <= 31
- * 5. 32 <= bufsize <= 47
- * 6. 48 <= bufsize <= 71
- * 7. 72 <= bufsize
*/
#pragma D option quiet
-#pragma D option bufsize=41
+#pragma D option bufsize=49
BEGIN
{
diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_consume.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_consume.c
index 797e7e3..c88a92c 100644
--- a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_consume.c
+++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_consume.c
@@ -25,7 +25,7 @@
/*
* Copyright (c) 2011, Joyent, Inc. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <stdlib.h>
@@ -39,6 +39,7 @@
#include <alloca.h>
#endif
#include <dt_impl.h>
+#include <dt_pq.h>
#if !defined(sun)
#include <libproc_compat.h>
#endif
@@ -443,17 +444,8 @@ dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last,
offs += epd->dtepd_size;
do {
- if (offs >= buf->dtbd_size) {
- /*
- * We're at the end -- maybe. If the oldest
- * record is non-zero, we need to wrap.
- */
- if (buf->dtbd_oldest != 0) {
- offs = 0;
- } else {
- goto out;
- }
- }
+ if (offs >= buf->dtbd_size)
+ goto out;
next = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
@@ -2014,26 +2006,27 @@ dt_setopt(dtrace_hdl_t *dtp, const dtrace_probedata_t *data,
}
static int
-dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu, dtrace_bufdesc_t *buf,
+dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu,
+ dtrace_bufdesc_t *buf, boolean_t just_one,
dtrace_consume_probe_f *efunc, dtrace_consume_rec_f *rfunc, void *arg)
{
dtrace_epid_t id;
- size_t offs, start = buf->dtbd_oldest, end = buf->dtbd_size;
+ size_t offs;
int flow = (dtp->dt_options[DTRACEOPT_FLOWINDENT] != DTRACEOPT_UNSET);
int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
int rval, i, n;
- dtrace_epid_t last = DTRACE_EPIDNONE;
uint64_t tracememsize = 0;
dtrace_probedata_t data;
uint64_t drops;
- caddr_t addr;
+ data.dtpda_flow = dtp->dt_flow;
+ data.dtpda_indent = dtp->dt_indent;
+ data.dtpda_prefix = dtp->dt_prefix;
bzero(&data, sizeof (data));
data.dtpda_handle = dtp;
data.dtpda_cpu = cpu;
-again:
- for (offs = start; offs < end; ) {
+ for (offs = buf->dtbd_oldest; offs < buf->dtbd_size; ) {
dtrace_eprobedesc_t *epd;
/*
@@ -2068,7 +2061,8 @@ again:
}
if (flow)
- (void) dt_flowindent(dtp, &data, last, buf, offs);
+ (void) dt_flowindent(dtp, &data, dtp->dt_last_epid,
+ buf, offs);
rval = (*efunc)(&data, arg);
@@ -2087,6 +2081,7 @@ again:
return (dt_set_errno(dtp, EDT_BADRVAL));
for (i = 0; i < epd->dtepd_nrecs; i++) {
+ caddr_t addr;
dtrace_recdesc_t *rec = &epd->dtepd_rec[i];
dtrace_actkind_t act = rec->dtrd_action;
@@ -2458,14 +2453,16 @@ nextrec:
rval = (*rfunc)(&data, NULL, arg);
nextepid:
offs += epd->dtepd_size;
- last = id;
+ dtp->dt_last_epid = id;
+ if (just_one) {
+ buf->dtbd_oldest = offs;
+ break;
+ }
}
- if (buf->dtbd_oldest != 0 && start == buf->dtbd_oldest) {
- end = buf->dtbd_oldest;
- start = 0;
- goto again;
- }
+ dtp->dt_flow = data.dtpda_flow;
+ dtp->dt_indent = data.dtpda_indent;
+ dtp->dt_prefix = data.dtpda_prefix;
if ((drops = buf->dtbd_drops) == 0)
return (0);
@@ -2478,6 +2475,130 @@ nextepid:
return (dt_handle_cpudrop(dtp, cpu, DTRACEDROP_PRINCIPAL, drops));
}
+/*
+ * Reduce memory usage by shrinking the buffer if it's no more than half full.
+ * Note, we need to preserve the alignment of the data at dtbd_oldest, which is
+ * only 4-byte aligned.
+ */
+static void
+dt_realloc_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf, int cursize)
+{
+ uint64_t used = buf->dtbd_size - buf->dtbd_oldest;
+ if (used < cursize / 2) {
+ int misalign = buf->dtbd_oldest & (sizeof (uint64_t) - 1);
+ char *newdata = dt_alloc(dtp, used + misalign);
+ if (newdata == NULL)
+ return;
+ bzero(newdata, misalign);
+ bcopy(buf->dtbd_data + buf->dtbd_oldest,
+ newdata + misalign, used);
+ dt_free(dtp, buf->dtbd_data);
+ buf->dtbd_oldest = misalign;
+ buf->dtbd_size = used + misalign;
+ buf->dtbd_data = newdata;
+ }
+}
+
+/*
+ * If the ring buffer has wrapped, the data is not in order. Rearrange it
+ * so that it is. Note, we need to preserve the alignment of the data at
+ * dtbd_oldest, which is only 4-byte aligned.
+ */
+static int
+dt_unring_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf)
+{
+ int misalign;
+ char *newdata, *ndp;
+
+ if (buf->dtbd_oldest == 0)
+ return (0);
+
+ misalign = buf->dtbd_oldest & (sizeof (uint64_t) - 1);
+ newdata = ndp = dt_alloc(dtp, buf->dtbd_size + misalign);
+
+ if (newdata == NULL)
+ return (-1);
+
+ assert(0 == (buf->dtbd_size & (sizeof (uint64_t) - 1)));
+
+ bzero(ndp, misalign);
+ ndp += misalign;
+
+ bcopy(buf->dtbd_data + buf->dtbd_oldest, ndp,
+ buf->dtbd_size - buf->dtbd_oldest);
+ ndp += buf->dtbd_size - buf->dtbd_oldest;
+
+ bcopy(buf->dtbd_data, ndp, buf->dtbd_oldest);
+
+ dt_free(dtp, buf->dtbd_data);
+ buf->dtbd_oldest = 0;
+ buf->dtbd_data = newdata;
+ buf->dtbd_size += misalign;
+
+ return (0);
+}
+
+static void
+dt_put_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf)
+{
+ dt_free(dtp, buf->dtbd_data);
+ dt_free(dtp, buf);
+}
+
+/*
+ * Returns 0 on success, in which case *cbp will be filled in if we retrieved
+ * data, or NULL if there is no data for this CPU.
+ * Returns -1 on failure and sets dt_errno.
+ */
+static int
+dt_get_buf(dtrace_hdl_t *dtp, int cpu, dtrace_bufdesc_t **bufp)
+{
+ dtrace_optval_t size;
+ dtrace_bufdesc_t *buf = dt_zalloc(dtp, sizeof (*buf));
+ int error;
+
+ if (buf == NULL)
+ return (-1);
+
+ (void) dtrace_getopt(dtp, "bufsize", &size);
+ buf->dtbd_data = dt_alloc(dtp, size);
+ if (buf->dtbd_data == NULL) {
+ dt_free(dtp, buf);
+ return (-1);
+ }
+ buf->dtbd_size = size;
+ buf->dtbd_cpu = cpu;
+
+#if defined(sun)
+ if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
+#else
+ if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
+#endif
+ dt_put_buf(dtp, buf);
+ /*
+ * If we failed with ENOENT, it may be because the
+ * CPU was unconfigured -- this is okay. Any other
+ * error, however, is unexpected.
+ */
+ if (errno == ENOENT) {
+ *bufp = NULL;
+ return (0);
+ }
+
+ return (dt_set_errno(dtp, errno));
+ }
+
+ error = dt_unring_buf(dtp, buf);
+ if (error != 0) {
+ dt_put_buf(dtp, buf);
+ return (error);
+ }
+ dt_realloc_buf(dtp, buf, size);
+
+ *bufp = buf;
+ return (0);
+}
+
typedef struct dt_begin {
dtrace_consume_probe_f *dtbgn_probefunc;
dtrace_consume_rec_f *dtbgn_recfunc;
@@ -2541,7 +2662,7 @@ dt_consume_begin_error(const dtrace_errdata_t *data, void *arg)
}
static int
-dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf,
+dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp,
dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
{
/*
@@ -2565,33 +2686,19 @@ dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf,
* first pass, and that we only process ERROR enablings _not_ induced
* by BEGIN enablings in the second pass.
*/
+
dt_begin_t begin;
processorid_t cpu = dtp->dt_beganon;
- dtrace_bufdesc_t nbuf;
-#if !defined(sun)
- dtrace_bufdesc_t *pbuf;
-#endif
int rval, i;
static int max_ncpus;
- dtrace_optval_t size;
+ dtrace_bufdesc_t *buf;
dtp->dt_beganon = -1;
-#if defined(sun)
- if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
-#else
- if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
-#endif
- /*
- * We really don't expect this to fail, but it is at least
- * technically possible for this to fail with ENOENT. In this
- * case, we just drive on...
- */
- if (errno == ENOENT)
- return (0);
-
- return (dt_set_errno(dtp, errno));
- }
+ if (dt_get_buf(dtp, cpu, &buf) != 0)
+ return (-1);
+ if (buf == NULL)
+ return (0);
if (!dtp->dt_stopped || buf->dtbd_cpu != dtp->dt_endedon) {
/*
@@ -2599,7 +2706,10 @@ dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf,
* we are, we actually processed any END probes on another
* CPU. We can simply consume this buffer and return.
*/
- return (dt_consume_cpu(dtp, fp, cpu, buf, pf, rf, arg));
+ rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE,
+ pf, rf, arg);
+ dt_put_buf(dtp, buf);
+ return (rval);
}
begin.dtbgn_probefunc = pf;
@@ -2616,61 +2726,41 @@ dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf,
dtp->dt_errhdlr = dt_consume_begin_error;
dtp->dt_errarg = &begin;
- rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe,
- dt_consume_begin_record, &begin);
+ rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE,
+ dt_consume_begin_probe, dt_consume_begin_record, &begin);
dtp->dt_errhdlr = begin.dtbgn_errhdlr;
dtp->dt_errarg = begin.dtbgn_errarg;
- if (rval != 0)
+ if (rval != 0) {
+ dt_put_buf(dtp, buf);
return (rval);
-
- /*
- * Now allocate a new buffer. We'll use this to deal with every other
- * CPU.
- */
- bzero(&nbuf, sizeof (dtrace_bufdesc_t));
- (void) dtrace_getopt(dtp, "bufsize", &size);
- if ((nbuf.dtbd_data = malloc(size)) == NULL)
- return (dt_set_errno(dtp, EDT_NOMEM));
+ }
if (max_ncpus == 0)
max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
for (i = 0; i < max_ncpus; i++) {
- nbuf.dtbd_cpu = i;
-
+ dtrace_bufdesc_t *nbuf;
if (i == cpu)
continue;
-#if defined(sun)
- if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &nbuf) == -1) {
-#else
- pbuf = &nbuf;
- if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &pbuf) == -1) {
-#endif
- /*
- * If we failed with ENOENT, it may be because the
- * CPU was unconfigured -- this is okay. Any other
- * error, however, is unexpected.
- */
- if (errno == ENOENT)
- continue;
-
- free(nbuf.dtbd_data);
-
- return (dt_set_errno(dtp, errno));
+ if (dt_get_buf(dtp, i, &nbuf) != 0) {
+ dt_put_buf(dtp, buf);
+ return (-1);
}
+ if (nbuf == NULL)
+ continue;
- if ((rval = dt_consume_cpu(dtp, fp,
- i, &nbuf, pf, rf, arg)) != 0) {
- free(nbuf.dtbd_data);
+ rval = dt_consume_cpu(dtp, fp, i, nbuf, B_FALSE,
+ pf, rf, arg);
+ dt_put_buf(dtp, nbuf);
+ if (rval != 0) {
+ dt_put_buf(dtp, buf);
return (rval);
}
}
- free(nbuf.dtbd_data);
-
/*
* Okay -- we're done with the other buffers. Now we want to
* reconsume the first buffer -- but this time we're looking for
@@ -2685,8 +2775,8 @@ dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf,
dtp->dt_errhdlr = dt_consume_begin_error;
dtp->dt_errarg = &begin;
- rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe,
- dt_consume_begin_record, &begin);
+ rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE,
+ dt_consume_begin_probe, dt_consume_begin_record, &begin);
dtp->dt_errhdlr = begin.dtbgn_errhdlr;
dtp->dt_errarg = begin.dtbgn_errarg;
@@ -2694,11 +2784,32 @@ dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf,
return (rval);
}
+/* ARGSUSED */
+static uint64_t
+dt_buf_oldest(void *elem, void *arg)
+{
+ dtrace_bufdesc_t *buf = elem;
+ size_t offs = buf->dtbd_oldest;
+
+ while (offs < buf->dtbd_size) {
+ dtrace_rechdr_t *dtrh =
+ /* LINTED - alignment */
+ (dtrace_rechdr_t *)(buf->dtbd_data + offs);
+ if (dtrh->dtrh_epid == DTRACE_EPIDNONE) {
+ offs += sizeof (dtrace_epid_t);
+ } else {
+ return (DTRACE_RECORD_LOAD_TIMESTAMP(dtrh));
+ }
+ }
+
+ /* There are no records left; use the time the buffer was retrieved. */
+ return (buf->dtbd_timestamp);
+}
+
int
dtrace_consume(dtrace_hdl_t *dtp, FILE *fp,
dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
{
- dtrace_bufdesc_t *buf = &dtp->dt_buf;
dtrace_optval_t size;
static int max_ncpus;
int i, rval;
@@ -2726,79 +2837,158 @@ dtrace_consume(dtrace_hdl_t *dtp, FILE *fp,
if (rf == NULL)
rf = (dtrace_consume_rec_f *)dt_nullrec;
- if (buf->dtbd_data == NULL) {
- (void) dtrace_getopt(dtp, "bufsize", &size);
- if ((buf->dtbd_data = malloc(size)) == NULL)
- return (dt_set_errno(dtp, EDT_NOMEM));
-
- buf->dtbd_size = size;
- }
-
- /*
- * If we have just begun, we want to first process the CPU that
- * executed the BEGIN probe (if any).
- */
- if (dtp->dt_active && dtp->dt_beganon != -1) {
- buf->dtbd_cpu = dtp->dt_beganon;
- if ((rval = dt_consume_begin(dtp, fp, buf, pf, rf, arg)) != 0)
- return (rval);
- }
-
- for (i = 0; i < max_ncpus; i++) {
- buf->dtbd_cpu = i;
-
+ if (dtp->dt_options[DTRACEOPT_TEMPORAL] == DTRACEOPT_UNSET) {
/*
- * If we have stopped, we want to process the CPU on which the
- * END probe was processed only _after_ we have processed
- * everything else.
+ * The output will not be in the order it was traced. Rather,
+ * we will consume all of the data from each CPU's buffer in
+ * turn. We apply special handling for the records from BEGIN
+ * and END probes so that they are consumed first and last,
+ * respectively.
+ *
+ * If we have just begun, we want to first process the CPU that
+ * executed the BEGIN probe (if any).
*/
- if (dtp->dt_stopped && (i == dtp->dt_endedon))
- continue;
+ if (dtp->dt_active && dtp->dt_beganon != -1 &&
+ (rval = dt_consume_begin(dtp, fp, pf, rf, arg)) != 0)
+ return (rval);
+
+ for (i = 0; i < max_ncpus; i++) {
+ dtrace_bufdesc_t *buf;
-#if defined(sun)
- if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
-#else
- if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
-#endif
/*
- * If we failed with ENOENT, it may be because the
- * CPU was unconfigured -- this is okay. Any other
- * error, however, is unexpected.
+ * If we have stopped, we want to process the CPU on
+ * which the END probe was processed only _after_ we
+ * have processed everything else.
*/
- if (errno == ENOENT)
+ if (dtp->dt_stopped && (i == dtp->dt_endedon))
continue;
- return (dt_set_errno(dtp, errno));
+ if (dt_get_buf(dtp, i, &buf) != 0)
+ return (-1);
+ if (buf == NULL)
+ continue;
+
+ dtp->dt_flow = 0;
+ dtp->dt_indent = 0;
+ dtp->dt_prefix = NULL;
+ rval = dt_consume_cpu(dtp, fp, i,
+ buf, B_FALSE, pf, rf, arg);
+ dt_put_buf(dtp, buf);
+ if (rval != 0)
+ return (rval);
}
+ if (dtp->dt_stopped) {
+ dtrace_bufdesc_t *buf;
+
+ if (dt_get_buf(dtp, dtp->dt_endedon, &buf) != 0)
+ return (-1);
+ if (buf == NULL)
+ return (0);
- if ((rval = dt_consume_cpu(dtp, fp, i, buf, pf, rf, arg)) != 0)
+ rval = dt_consume_cpu(dtp, fp, dtp->dt_endedon,
+ buf, B_FALSE, pf, rf, arg);
+ dt_put_buf(dtp, buf);
return (rval);
- }
+ }
+ } else {
+ /*
+ * The output will be in the order it was traced (or for
+ * speculations, when it was committed). We retrieve a buffer
+ * from each CPU and put it into a priority queue, which sorts
+ * based on the first entry in the buffer. This is sufficient
+ * because entries within a buffer are already sorted.
+ *
+ * We then consume records one at a time, always consuming the
+ * oldest record, as determined by the priority queue. When
+ * we reach the end of the time covered by these buffers,
+ * we need to stop and retrieve more records on the next pass.
+ * The kernel tells us the time covered by each buffer, in
+ * dtbd_timestamp. The first buffer's timestamp tells us the
+ * time covered by all buffers, as subsequently retrieved
+ * buffers will cover to a more recent time.
+ */
- if (!dtp->dt_stopped)
- return (0);
+ uint64_t *drops = alloca(max_ncpus * sizeof (uint64_t));
+ uint64_t first_timestamp = 0;
+ uint_t cookie = 0;
+ dtrace_bufdesc_t *buf;
- buf->dtbd_cpu = dtp->dt_endedon;
+ bzero(drops, max_ncpus * sizeof (uint64_t));
+
+ if (dtp->dt_bufq == NULL) {
+ dtp->dt_bufq = dt_pq_init(dtp, max_ncpus * 2,
+ dt_buf_oldest, NULL);
+ if (dtp->dt_bufq == NULL) /* ENOMEM */
+ return (-1);
+ }
+
+ /* Retrieve data from each CPU. */
+ (void) dtrace_getopt(dtp, "bufsize", &size);
+ for (i = 0; i < max_ncpus; i++) {
+ dtrace_bufdesc_t *buf;
+
+ if (dt_get_buf(dtp, i, &buf) != 0)
+ return (-1);
+ if (buf != NULL) {
+ if (first_timestamp == 0)
+ first_timestamp = buf->dtbd_timestamp;
+ assert(buf->dtbd_timestamp >= first_timestamp);
+
+ dt_pq_insert(dtp->dt_bufq, buf);
+ drops[i] = buf->dtbd_drops;
+ buf->dtbd_drops = 0;
+ }
+ }
+
+ /* Consume records. */
+ for (;;) {
+ dtrace_bufdesc_t *buf = dt_pq_pop(dtp->dt_bufq);
+ uint64_t timestamp;
+
+ if (buf == NULL)
+ break;
+
+ timestamp = dt_buf_oldest(buf, dtp);
+ assert(timestamp >= dtp->dt_last_timestamp);
+ dtp->dt_last_timestamp = timestamp;
+
+ if (timestamp == buf->dtbd_timestamp) {
+ /*
+ * We've reached the end of the time covered
+ * by this buffer. If this is the oldest
+ * buffer, we must do another pass
+ * to retrieve more data.
+ */
+ dt_put_buf(dtp, buf);
+ if (timestamp == first_timestamp &&
+ !dtp->dt_stopped)
+ break;
+ continue;
+ }
+
+ if ((rval = dt_consume_cpu(dtp, fp,
+ buf->dtbd_cpu, buf, B_TRUE, pf, rf, arg)) != 0)
+ return (rval);
+ dt_pq_insert(dtp->dt_bufq, buf);
+ }
+
+ /* Consume drops. */
+ for (i = 0; i < max_ncpus; i++) {
+ if (drops[i] != 0) {
+ int error = dt_handle_cpudrop(dtp, i,
+ DTRACEDROP_PRINCIPAL, drops[i]);
+ if (error != 0)
+ return (error);
+ }
+ }
-#if defined(sun)
- if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
-#else
- if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
-#endif
/*
- * This _really_ shouldn't fail, but it is strictly speaking
- * possible for this to return ENOENT if the CPU that called
- * the END enabling somehow managed to become unconfigured.
- * It's unclear how the user can possibly expect anything
- * rational to happen in this case -- the state has been thrown
- * out along with the unconfigured CPU -- so we'll just drive
- * on...
+ * Reduce memory usage by re-allocating smaller buffers
+ * for the "remnants".
*/
- if (errno == ENOENT)
- return (0);
-
- return (dt_set_errno(dtp, errno));
+ while (buf = dt_pq_walk(dtp->dt_bufq, &cookie))
+ dt_realloc_buf(dtp, buf, buf->dtbd_size);
}
- return (dt_consume_cpu(dtp, fp, dtp->dt_endedon, buf, pf, rf, arg));
+ return (0);
}
diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_impl.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_impl.h
index 61d901b..c066b03 100644
--- a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_impl.h
+++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_impl.h
@@ -26,7 +26,7 @@
/*
* Copyright (c) 2011, Joyent, Inc. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _DT_IMPL_H
@@ -64,6 +64,7 @@ extern "C" {
#include <dt_proc.h>
#include <dt_dof.h>
#include <dt_pcb.h>
+#include <dt_pq.h>
struct dt_module; /* see below */
struct dt_pfdict; /* see <dt_printf.h> */
@@ -239,6 +240,7 @@ struct dtrace_hdl {
uint_t dt_provbuckets; /* number of provider hash buckets */
uint_t dt_nprovs; /* number of providers in hash and list */
dt_proc_hash_t *dt_procs; /* hash table of grabbed process handles */
+ char **dt_proc_env; /* additional environment variables */
dt_intdesc_t dt_ints[6]; /* cached integer type descriptions */
ctf_id_t dt_type_func; /* cached CTF identifier for function type */
ctf_id_t dt_type_fptr; /* cached CTF identifier for function pointer */
@@ -257,7 +259,7 @@ struct dtrace_hdl {
int dt_maxstrdata; /* max strdata ID */
char **dt_strdata; /* pointer to strdata array */
dt_aggregate_t dt_aggregate; /* aggregate */
- dtrace_bufdesc_t dt_buf; /* staging buffer */
+ dt_pq_t *dt_bufq; /* CPU-specific data queue */
struct dt_pfdict *dt_pfdict; /* dictionary of printf conversions */
dt_version_t dt_vmax; /* optional ceiling on program API binding */
dtrace_attribute_t dt_amin; /* optional floor on program attributes */
@@ -326,6 +328,11 @@ struct dtrace_hdl {
struct utsname dt_uts; /* uname(2) information for system */
dt_list_t dt_lib_dep; /* scratch linked-list of lib dependencies */
dt_list_t dt_lib_dep_sorted; /* dependency sorted library list */
+ dtrace_flowkind_t dt_flow; /* flow kind */
+ const char *dt_prefix; /* recommended flow prefix */
+ int dt_indent; /* recommended flow indent */
+ dtrace_epid_t dt_last_epid; /* most recently consumed EPID */
+ uint64_t dt_last_timestamp; /* most recently consumed timestamp */
};
/*
diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_open.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_open.c
index dcc4e7c..25f9956 100644
--- a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_open.c
+++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_open.c
@@ -22,7 +22,7 @@
/*
* Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, Joyent, Inc. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/types.h>
@@ -92,7 +92,7 @@
/*
* The version number should be increased for every customer visible release
- * of Solaris. The major number should be incremented when a fundamental
+ * of DTrace. The major number should be incremented when a fundamental
* change has been made that would affect all consumers, and would reflect
* sweeping changes to DTrace or the D language. The minor number should be
* incremented when a change is introduced that could break scripts that had
@@ -121,8 +121,9 @@
#define DT_VERS_1_8 DT_VERSION_NUMBER(1, 8, 0)
#define DT_VERS_1_8_1 DT_VERSION_NUMBER(1, 8, 1)
#define DT_VERS_1_9 DT_VERSION_NUMBER(1, 9, 0)
-#define DT_VERS_LATEST DT_VERS_1_9
-#define DT_VERS_STRING "Sun D 1.9"
+#define DT_VERS_1_9_1 DT_VERSION_NUMBER(1, 9, 1)
+#define DT_VERS_LATEST DT_VERS_1_9_1
+#define DT_VERS_STRING "Sun D 1.9.1"
const dt_version_t _dtrace_versions[] = {
DT_VERS_1_0, /* D API 1.0.0 (PSARC 2001/466) Solaris 10 FCS */
@@ -143,6 +144,7 @@ const dt_version_t _dtrace_versions[] = {
DT_VERS_1_8, /* D API 1.8 */
DT_VERS_1_8_1, /* D API 1.8.1 */
DT_VERS_1_9, /* D API 1.9 */
+ DT_VERS_1_9_1, /* D API 1.9.1 */
0
};
@@ -1630,7 +1632,6 @@ dtrace_close(dtrace_hdl_t *dtp)
dt_strdata_destroy(dtp);
dt_buffered_destroy(dtp);
dt_aggregate_destroy(dtp);
- free(dtp->dt_buf.dtbd_data);
dt_pfdict_destroy(dtp);
dt_provmod_destroy(&dtp->dt_provmod);
dt_dof_fini(dtp);
diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_options.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_options.c
index fa1407f..020df2f 100644
--- a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_options.c
+++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_options.c
@@ -26,6 +26,10 @@
#pragma ident "%Z%%M% %I% %E% SMI"
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
#include <sys/resource.h>
#include <sys/mman.h>
#include <sys/types.h>
@@ -368,6 +372,61 @@ dt_opt_pgmax(dtrace_hdl_t *dtp, const char *arg, uintptr_t option)
return (0);
}
+static int
+dt_opt_setenv(dtrace_hdl_t *dtp, const char *arg, uintptr_t option)
+{
+ char **p;
+ char *var;
+ int i;
+
+ /*
+ * We can't effectively set environment variables from #pragma lines
+ * since the processes have already been spawned.
+ */
+ if (dtp->dt_pcb != NULL)
+ return (dt_set_errno(dtp, EDT_BADOPTCTX));
+
+ if (arg == NULL)
+ return (dt_set_errno(dtp, EDT_BADOPTVAL));
+
+ if (!option && strchr(arg, '=') != NULL)
+ return (dt_set_errno(dtp, EDT_BADOPTVAL));
+
+ for (i = 1, p = dtp->dt_proc_env; *p != NULL; i++, p++)
+ continue;
+
+ for (p = dtp->dt_proc_env; *p != NULL; p++) {
+ var = strchr(*p, '=');
+ if (var == NULL)
+ var = *p + strlen(*p);
+ if (strncmp(*p, arg, var - *p) == 0) {
+ dt_free(dtp, *p);
+ *p = dtp->dt_proc_env[i - 1];
+ dtp->dt_proc_env[i - 1] = NULL;
+ i--;
+ }
+ }
+
+ if (option) {
+ if ((var = strdup(arg)) == NULL)
+ return (dt_set_errno(dtp, EDT_NOMEM));
+
+ if ((p = dt_alloc(dtp, sizeof (char *) * (i + 1))) == NULL) {
+ dt_free(dtp, var);
+ return (dt_set_errno(dtp, EDT_NOMEM));
+ }
+
+ bcopy(dtp->dt_proc_env, p, sizeof (char *) * i);
+ dt_free(dtp, dtp->dt_proc_env);
+ dtp->dt_proc_env = p;
+
+ dtp->dt_proc_env[i - 1] = var;
+ dtp->dt_proc_env[i] = NULL;
+ }
+
+ return (0);
+}
+
/*ARGSUSED*/
static int
dt_opt_stdc(dtrace_hdl_t *dtp, const char *arg, uintptr_t option)
@@ -411,7 +470,6 @@ dt_opt_syslibdir(dtrace_hdl_t *dtp, const char *arg, uintptr_t option)
return (0);
}
-
/*ARGSUSED*/
static int
dt_opt_tree(dtrace_hdl_t *dtp, const char *arg, uintptr_t option)
@@ -912,6 +970,7 @@ static const dt_option_t _dtrace_ctoptions[] = {
{ "pgmax", dt_opt_pgmax },
{ "preallocate", dt_opt_preallocate },
{ "pspec", dt_opt_cflags, DTRACE_C_PSPEC },
+ { "setenv", dt_opt_setenv, 1 },
{ "stdc", dt_opt_stdc },
{ "strip", dt_opt_dflags, DTRACE_D_STRIP },
{ "syslibdir", dt_opt_syslibdir },
@@ -920,6 +979,7 @@ static const dt_option_t _dtrace_ctoptions[] = {
{ "udefs", dt_opt_invcflags, DTRACE_C_UNODEF },
{ "undef", dt_opt_cpp_opts, (uintptr_t)"-U" },
{ "unodefs", dt_opt_cflags, DTRACE_C_UNODEF },
+ { "unsetenv", dt_opt_setenv, 0 },
{ "verbose", dt_opt_cflags, DTRACE_C_DIFV },
{ "version", dt_opt_version },
{ "zdefs", dt_opt_cflags, DTRACE_C_ZDEFS },
@@ -947,6 +1007,7 @@ static const dt_option_t _dtrace_rtoptions[] = {
{ "statusrate", dt_opt_rate, DTRACEOPT_STATUSRATE },
{ "strsize", dt_opt_strsize, DTRACEOPT_STRSIZE },
{ "ustackframes", dt_opt_runtime, DTRACEOPT_USTACKFRAMES },
+ { "temporal", dt_opt_runtime, DTRACEOPT_TEMPORAL },
{ NULL, NULL, 0 }
};
diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pq.c b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pq.c
new file mode 100644
index 0000000..0cd556a
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pq.c
@@ -0,0 +1,157 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#include <dtrace.h>
+#include <dt_impl.h>
+#include <dt_pq.h>
+#include <assert.h>
+
+/*
+ * Create a new priority queue.
+ *
+ * size is the maximum number of items that will be stored in the priority
+ * queue at one time.
+ */
+dt_pq_t *
+dt_pq_init(dtrace_hdl_t *dtp, uint_t size, dt_pq_value_f value_cb, void *cb_arg)
+{
+ dt_pq_t *p;
+ assert(size > 1);
+
+ if ((p = dt_zalloc(dtp, sizeof (dt_pq_t))) == NULL)
+ return (NULL);
+
+ p->dtpq_items = dt_zalloc(dtp, size * sizeof (p->dtpq_items[0]));
+ if (p->dtpq_items == NULL) {
+ dt_free(dtp, p);
+ return (NULL);
+ }
+
+ p->dtpq_hdl = dtp;
+ p->dtpq_size = size;
+ p->dtpq_last = 1;
+ p->dtpq_value = value_cb;
+ p->dtpq_arg = cb_arg;
+
+ return (p);
+}
+
+void
+dt_pq_fini(dt_pq_t *p)
+{
+ dtrace_hdl_t *dtp = p->dtpq_hdl;
+
+ dt_free(dtp, p->dtpq_items);
+ dt_free(dtp, p);
+}
+
+static uint64_t
+dt_pq_getvalue(dt_pq_t *p, uint_t index)
+{
+ void *item = p->dtpq_items[index];
+ return (p->dtpq_value(item, p->dtpq_arg));
+}
+
+void
+dt_pq_insert(dt_pq_t *p, void *item)
+{
+ uint_t i;
+
+ assert(p->dtpq_last < p->dtpq_size);
+
+ i = p->dtpq_last++;
+ p->dtpq_items[i] = item;
+
+ while (i > 1 && dt_pq_getvalue(p, i) < dt_pq_getvalue(p, i / 2)) {
+ void *tmp = p->dtpq_items[i];
+ p->dtpq_items[i] = p->dtpq_items[i / 2];
+ p->dtpq_items[i / 2] = tmp;
+ i /= 2;
+ }
+}
+
+/*
+ * Return elements from the priority queue. *cookie should be zero when first
+ * called. Returns NULL when there are no more elements.
+ */
+void *
+dt_pq_walk(dt_pq_t *p, uint_t *cookie)
+{
+ (*cookie)++;
+ if (*cookie >= p->dtpq_last)
+ return (NULL);
+
+ return (p->dtpq_items[*cookie]);
+}
+
+void *
+dt_pq_pop(dt_pq_t *p)
+{
+ uint_t i = 1;
+ void *ret;
+
+ assert(p->dtpq_last > 0);
+
+ if (p->dtpq_last == 1)
+ return (NULL);
+
+ ret = p->dtpq_items[1];
+
+ p->dtpq_last--;
+ p->dtpq_items[1] = p->dtpq_items[p->dtpq_last];
+ p->dtpq_items[p->dtpq_last] = NULL;
+
+ for (;;) {
+ uint_t lc = i * 2;
+ uint_t rc = i * 2 + 1;
+ uint_t c;
+ uint64_t v;
+ void *tmp;
+
+ if (lc >= p->dtpq_last)
+ break;
+
+ if (rc >= p->dtpq_last) {
+ c = lc;
+ v = dt_pq_getvalue(p, lc);
+ } else {
+ uint64_t lv = dt_pq_getvalue(p, lc);
+ uint64_t rv = dt_pq_getvalue(p, rc);
+
+ if (lv < rv) {
+ c = lc;
+ v = lv;
+ } else {
+ c = rc;
+ v = rv;
+ }
+ }
+
+ if (v >= dt_pq_getvalue(p, i))
+ break;
+
+ tmp = p->dtpq_items[i];
+ p->dtpq_items[i] = p->dtpq_items[c];
+ p->dtpq_items[c] = tmp;
+
+ i = c;
+ }
+
+ return (ret);
+}
diff --git a/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pq.h b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pq.h
new file mode 100644
index 0000000..8184a90
--- /dev/null
+++ b/cddl/contrib/opensolaris/lib/libdtrace/common/dt_pq.h
@@ -0,0 +1,51 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2012 by Delphix. All rights reserved.
+ */
+
+#ifndef _DT_PQ_H
+#define _DT_PQ_H
+
+#include <dtrace.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef uint64_t (*dt_pq_value_f)(void *, void *);
+
+typedef struct dt_pq {
+ dtrace_hdl_t *dtpq_hdl; /* dtrace handle */
+ void **dtpq_items; /* array of elements */
+ uint_t dtpq_size; /* count of allocated elements */
+ uint_t dtpq_last; /* next free slot */
+ dt_pq_value_f dtpq_value; /* callback to get the value */
+ void *dtpq_arg; /* callback argument */
+} dt_pq_t;
+
+extern dt_pq_t *dt_pq_init(dtrace_hdl_t *, uint_t size, dt_pq_value_f, void *);
+extern void dt_pq_fini(dt_pq_t *);
+
+extern void dt_pq_insert(dt_pq_t *, void *);
+extern void *dt_pq_pop(dt_pq_t *);
+extern void *dt_pq_walk(dt_pq_t *, uint_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _DT_PQ_H */
diff --git a/cddl/lib/libdtrace/Makefile b/cddl/lib/libdtrace/Makefile
index 7dd0afd..4fd7fae 100644
--- a/cddl/lib/libdtrace/Makefile
+++ b/cddl/lib/libdtrace/Makefile
@@ -31,6 +31,7 @@ SRCS= dt_aggregate.c \
dt_parser.c \
dt_pcb.c \
dt_pid.c \
+ dt_pq.c \
dt_pragma.c \
dt_print.c \
dt_printf.c \
diff --git a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c
index c8bb4de..729a2cb 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/dtrace/dtrace.c
@@ -23,6 +23,7 @@
/*
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved
* Use is subject to license terms.
*/
@@ -2345,9 +2346,10 @@ dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
{
dtrace_speculation_t *spec;
dtrace_buffer_t *src, *dest;
- uintptr_t daddr, saddr, dlimit;
+ uintptr_t daddr, saddr, dlimit, slimit;
dtrace_speculation_state_t current, new = 0;
intptr_t offs;
+ uint64_t timestamp;
if (which == 0)
return;
@@ -2423,7 +2425,37 @@ dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
}
/*
- * We have the space; copy the buffer across. (Note that this is a
+ * We have sufficient space to copy the speculative buffer into the
+ * primary buffer. First, modify the speculative buffer, filling
+ * in the timestamp of all entries with the current time. The data
+ * must have the commit() time rather than the time it was traced,
+ * so that all entries in the primary buffer are in timestamp order.
+ */
+ timestamp = dtrace_gethrtime();
+ saddr = (uintptr_t)src->dtb_tomax;
+ slimit = saddr + src->dtb_offset;
+ while (saddr < slimit) {
+ size_t size;
+ dtrace_rechdr_t *dtrh = (dtrace_rechdr_t *)saddr;
+
+ if (dtrh->dtrh_epid == DTRACE_EPIDNONE) {
+ saddr += sizeof (dtrace_epid_t);
+ continue;
+ }
+ ASSERT3U(dtrh->dtrh_epid, <=, state->dts_necbs);
+ size = state->dts_ecbs[dtrh->dtrh_epid - 1]->dte_size;
+
+ ASSERT3U(saddr + size, <=, slimit);
+ ASSERT3U(size, >=, sizeof (dtrace_rechdr_t));
+ ASSERT3U(DTRACE_RECORD_LOAD_TIMESTAMP(dtrh), ==, UINT64_MAX);
+
+ DTRACE_RECORD_STORE_TIMESTAMP(dtrh, timestamp);
+
+ saddr += size;
+ }
+
+ /*
+ * Copy the buffer across. (Note that this is a
* highly subobtimal bcopy(); in the unlikely event that this becomes
* a serious performance issue, a high-performance DTrace-specific
* bcopy() should obviously be invented.)
@@ -6206,7 +6238,7 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
if (now - state->dts_alive > dtrace_deadman_timeout) {
/*
* We seem to be dead. Unless we (a) have kernel
- * destructive permissions (b) have expicitly enabled
+ * destructive permissions (b) have explicitly enabled
* destructive actions and (c) destructive actions have
* not been disabled, we're going to transition into
* the KILLED state, from which no further processing
@@ -6234,8 +6266,18 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
tomax = buf->dtb_tomax;
ASSERT(tomax != NULL);
- if (ecb->dte_size != 0)
- DTRACE_STORE(uint32_t, tomax, offs, ecb->dte_epid);
+ if (ecb->dte_size != 0) {
+ dtrace_rechdr_t dtrh;
+ if (!(mstate.dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
+ mstate.dtms_timestamp = dtrace_gethrtime();
+ mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP;
+ }
+ ASSERT3U(ecb->dte_size, >=, sizeof (dtrace_rechdr_t));
+ dtrh.dtrh_epid = ecb->dte_epid;
+ DTRACE_RECORD_STORE_TIMESTAMP(&dtrh,
+ mstate.dtms_timestamp);
+ *((dtrace_rechdr_t *)(tomax + offs)) = dtrh;
+ }
mstate.dtms_epid = ecb->dte_epid;
mstate.dtms_present |= DTRACE_MSTATE_EPID;
@@ -6382,7 +6424,9 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
continue;
switch (act->dta_kind) {
- case DTRACEACT_SPECULATE:
+ case DTRACEACT_SPECULATE: {
+ dtrace_rechdr_t *dtrh;
+
ASSERT(buf == &state->dts_buffer[cpuid]);
buf = dtrace_speculation_buffer(state,
cpuid, val);
@@ -6404,10 +6448,23 @@ dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
tomax = buf->dtb_tomax;
ASSERT(tomax != NULL);
- if (ecb->dte_size != 0)
- DTRACE_STORE(uint32_t, tomax, offs,
- ecb->dte_epid);
+ if (ecb->dte_size == 0)
+ continue;
+
+ ASSERT3U(ecb->dte_size, >=,
+ sizeof (dtrace_rechdr_t));
+ dtrh = ((void *)(tomax + offs));
+ dtrh->dtrh_epid = ecb->dte_epid;
+ /*
+ * When the speculation is committed, all of
+ * the records in the speculative buffer will
+ * have their timestamps set to the commit
+ * time. Until then, it is set to a sentinel
+ * value, for debugability.
+ */
+ DTRACE_RECORD_STORE_TIMESTAMP(dtrh, UINT64_MAX);
continue;
+ }
case DTRACEACT_PRINTM: {
/* The DIF returns a 'memref'. */
@@ -9754,9 +9811,9 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
/*
* The default size is the size of the default action: recording
- * the epid.
+ * the header.
*/
- ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);
+ ecb->dte_size = ecb->dte_needed = sizeof (dtrace_rechdr_t);
ecb->dte_alignment = sizeof (dtrace_epid_t);
epid = state->dts_epid++;
@@ -9854,122 +9911,89 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb)
static void
dtrace_ecb_resize(dtrace_ecb_t *ecb)
{
- uint32_t maxalign = sizeof (dtrace_epid_t);
- uint32_t align = sizeof (uint8_t), offs, diff;
dtrace_action_t *act;
- int wastuple = 0;
+ uint32_t curneeded = UINT32_MAX;
uint32_t aggbase = UINT32_MAX;
- dtrace_state_t *state = ecb->dte_state;
/*
- * If we record anything, we always record the epid. (And we always
- * record it first.)
+ * If we record anything, we always record the dtrace_rechdr_t. (And
+ * we always record it first.)
*/
- offs = sizeof (dtrace_epid_t);
- ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);
+ ecb->dte_size = sizeof (dtrace_rechdr_t);
+ ecb->dte_alignment = sizeof (dtrace_epid_t);
for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
dtrace_recdesc_t *rec = &act->dta_rec;
+ ASSERT(rec->dtrd_size > 0 || rec->dtrd_alignment == 1);
- if ((align = rec->dtrd_alignment) > maxalign)
- maxalign = align;
-
- if (!wastuple && act->dta_intuple) {
- /*
- * This is the first record in a tuple. Align the
- * offset to be at offset 4 in an 8-byte aligned
- * block.
- */
- diff = offs + sizeof (dtrace_aggid_t);
-
- if ((diff = (diff & (sizeof (uint64_t) - 1))))
- offs += sizeof (uint64_t) - diff;
-
- aggbase = offs - sizeof (dtrace_aggid_t);
- ASSERT(!(aggbase & (sizeof (uint64_t) - 1)));
- }
-
- /*LINTED*/
- if (rec->dtrd_size != 0 && (diff = (offs & (align - 1)))) {
- /*
- * The current offset is not properly aligned; align it.
- */
- offs += align - diff;
- }
-
- rec->dtrd_offset = offs;
-
- if (offs + rec->dtrd_size > ecb->dte_needed) {
- ecb->dte_needed = offs + rec->dtrd_size;
-
- if (ecb->dte_needed > state->dts_needed)
- state->dts_needed = ecb->dte_needed;
- }
+ ecb->dte_alignment = MAX(ecb->dte_alignment,
+ rec->dtrd_alignment);
if (DTRACEACT_ISAGG(act->dta_kind)) {
dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
- dtrace_action_t *first = agg->dtag_first, *prev;
- ASSERT(rec->dtrd_size != 0 && first != NULL);
- ASSERT(wastuple);
+ ASSERT(rec->dtrd_size != 0);
+ ASSERT(agg->dtag_first != NULL);
+ ASSERT(act->dta_prev->dta_intuple);
ASSERT(aggbase != UINT32_MAX);
+ ASSERT(curneeded != UINT32_MAX);
agg->dtag_base = aggbase;
- while ((prev = first->dta_prev) != NULL &&
- DTRACEACT_ISAGG(prev->dta_kind)) {
- agg = (dtrace_aggregation_t *)prev;
- first = agg->dtag_first;
- }
+ curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment);
+ rec->dtrd_offset = curneeded;
+ curneeded += rec->dtrd_size;
+ ecb->dte_needed = MAX(ecb->dte_needed, curneeded);
- if (prev != NULL) {
- offs = prev->dta_rec.dtrd_offset +
- prev->dta_rec.dtrd_size;
- } else {
- offs = sizeof (dtrace_epid_t);
+ aggbase = UINT32_MAX;
+ curneeded = UINT32_MAX;
+ } else if (act->dta_intuple) {
+ if (curneeded == UINT32_MAX) {
+ /*
+ * This is the first record in a tuple. Align
+ * curneeded to be at offset 4 in an 8-byte
+ * aligned block.
+ */
+ ASSERT(act->dta_prev == NULL ||
+ !act->dta_prev->dta_intuple);
+ ASSERT3U(aggbase, ==, UINT32_MAX);
+ curneeded = P2PHASEUP(ecb->dte_size,
+ sizeof (uint64_t), sizeof (dtrace_aggid_t));
+
+ aggbase = curneeded - sizeof (dtrace_aggid_t);
+ ASSERT(IS_P2ALIGNED(aggbase,
+ sizeof (uint64_t)));
}
- wastuple = 0;
+ curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment);
+ rec->dtrd_offset = curneeded;
+ curneeded += rec->dtrd_size;
} else {
- if (!act->dta_intuple)
- ecb->dte_size = offs + rec->dtrd_size;
+ /* tuples must be followed by an aggregation */
+ ASSERT(act->dta_prev == NULL ||
+ !act->dta_prev->dta_intuple);
- offs += rec->dtrd_size;
+ ecb->dte_size = P2ROUNDUP(ecb->dte_size,
+ rec->dtrd_alignment);
+ rec->dtrd_offset = ecb->dte_size;
+ ecb->dte_size += rec->dtrd_size;
+ ecb->dte_needed = MAX(ecb->dte_needed, ecb->dte_size);
}
-
- wastuple = act->dta_intuple;
}
if ((act = ecb->dte_action) != NULL &&
!(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) &&
- ecb->dte_size == sizeof (dtrace_epid_t)) {
+ ecb->dte_size == sizeof (dtrace_rechdr_t)) {
/*
- * If the size is still sizeof (dtrace_epid_t), then all
+ * If the size is still sizeof (dtrace_rechdr_t), then all
* actions store no data; set the size to 0.
*/
- ecb->dte_alignment = maxalign;
ecb->dte_size = 0;
-
- /*
- * If the needed space is still sizeof (dtrace_epid_t), then
- * all actions need no additional space; set the needed
- * size to 0.
- */
- if (ecb->dte_needed == sizeof (dtrace_epid_t))
- ecb->dte_needed = 0;
-
- return;
}
- /*
- * Set our alignment, and make sure that the dte_size and dte_needed
- * are aligned to the size of an EPID.
- */
- ecb->dte_alignment = maxalign;
- ecb->dte_size = (ecb->dte_size + (sizeof (dtrace_epid_t) - 1)) &
- ~(sizeof (dtrace_epid_t) - 1);
- ecb->dte_needed = (ecb->dte_needed + (sizeof (dtrace_epid_t) - 1)) &
- ~(sizeof (dtrace_epid_t) - 1);
- ASSERT(ecb->dte_size <= ecb->dte_needed);
+ ecb->dte_size = P2ROUNDUP(ecb->dte_size, sizeof (dtrace_epid_t));
+ ecb->dte_needed = P2ROUNDUP(ecb->dte_needed, (sizeof (dtrace_epid_t)));
+ ecb->dte_state->dts_needed = MAX(ecb->dte_state->dts_needed,
+ ecb->dte_needed);
}
static dtrace_action_t *
@@ -10349,7 +10373,7 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
break;
case DTRACEACT_SPECULATE:
- if (ecb->dte_size > sizeof (dtrace_epid_t))
+ if (ecb->dte_size > sizeof (dtrace_rechdr_t))
return (EINVAL);
if (dp == NULL)
@@ -10470,7 +10494,7 @@ dtrace_ecb_action_remove(dtrace_ecb_t *ecb)
ecb->dte_action = NULL;
ecb->dte_action_last = NULL;
- ecb->dte_size = sizeof (dtrace_epid_t);
+ ecb->dte_size = 0;
}
static void
@@ -10739,12 +10763,13 @@ dtrace_buffer_switch(dtrace_buffer_t *buf)
caddr_t tomax = buf->dtb_tomax;
caddr_t xamot = buf->dtb_xamot;
dtrace_icookie_t cookie;
- hrtime_t now = dtrace_gethrtime();
+ hrtime_t now;
ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));
cookie = dtrace_interrupt_disable();
+ now = dtrace_gethrtime();
buf->dtb_tomax = xamot;
buf->dtb_xamot = tomax;
buf->dtb_xamot_drops = buf->dtb_drops;
@@ -11110,7 +11135,7 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
if (epid == DTRACE_EPIDNONE) {
size = sizeof (uint32_t);
} else {
- ASSERT(epid <= state->dts_necbs);
+ ASSERT3U(epid, <=, state->dts_necbs);
ASSERT(state->dts_ecbs[epid - 1] != NULL);
size = state->dts_ecbs[epid - 1]->dte_size;
@@ -16387,6 +16412,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
desc.dtbd_drops = buf->dtb_drops;
desc.dtbd_errors = buf->dtb_errors;
desc.dtbd_oldest = buf->dtb_xamot_offset;
+ desc.dtbd_timestamp = dtrace_gethrtime();
mutex_exit(&dtrace_lock);
@@ -16439,6 +16465,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
desc.dtbd_drops = buf->dtb_xamot_drops;
desc.dtbd_errors = buf->dtb_xamot_errors;
desc.dtbd_oldest = 0;
+ desc.dtbd_timestamp = buf->dtb_switched;
mutex_exit(&dtrace_lock);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h b/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h
index 4968352..4595c2e 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h
@@ -26,6 +26,7 @@
/*
* Copyright (c) 2011, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DTRACE_H
@@ -930,10 +931,10 @@ typedef struct dtrace_ecbdesc {
* DTrace Metadata Description Structures
*
* DTrace separates the trace data stream from the metadata stream. The only
- * metadata tokens placed in the data stream are enabled probe identifiers
- * (EPIDs) or (in the case of aggregations) aggregation identifiers. In order
- * to determine the structure of the data, DTrace consumers pass the token to
- * the kernel, and receive in return a corresponding description of the enabled
+ * metadata tokens placed in the data stream are the dtrace_rechdr_t (EPID +
+ * timestamp) or (in the case of aggregations) aggregation identifiers. To
+ * determine the structure of the data, DTrace consumers pass the token to the
+ * kernel, and receive in return a corresponding description of the enabled
* probe (via the dtrace_eprobedesc structure) or the aggregation (via the
* dtrace_aggdesc structure). Both of these structures are expressed in terms
* of record descriptions (via the dtrace_recdesc structure) that describe the
@@ -1028,7 +1029,8 @@ typedef struct dtrace_fmtdesc {
#define DTRACEOPT_AGGSORTREV 24 /* reverse-sort aggregations */
#define DTRACEOPT_AGGSORTPOS 25 /* agg. position to sort on */
#define DTRACEOPT_AGGSORTKEYPOS 26 /* agg. key position to sort on */
-#define DTRACEOPT_MAX 27 /* number of options */
+#define DTRACEOPT_TEMPORAL 27 /* temporally ordered output */
+#define DTRACEOPT_MAX 28 /* number of options */
#define DTRACEOPT_UNSET (dtrace_optval_t)-2 /* unset option */
@@ -1048,7 +1050,9 @@ typedef struct dtrace_fmtdesc {
* where user-level wishes the kernel to snapshot the buffer to (the
* dtbd_data field). The kernel uses the same structure to pass back some
* information regarding the buffer: the size of data actually copied out, the
- * number of drops, the number of errors, and the offset of the oldest record.
+ * number of drops, the number of errors, the offset of the oldest record,
+ * and the time of the snapshot.
+ *
* If the buffer policy is a "switch" policy, taking a snapshot of the
* principal buffer has the additional effect of switching the active and
* inactive buffers. Taking a snapshot of the aggregation buffer _always_ has
@@ -1061,9 +1065,30 @@ typedef struct dtrace_bufdesc {
uint64_t dtbd_drops; /* number of drops */
DTRACE_PTR(char, dtbd_data); /* data */
uint64_t dtbd_oldest; /* offset of oldest record */
+ uint64_t dtbd_timestamp; /* hrtime of snapshot */
} dtrace_bufdesc_t;
/*
+ * Each record in the buffer (dtbd_data) begins with a header that includes
+ * the epid and a timestamp. The timestamp is split into two 4-byte parts
+ * so that we do not require 8-byte alignment.
+ */
+typedef struct dtrace_rechdr {
+ dtrace_epid_t dtrh_epid; /* enabled probe id */
+ uint32_t dtrh_timestamp_hi; /* high bits of hrtime_t */
+ uint32_t dtrh_timestamp_lo; /* low bits of hrtime_t */
+} dtrace_rechdr_t;
+
+#define DTRACE_RECORD_LOAD_TIMESTAMP(dtrh) \
+ ((dtrh)->dtrh_timestamp_lo + \
+ ((uint64_t)(dtrh)->dtrh_timestamp_hi << 32))
+
+#define DTRACE_RECORD_STORE_TIMESTAMP(dtrh, hrtime) { \
+ (dtrh)->dtrh_timestamp_lo = (uint32_t)hrtime; \
+ (dtrh)->dtrh_timestamp_hi = hrtime >> 32; \
+}
+
+/*
* DTrace Status
*
* The status of DTrace is relayed via the dtrace_status structure. This
diff --git a/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace_impl.h b/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace_impl.h
index 4d9a4f8..7355aa8 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace_impl.h
+++ b/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace_impl.h
@@ -23,6 +23,7 @@
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright (c) 2012 by Delphix. All rights reserved.
* Use is subject to license terms.
*/
@@ -209,15 +210,18 @@ typedef struct dtrace_hash {
* predicate is non-NULL, the DIF object is executed. If the result is
* non-zero, the action list is processed, with each action being executed
* accordingly. When the action list has been completely executed, processing
- * advances to the next ECB. processing advances to the next ECB. If the
- * result is non-zero; For each ECB, it first determines the The ECB
- * abstraction allows disjoint consumers to multiplex on single probes.
+ * advances to the next ECB. The ECB abstraction allows disjoint consumers
+ * to multiplex on single probes.
+ *
+ * Execution of the ECB results in consuming dte_size bytes in the buffer
+ * to record data. During execution, dte_needed bytes must be available in
+ * the buffer. This space is used for both recorded data and tuple data.
*/
struct dtrace_ecb {
dtrace_epid_t dte_epid; /* enabled probe ID */
uint32_t dte_alignment; /* required alignment */
- size_t dte_needed; /* bytes needed */
- size_t dte_size; /* total size of payload */
+ size_t dte_needed; /* space needed for execution */
+ size_t dte_size; /* size of recorded payload */
dtrace_predicate_t *dte_predicate; /* predicate, if any */
dtrace_action_t *dte_action; /* actions, if any */
dtrace_ecb_t *dte_next; /* next ECB on probe */
@@ -275,27 +279,30 @@ typedef struct dtrace_aggregation {
* the EPID, the consumer can determine the data layout. (The data buffer
* layout is shown schematically below.) By assuring that one can determine
* data layout from the EPID, the metadata stream can be separated from the
- * data stream -- simplifying the data stream enormously.
- *
- * base of data buffer ---> +------+--------------------+------+
- * | EPID | data | EPID |
- * +------+--------+------+----+------+
- * | data | EPID | data |
- * +---------------+------+-----------+
- * | data, cont. |
- * +------+--------------------+------+
- * | EPID | data | |
- * +------+--------------------+ |
- * | || |
- * | || |
- * | \/ |
- * : :
- * . .
- * . .
- * . .
- * : :
- * | |
- * limit of data buffer ---> +----------------------------------+
+ * data stream -- simplifying the data stream enormously. The ECB always
+ * proceeds the recorded data as part of the dtrace_rechdr_t structure that
+ * includes the EPID and a high-resolution timestamp used for output ordering
+ * consistency.
+ *
+ * base of data buffer ---> +--------+--------------------+--------+
+ * | rechdr | data | rechdr |
+ * +--------+------+--------+----+--------+
+ * | data | rechdr | data |
+ * +---------------+--------+-------------+
+ * | data, cont. |
+ * +--------+--------------------+--------+
+ * | rechdr | data | |
+ * +--------+--------------------+ |
+ * | || |
+ * | || |
+ * | \/ |
+ * : :
+ * . .
+ * . .
+ * . .
+ * : :
+ * | |
+ * limit of data buffer ---> +--------------------------------------+
*
* When evaluating an ECB, dtrace_probe() determines if the ECB's needs of the
* principal buffer (both scratch and payload) exceed the available space. If
diff --git a/sys/cddl/dev/dtrace/dtrace_ioctl.c b/sys/cddl/dev/dtrace/dtrace_ioctl.c
index 6a01e13..bb03ffd 100644
--- a/sys/cddl/dev/dtrace/dtrace_ioctl.c
+++ b/sys/cddl/dev/dtrace/dtrace_ioctl.c
@@ -274,6 +274,7 @@ dtrace_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
desc.dtbd_drops = buf->dtb_drops;
desc.dtbd_errors = buf->dtb_errors;
desc.dtbd_oldest = buf->dtb_xamot_offset;
+ desc.dtbd_timestamp = dtrace_gethrtime();
mutex_exit(&dtrace_lock);
@@ -328,6 +329,7 @@ dtrace_ioctl(struct cdev *dev, u_long cmd, caddr_t addr,
desc.dtbd_drops = buf->dtb_xamot_drops;
desc.dtbd_errors = buf->dtb_xamot_errors;
desc.dtbd_oldest = 0;
+ desc.dtbd_timestamp = buf->dtb_switched;
mutex_exit(&dtrace_lock);
OpenPOWER on IntegriCloud