summaryrefslogtreecommitdiffstats
path: root/usr.sbin
diff options
context:
space:
mode:
authorjtl <jtl@FreeBSD.org>2016-01-14 22:02:21 +0000
committerjtl <jtl@FreeBSD.org>2016-01-14 22:02:21 +0000
commit5c4bf04e87f1d17905c66f269050c88636b1a691 (patch)
tree4df43a118a2571121c8cd4736793f324b969c6a2 /usr.sbin
parentba34754e3ceca35bf44d95762f8d769933e81c53 (diff)
downloadFreeBSD-src-5c4bf04e87f1d17905c66f269050c88636b1a691.zip
FreeBSD-src-5c4bf04e87f1d17905c66f269050c88636b1a691.tar.gz
MFC r290811:
Fix hwpmc "stalled" behavior Currently, there is a single pm_stalled flag that tracks whether a performance monitor was "stalled" due to insufficient ring buffer space for samples. However, because the same performance monitor can run on multiple processes or threads at the same time, a single pm_stalled flag that impacts them all seems insufficient. In particular, you can hit corner cases where the code fails to stop performance monitors during a context switch out, because it thinks the performance monitor is already stopped. However, in reality, it may be that only the monitor running on a different CPU was stalled. This patch attempts to fix that behavior by tracking on a per-CPU basis whether a PM desires to run and whether it is "stalled". This lets the code make better decisions about when to stop PMs and when to try to restart them. Ideally, we should avoid the case where the code fails to stop a PM during a context switch out. MFC r290813: Optimizations to the way hwpmc gathers user callchains Changes to the code to gather user stacks: * Delay setting pmc_cpumask until we actually have the stack. * When recording user stack traces, only walk the portion of the ring that should have samples for us. MFC r290929: Change the driver stats to what they really are: unsigned values. When pmcstat exits after some samples were dropped, give the user an idea of how many were lost. (Granted, these are global numbers, but they may still help quantify the scope of the loss.) MFC r290930: Improve accuracy of PMC sampling frequency The code tracks a counter which is the number of events until the next sample. On context switch in, it loads the saved counter. On context switch out, it tries to calculate a new saved counter. Problems: 1. The saved counter was shared by all threads in a process. However, this means that all threads would be initially loaded with the same saved counter. However, that could result in sampling more often than once every X number of events. 2. 
The calculation to determine a new saved counter was backwards. It added when it should have subtracted, and subtracted when it should have added. Assume a single-threaded process with a reload count of 1000 events. Assuming the counter on context switch in was 100 and the counter on context switch out was 50 (meaning the thread has "consumed" 50 more events), the code would calculate a new saved counter of 150 (instead of the proper 50). Fix: 1. As soon as the saved counter is used to initialize a monitor for a thread on context switch in, set the saved counter to the reload count. That way, subsequent threads to use the saved counter will get the full reload count, assuring we sample at least once every X number of events (across all threads). 2. Change the calculation of the saved counter. Due to the change to the saved counter in #1, we simply need to add (modulo the reload count) the remaining counter time we retrieve from the CPU when a thread is context switched out. MFC r291016: Support a wider history counter in pmcstat(8) gmon output pmcstat(8) contains an option to output sampling data in a gmon format compatible with gprof(1). Currently, it uses the default histcounter, which is an (unsigned short). With large sets of sampling data, it is possible to overflow the maximum value provided by an (unsigned short). This change adds the -e argument to pmcstat. If -e and -g are both specified, pmcstat will use a histcounter type of uint64_t. MFC r291017: Fix the date on the pmcstat(8) man page from r291016.
Diffstat (limited to 'usr.sbin')
-rw-r--r--usr.sbin/pmcstat/pmcpl_gprof.c54
-rw-r--r--usr.sbin/pmcstat/pmcstat.89
-rw-r--r--usr.sbin/pmcstat/pmcstat.c32
-rw-r--r--usr.sbin/pmcstat/pmcstat.h1
4 files changed, 78 insertions, 18 deletions
diff --git a/usr.sbin/pmcstat/pmcpl_gprof.c b/usr.sbin/pmcstat/pmcpl_gprof.c
index 9ff78e8..5fc9b41 100644
--- a/usr.sbin/pmcstat/pmcpl_gprof.c
+++ b/usr.sbin/pmcstat/pmcpl_gprof.c
@@ -74,6 +74,14 @@ __FBSDID("$FreeBSD$");
#include "pmcpl_callgraph.h"
#include "pmcpl_gprof.h"
+typedef uint64_t WIDEHISTCOUNTER;
+
+#define WIDEHISTCOUNTER_MAX UINT64_MAX
+#define HISTCOUNTER_MAX USHRT_MAX
+#define WIDEHISTCOUNTER_GMONTYPE ((int) 64)
+#define HISTCOUNTER_GMONTYPE ((int) 0)
+static int hc_sz=0;
+
/*
* struct pmcstat_gmonfile tracks a given 'gmon.out' file. These
* files are mmap()'ed in as needed.
@@ -126,11 +134,13 @@ pmcstat_gmon_create_file(struct pmcstat_gmonfile *pgf,
gm.lpc = image->pi_start;
gm.hpc = image->pi_end;
- gm.ncnt = (pgf->pgf_nbuckets * sizeof(HISTCOUNTER)) +
- sizeof(struct gmonhdr);
+ gm.ncnt = (pgf->pgf_nbuckets * hc_sz) + sizeof(struct gmonhdr);
gm.version = GMONVERSION;
gm.profrate = 0; /* use ticks */
- gm.histcounter_type = 0; /* compatibility with moncontrol() */
+ if (args.pa_flags & FLAG_DO_WIDE_GPROF_HC)
+ gm.histcounter_type = WIDEHISTCOUNTER_GMONTYPE;
+ else
+ gm.histcounter_type = HISTCOUNTER_GMONTYPE;
gm.spare[0] = gm.spare[1] = 0;
/* Write out the gmon header */
@@ -400,6 +410,7 @@ pmcpl_gmon_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr,
struct pmcstat_gmonfile *pgf;
uintfptr_t bucket;
HISTCOUNTER *hc;
+ WIDEHISTCOUNTER *whc;
pmc_id_t pmcid;
(void) nsamples; (void) usermode; (void) cpu;
@@ -437,6 +448,14 @@ pmcpl_gmon_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr,
*/
pgf = pmcstat_image_find_gmonfile(image, pmcid);
if (pgf == NULL) {
+ if (hc_sz == 0) {
+ /* Determine the correct histcounter size. */
+ if (args.pa_flags & FLAG_DO_WIDE_GPROF_HC)
+ hc_sz = sizeof(WIDEHISTCOUNTER);
+ else
+ hc_sz = sizeof(HISTCOUNTER);
+ }
+
if ((pgf = calloc(1, sizeof(*pgf))) == NULL)
err(EX_OSERR, "ERROR:");
@@ -448,7 +467,7 @@ pmcpl_gmon_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr,
pgf->pgf_nbuckets = (image->pi_end - image->pi_start) /
FUNCTION_ALIGNMENT; /* see <machine/profile.h> */
pgf->pgf_ndatabytes = sizeof(struct gmonhdr) +
- pgf->pgf_nbuckets * sizeof(HISTCOUNTER);
+ pgf->pgf_nbuckets * hc_sz;
pgf->pgf_nsamples = 0;
pgf->pgf_file = NULL;
@@ -474,14 +493,25 @@ pmcpl_gmon_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr,
assert(bucket < pgf->pgf_nbuckets);
- hc = (HISTCOUNTER *) ((uintptr_t) pgf->pgf_gmondata +
- sizeof(struct gmonhdr));
-
- /* saturating add */
- if (hc[bucket] < 0xFFFFU) /* XXX tie this to sizeof(HISTCOUNTER) */
- hc[bucket]++;
- else /* mark that an overflow occurred */
- pgf->pgf_overflow = 1;
+ if (args.pa_flags & FLAG_DO_WIDE_GPROF_HC) {
+ whc = (WIDEHISTCOUNTER *) ((uintptr_t) pgf->pgf_gmondata +
+ sizeof(struct gmonhdr));
+
+ /* saturating add */
+ if (whc[bucket] < WIDEHISTCOUNTER_MAX)
+ whc[bucket]++;
+ else /* mark that an overflow occurred */
+ pgf->pgf_overflow = 1;
+ } else {
+ hc = (HISTCOUNTER *) ((uintptr_t) pgf->pgf_gmondata +
+ sizeof(struct gmonhdr));
+
+ /* saturating add */
+ if (hc[bucket] < HISTCOUNTER_MAX)
+ hc[bucket]++;
+ else /* mark that an overflow occurred */
+ pgf->pgf_overflow = 1;
+ }
pgf->pgf_nsamples++;
}
diff --git a/usr.sbin/pmcstat/pmcstat.8 b/usr.sbin/pmcstat/pmcstat.8
index 7de335d..31b136b 100644
--- a/usr.sbin/pmcstat/pmcstat.8
+++ b/usr.sbin/pmcstat/pmcstat.8
@@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd May 27, 2015
+.Dd November 18, 2015
.Dt PMCSTAT 8
.Os
.Sh NAME
@@ -49,6 +49,7 @@
.Op Fl a Ar pathname
.Op Fl c Ar cpu-spec
.Op Fl d
+.Op Fl e
.Op Fl f Ar pluginopt
.Op Fl g
.Op Fl k Ar kerneldir
@@ -260,6 +261,12 @@ The default is to measure events for the target process alone.
.Fl P ,
or
.Fl S ) .
+.It Fl e
+Specify that the gprof profile files will use a wide history counter.
+These files are produced in a format compatible with
+.Xr gprof 1 .
+However, other tools that cannot fully parse a BSD-style
+gmon header might be unable to correctly parse these files.
.It Fl f Ar pluginopt
Pass option string to the active plugin.
.br
diff --git a/usr.sbin/pmcstat/pmcstat.c b/usr.sbin/pmcstat/pmcstat.c
index 5ae53aa..481213a 100644
--- a/usr.sbin/pmcstat/pmcstat.c
+++ b/usr.sbin/pmcstat/pmcstat.c
@@ -506,6 +506,7 @@ pmcstat_show_usage(void)
"\t -a <file>\t print sampled PCs and callgraph to \"file\"\n"
"\t -c cpu-list\t set cpus for subsequent system-wide PMCs\n"
"\t -d\t\t (toggle) track descendants\n"
+ "\t -e\t\t use wide history counter for gprof(1) output\n"
"\t -f spec\t pass \"spec\" to as plugin option\n"
"\t -g\t\t produce gprof(1) compatible profiles\n"
"\t -k dir\t\t set the path to the kernel\n"
@@ -627,7 +628,7 @@ main(int argc, char **argv)
CPU_COPY(&rootmask, &cpumask);
while ((option = getopt(argc, argv,
- "CD:EF:G:M:NO:P:R:S:TWa:c:df:gk:l:m:n:o:p:qr:s:t:vw:z:")) != -1)
+ "CD:EF:G:M:NO:P:R:S:TWa:c:def:gk:l:m:n:o:p:qr:s:t:vw:z:")) != -1)
switch (option) {
case 'a': /* Annotate + callgraph */
args.pa_flags |= FLAG_DO_ANNOTATE;
@@ -668,6 +669,10 @@ main(int argc, char **argv)
args.pa_required |= FLAG_HAS_PROCESS_PMCS;
break;
+ case 'e': /* wide gprof metrics */
+ args.pa_flags |= FLAG_DO_WIDE_GPROF_HC;
+ break;
+
case 'F': /* produce a system-wide calltree */
args.pa_flags |= FLAG_DO_CALLGRAPHS;
args.pa_plugin = PMCSTAT_PL_CALLTREE;
@@ -1022,6 +1027,13 @@ main(int argc, char **argv)
"ERROR: options -g/-G/-m/-T require sampling PMCs or -R to be specified."
);
+ /* check if -e was specified without -g */
+ if ((args.pa_flags & FLAG_DO_WIDE_GPROF_HC) &&
+ !(args.pa_flags & FLAG_DO_GPROF))
+ errx(EX_USAGE,
+"ERROR: option -e requires gprof mode to be specified."
+ );
+
/* check if -O was spuriously specified */
if ((args.pa_flags & FLAG_HAS_OUTPUT_LOGFILE) &&
(args.pa_required & FLAG_HAS_OUTPUT_LOGFILE) == 0)
@@ -1500,14 +1512,24 @@ main(int argc, char **argv)
"ERROR: Cannot retrieve driver statistics");
if (ds_start.pm_intr_bufferfull != ds_end.pm_intr_bufferfull &&
args.pa_verbosity > 0)
- warnx("WARNING: some samples were dropped.\n"
-"Please consider tuning the \"kern.hwpmc.nsamples\" tunable."
+ warnx(
+"WARNING: sampling was paused at least %u time%s.\n"
+"Please consider tuning the \"kern.hwpmc.nsamples\" tunable.",
+ ds_end.pm_intr_bufferfull -
+ ds_start.pm_intr_bufferfull,
+ ((ds_end.pm_intr_bufferfull -
+ ds_start.pm_intr_bufferfull) != 1) ? "s" : ""
);
if (ds_start.pm_buffer_requests_failed !=
ds_end.pm_buffer_requests_failed &&
args.pa_verbosity > 0)
- warnx("WARNING: some events were discarded.\n"
-"Please consider tuning the \"kern.hwpmc.nbuffers\" tunable."
+ warnx(
+"WARNING: at least %u event%s were discarded while running.\n"
+"Please consider tuning the \"kern.hwpmc.nbuffers\" tunable.",
+ ds_end.pm_buffer_requests_failed -
+ ds_start.pm_buffer_requests_failed,
+ ((ds_end.pm_buffer_requests_failed -
+ ds_start.pm_buffer_requests_failed) != 1) ? "s" : ""
);
}
diff --git a/usr.sbin/pmcstat/pmcstat.h b/usr.sbin/pmcstat/pmcstat.h
index 29dfeb7..5b1d3d9 100644
--- a/usr.sbin/pmcstat/pmcstat.h
+++ b/usr.sbin/pmcstat/pmcstat.h
@@ -55,6 +55,7 @@
#define FLAG_DO_ANALYSIS 0x00020000 /* -g or -G or -m or -T */
#define FLAGS_HAS_CPUMASK 0x00040000 /* -c */
#define FLAG_HAS_DURATION 0x00080000 /* -l secs */
+#define FLAG_DO_WIDE_GPROF_HC 0x00100000 /* -e */
#define DEFAULT_SAMPLE_COUNT 65536
#define DEFAULT_WAIT_INTERVAL 5.0
OpenPOWER on IntegriCloud