summaryrefslogtreecommitdiffstats
path: root/sys/kern/subr_prof.c
diff options
context:
space:
mode:
authorbde <bde@FreeBSD.org>1996-10-17 19:32:31 +0000
committerbde <bde@FreeBSD.org>1996-10-17 19:32:31 +0000
commita0f16401c5c8aa8537932ccc296e9253fca3debd (patch)
tree2f3c01bb1684b82e42c63c0a17d658ba5ded662b /sys/kern/subr_prof.c
parentd0d507caa9d6718e7f6699bae600f120362f59b7 (diff)
downloadFreeBSD-src-a0f16401c5c8aa8537932ccc296e9253fca3debd.zip
FreeBSD-src-a0f16401c5c8aa8537932ccc296e9253fca3debd.tar.gz
Improved non-statistical (GUPROF) profiling:
- use a more accurate and more efficient method of compensating for overheads. The old method counted too much time against leaf functions. - normally use the Pentium timestamp counter if available. On Pentiums, the times are now accurate to within a couple of cpu clock cycles per function call in the (unlikely) event that there are no cache misses in or caused by the profiling code. - optionally use an arbitrary Pentium event counter if available. - optionally regress to using the i8254 counter. - scaled the i8254 counter by a factor of 128. Now the i8254 counters overflow slightly faster than the TSC counters for a 150MHz Pentium :-) (after about 16 seconds). This is to avoid fractional overheads. files.i386: permon.c temporarily has to be classified as a profiling-routine because a couple of functions in it may be called from profiling code. options.i386: - I586_CTR_GUPROF is currently unused (oops). - I586_PMC_GUPROF should be something like 0x70000 to enable (but not use unless prof_machdep.c is changed) support for Pentium event counters. 7 is a control mode and the counter number 0 is somewhere in the 0000 bits (see perfmon.h for the encoding). profile.h: - added declarations. - cleaned up separation of user mode declarations. prof_machdep.c: Mostly clock-select changes. The default clock can be changed by editing kmem. There should be a sysctl for this. subr_prof.c: - added copyright. - calibrate overheads for the new method. - documented new method. - fixed races and and machine dependencies in start/stop code. mcount.c: Use the new overhead compensation method. gmon.h: - changed GPROF4 counter type from unsigned to int. Oops, this should be machine-dependent and/or int32_t. - reorganized overhead counters. Submitted by: Pentium event counter changes mostly by wollman
Diffstat (limited to 'sys/kern/subr_prof.c')
-rw-r--r--sys/kern/subr_prof.c186
1 files changed, 149 insertions, 37 deletions
diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c
index 0727f9b..d17a3b0 100644
--- a/sys/kern/subr_prof.c
+++ b/sys/kern/subr_prof.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)subr_prof.c 8.3 (Berkeley) 9/23/93
- * $Id: subr_prof.c,v 1.15 1995/12/26 01:21:39 bde Exp $
+ * $Id: subr_prof.c,v 1.16 1995/12/29 15:29:08 bde Exp $
*/
#include <sys/param.h>
@@ -56,6 +56,22 @@ struct gmonparam _gmonparam = { GMON_PROF_OFF };
extern char btext[];
extern char etext[];
+#ifdef GUPROF
+void
+nullfunc_loop_profiled()
+{
+ int i;
+
+ for (i = 0; i < CALIB_SCALE; i++)
+ nullfunc_profiled();
+}
+
+void
+nullfunc_profiled()
+{
+}
+#endif /* GUPROF */
+
static void
kmstartup(dummy)
void *dummy;
@@ -63,8 +79,14 @@ kmstartup(dummy)
char *cp;
struct gmonparam *p = &_gmonparam;
#ifdef GUPROF
- fptrint_t kmstartup_addr;
+ int cputime_overhead;
+ int empty_loop_time;
int i;
+ fptrint_t kmstartup_addr;
+ int mcount_overhead;
+ int mexitcount_overhead;
+ int nullfunc_loop_overhead;
+ int nullfunc_loop_profiled_time;
#endif
/*
@@ -74,7 +96,7 @@ kmstartup(dummy)
p->lowpc = ROUNDDOWN((u_long)btext, HISTFRACTION * sizeof(HISTCOUNTER));
p->highpc = ROUNDUP((u_long)etext, HISTFRACTION * sizeof(HISTCOUNTER));
p->textsize = p->highpc - p->lowpc;
- printf("Profiling kernel, textsize=%d [%x..%x]\n",
+ printf("Profiling kernel, textsize=%lu [%x..%x]\n",
p->textsize, p->lowpc, p->highpc);
p->kcountsize = p->textsize / HISTFRACTION;
p->hashfraction = HASHFRACTION;
@@ -99,41 +121,56 @@ kmstartup(dummy)
p->froms = (u_short *)cp;
#ifdef GUPROF
- /*
- * Initialize pointers to overhead counters.
- */
+ /* Initialize pointers to overhead counters. */
p->cputime_count = &KCOUNT(p, PC_TO_I(p, cputime));
p->mcount_count = &KCOUNT(p, PC_TO_I(p, mcount));
p->mexitcount_count = &KCOUNT(p, PC_TO_I(p, mexitcount));
/*
- * Determine overheads.
+ * Disable interrupts to avoid interference while we calibrate
+ * things.
*/
disable_intr();
- p->state = GMON_PROF_HIRES;
- p->cputime_overhead = 0;
- (void)cputime();
+ /*
+ * Determine overheads.
+ * XXX this needs to be repeated for each useful timer/counter.
+ */
+ cputime_overhead = 0;
+ startguprof(p);
for (i = 0; i < CALIB_SCALE; i++)
- p->cputime_overhead += cputime();
+ cputime_overhead += cputime();
+
+ empty_loop();
+ startguprof(p);
+ empty_loop();
+ empty_loop_time = cputime();
+
+ nullfunc_loop_profiled();
+
+ /*
+ * Start profiling. There won't be any normal function calls since
+ * interrupts are disabled, but we will call the profiling routines
+ * directly to determine their overheads.
+ */
+ p->state = GMON_PROF_HIRES;
+
+ startguprof(p);
+ nullfunc_loop_profiled();
- (void)cputime();
+ startguprof(p);
for (i = 0; i < CALIB_SCALE; i++)
#if defined(i386) && __GNUC__ >= 2
- /*
- * Underestimate slightly by always calling __mcount, never
- * mcount.
- */
asm("pushl %0; call __mcount; popl %%ecx"
:
- : "i" (kmstartup)
+ : "i" (profil)
: "ax", "bx", "cx", "dx", "memory");
#else
#error
#endif
- p->mcount_overhead = KCOUNT(p, PC_TO_I(p, kmstartup));
+ mcount_overhead = KCOUNT(p, PC_TO_I(p, profil));
- (void)cputime();
+ startguprof(p);
for (i = 0; i < CALIB_SCALE; i++)
#if defined(i386) && __GNUC__ >= 2
asm("call mexitcount; 1:"
@@ -142,25 +179,96 @@ kmstartup(dummy)
#else
#error
#endif
- p->mexitcount_overhead = KCOUNT(p, PC_TO_I(p, kmstartup_addr));
+ mexitcount_overhead = KCOUNT(p, PC_TO_I(p, kmstartup_addr));
p->state = GMON_PROF_OFF;
+ stopguprof(p);
+
enable_intr();
- p->mcount_overhead_sub = p->mcount_overhead - p->cputime_overhead;
- p->mexitcount_overhead_sub = p->mexitcount_overhead
- - p->cputime_overhead;
- printf("Profiling overheads: %u+%u %u+%u\n",
- p->cputime_overhead, p->mcount_overhead_sub,
- p->cputime_overhead, p->mexitcount_overhead_sub);
- p->cputime_overhead_frac = p->cputime_overhead % CALIB_SCALE;
- p->cputime_overhead /= CALIB_SCALE;
- p->mcount_overhead_frac = p->mcount_overhead_sub % CALIB_SCALE;
- p->mcount_overhead_sub /= CALIB_SCALE;
- p->mcount_overhead /= CALIB_SCALE;
- p->mexitcount_overhead_frac = p->mexitcount_overhead_sub % CALIB_SCALE;
- p->mexitcount_overhead_sub /= CALIB_SCALE;
- p->mexitcount_overhead /= CALIB_SCALE;
+ nullfunc_loop_profiled_time = 0;
+ for (i = 0; i < 28; i += sizeof(HISTCOUNTER)) {
+ int x;
+
+ x = KCOUNT(p, PC_TO_I(p,
+ (fptrint_t)nullfunc_loop_profiled + i));
+ nullfunc_loop_profiled_time += x;
+ printf("leaf[%d] = %d sum %d\n",
+ i, x, nullfunc_loop_profiled_time);
+ }
+#define CALIB_DOSCALE(count) (((count) + CALIB_SCALE / 3) / CALIB_SCALE)
+#define c2n(count, freq) ((int)((count) * 1000000000LL / freq))
+ printf("cputime %d, empty_loop %d, nullfunc_loop_profiled %d, mcount %d, mexitcount %d\n",
+ CALIB_DOSCALE(c2n(cputime_overhead, p->profrate)),
+ CALIB_DOSCALE(c2n(empty_loop_time, p->profrate)),
+ CALIB_DOSCALE(c2n(nullfunc_loop_profiled_time, p->profrate)),
+ CALIB_DOSCALE(c2n(mcount_overhead, p->profrate)),
+ CALIB_DOSCALE(c2n(mexitcount_overhead, p->profrate)));
+ cputime_overhead -= empty_loop_time;
+ mcount_overhead -= empty_loop_time;
+ mexitcount_overhead -= empty_loop_time;
+
+ /*-
+ * Profiling overheads are determined by the times between the
+ * following events:
+ * MC1: mcount() is called
+ * MC2: cputime() (called from mcount()) latches the timer
+ * MC3: mcount() completes
+ * ME1: mexitcount() is called
+ * ME2: cputime() (called from mexitcount()) latches the timer
+ * ME3: mexitcount() completes.
+ * The times between the events vary slightly depending on instruction
+ * combination and cache misses, etc. Attempt to determine the
+ * minimum times. These can be subtracted from the profiling times
+ * without much risk of reducing the profiling times below what they
+ * would be when profiling is not configured. Abbreviate:
+ * ab = minimum time between MC1 and MC3
+ * a = minumum time between MC1 and MC2
+ * b = minimum time between MC2 and MC3
+ * cd = minimum time between ME1 and ME3
+ * c = minimum time between ME1 and ME2
+ * d = minimum time between ME2 and ME3.
+ * These satisfy the relations:
+ * ab <= mcount_overhead (just measured)
+ * a + b <= ab
+ * cd <= mexitcount_overhead (just measured)
+ * c + d <= cd
+ * a + d <= nullfunc_loop_profiled_time (just measured)
+ * a >= 0, b >= 0, c >= 0, d >= 0.
+ * Assume that ab and cd are equal to the minimums.
+ */
+ p->cputime_overhead = CALIB_DOSCALE(cputime_overhead);
+ p->mcount_overhead = CALIB_DOSCALE(mcount_overhead - cputime_overhead);
+ p->mexitcount_overhead = CALIB_DOSCALE(mexitcount_overhead
+ - cputime_overhead);
+ nullfunc_loop_overhead = nullfunc_loop_profiled_time - empty_loop_time;
+ p->mexitcount_post_overhead = CALIB_DOSCALE((mcount_overhead
+ - nullfunc_loop_overhead)
+ / 4);
+ p->mexitcount_pre_overhead = p->mexitcount_overhead
+ + p->cputime_overhead
+ - p->mexitcount_post_overhead;
+ p->mcount_pre_overhead = CALIB_DOSCALE(nullfunc_loop_overhead)
+ - p->mexitcount_post_overhead;
+ p->mcount_post_overhead = p->mcount_overhead
+ + p->cputime_overhead
+ - p->mcount_pre_overhead;
+ printf(
+"Profiling overheads: mcount: %d+%d, %d+%d; mexitcount: %d+%d, %d+%d nsec\n",
+ c2n(p->cputime_overhead, p->profrate),
+ c2n(p->mcount_overhead, p->profrate),
+ c2n(p->mcount_pre_overhead, p->profrate),
+ c2n(p->mcount_post_overhead, p->profrate),
+ c2n(p->cputime_overhead, p->profrate),
+ c2n(p->mexitcount_overhead, p->profrate),
+ c2n(p->mexitcount_pre_overhead, p->profrate),
+ c2n(p->mexitcount_post_overhead, p->profrate));
+ printf(
+"Profiling overheads: mcount: %d+%d, %d+%d; mexitcount: %d+%d, %d+%d cycles\n",
+ p->cputime_overhead, p->mcount_overhead,
+ p->mcount_pre_overhead, p->mcount_post_overhead,
+ p->cputime_overhead, p->mexitcount_overhead,
+ p->mexitcount_pre_overhead, p->mexitcount_post_overhead);
#endif /* GUPROF */
}
@@ -189,16 +297,20 @@ sysctl_kern_prof SYSCTL_HANDLER_ARGS
if (!req->newptr)
return (0);
if (state == GMON_PROF_OFF) {
- stopprofclock(&proc0);
gp->state = state;
+ stopprofclock(&proc0);
+ stopguprof(gp);
} else if (state == GMON_PROF_ON) {
+ gp->state = GMON_PROF_OFF;
+ stopguprof(gp);
gp->profrate = profhz;
- gp->state = state;
startprofclock(&proc0);
+ gp->state = state;
#ifdef GUPROF
} else if (state == GMON_PROF_HIRES) {
- gp->profrate = 1193182; /* XXX */
+ gp->state = GMON_PROF_OFF;
stopprofclock(&proc0);
+ startguprof(gp);
gp->state = state;
#endif
} else if (state != gp->state)
OpenPOWER on IntegriCloud