diff options
author | bde <bde@FreeBSD.org> | 1996-10-17 19:32:31 +0000 |
---|---|---|
committer | bde <bde@FreeBSD.org> | 1996-10-17 19:32:31 +0000 |
commit | a0f16401c5c8aa8537932ccc296e9253fca3debd (patch) | |
tree | 2f3c01bb1684b82e42c63c0a17d658ba5ded662b | |
parent | d0d507caa9d6718e7f6699bae600f120362f59b7 (diff) | |
download | FreeBSD-src-a0f16401c5c8aa8537932ccc296e9253fca3debd.zip FreeBSD-src-a0f16401c5c8aa8537932ccc296e9253fca3debd.tar.gz |
Improved non-statistical (GUPROF) profiling:
- use a more accurate and more efficient method of compensating for
overheads. The old method counted too much time against leaf
functions.
- normally use the Pentium timestamp counter if available.
On Pentiums, the times are now accurate to within a couple of cpu
clock cycles per function call in the (unlikely) event that there
are no cache misses in or caused by the profiling code.
- optionally use an arbitrary Pentium event counter if available.
- optionally regress to using the i8254 counter.
- scaled the i8254 counter by a factor of 128. Now the i8254 counters
overflow slightly faster than the TSC counters for a 150MHz Pentium :-)
(after about 16 seconds). This is to avoid fractional overheads.
files.i386:
perfmon.c temporarily has to be classified as a profiling-routine
because a couple of functions in it may be called from profiling code.
options.i386:
- I586_CTR_GUPROF is currently unused (oops).
- I586_PMC_GUPROF should be something like 0x70000 to enable (but not
use unless prof_machdep.c is changed) support for Pentium event
counters. 7 is a control mode and the counter number 0 is somewhere
in the 0000 bits (see perfmon.h for the encoding).
profile.h:
- added declarations.
- cleaned up separation of user mode declarations.
prof_machdep.c:
Mostly clock-select changes. The default clock can be changed by
editing kmem. There should be a sysctl for this.
subr_prof.c:
- added copyright.
- calibrate overheads for the new method.
- documented new method.
- fixed races and machine dependencies in start/stop code.
mcount.c:
Use the new overhead compensation method.
gmon.h:
- changed GPROF4 counter type from unsigned to int. Oops, this should
be machine-dependent and/or int32_t.
- reorganized overhead counters.
Submitted by: Pentium event counter changes mostly by wollman
-rw-r--r-- | sys/amd64/amd64/prof_machdep.c | 168 | ||||
-rw-r--r-- | sys/amd64/include/profile.h | 36 | ||||
-rw-r--r-- | sys/conf/files.i386 | 3 | ||||
-rw-r--r-- | sys/conf/options.i386 | 4 | ||||
-rw-r--r-- | sys/i386/conf/files.i386 | 3 | ||||
-rw-r--r-- | sys/i386/conf/options.i386 | 4 | ||||
-rw-r--r-- | sys/i386/include/profile.h | 36 | ||||
-rw-r--r-- | sys/i386/isa/prof_machdep.c | 168 | ||||
-rw-r--r-- | sys/kern/subr_prof.c | 186 | ||||
-rw-r--r-- | sys/libkern/mcount.c | 130 | ||||
-rw-r--r-- | sys/sys/gmon.h | 25 |
11 files changed, 597 insertions, 166 deletions
diff --git a/sys/amd64/amd64/prof_machdep.c b/sys/amd64/amd64/prof_machdep.c index f140b84..62c8df5 100644 --- a/sys/amd64/amd64/prof_machdep.c +++ b/sys/amd64/amd64/prof_machdep.c @@ -1,17 +1,64 @@ -/* - * NEED A COPYRIGHT NOPTICE HERE +/*- + * Copyright (c) 1996 Bruce D. Evans. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. * - * $Id$ + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $Id: prof_machdep.c,v 1.2 1996/04/08 16:41:06 wollman Exp $ */ + +#ifdef GUPROF +#include "opt_cpu.h" +#include "opt_i586_guprof.h" +#include "opt_perfmon.h" + #include <sys/param.h> #include <sys/systm.h> +#include <sys/gmon.h> + #include <machine/clock.h> +#include <machine/perfmon.h> +#include <machine/profile.h> +#endif + #include <i386/isa/isa.h> #include <i386/isa/timerreg.h> #ifdef GUPROF -extern u_int cputime __P((void)); +#define CPUTIME_CLOCK_UNINITIALIZED 0 +#define CPUTIME_CLOCK_I8254 1 +#define CPUTIME_CLOCK_I586_CTR 2 +#define CPUTIME_CLOCK_I586_PMC 3 +#define CPUTIME_CLOCK_I8254_SHIFT 7 + +int cputime_bias = 1; /* initialize for locality of reference */ + +static int cputime_clock = CPUTIME_CLOCK_UNINITIALIZED; +#ifdef I586_PMC_GUPROF +static u_int cputime_clock_pmc_conf = I586_PMC_GUPROF; +static int cputime_clock_pmc_init; +static struct gmonparam saved_gmp; #endif +#endif /* GUPROF */ #ifdef __GNUC__ asm(" @@ -52,13 +99,13 @@ Lgot_frompc: # movl (%esp),%eax - pushf + pushfl pushl %eax pushl %edx cli call _mcount addl $8,%esp - popf + popfl Lmcount_exit: ret "); @@ -94,12 +141,12 @@ mexitcount: pushl %edx pushl %eax movl 8(%esp),%eax - pushf + pushfl pushl %eax cli call _mexitcount addl $4,%esp - popf + popfl popl %eax popl %edx Lmexitcount_exit: @@ -113,20 +160,48 @@ Lmexitcount_exit: * Return the time elapsed since the last call. The units are machine- * dependent. 
*/ -u_int +int cputime() { u_int count; - u_int delta; - u_char low; + int delta; +#ifdef I586_PMC_GUPROF + u_quad_t event_count; +#endif + u_char high, low; static u_int prev_count; +#if defined(I586_CPU) || defined(I686_CPU) + if (cputime_clock == CPUTIME_CLOCK_I586_CTR) { + count = (u_int)rdtsc(); + delta = (int)(count - prev_count); + prev_count = count; + return (delta); + } +#ifdef I586_PMC_GUPROF + if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { + /* + * XXX permon_read() should be inlined so that the + * perfmon module doesn't need to be compiled with + * profiling disabled and so that it is fast. + */ + perfmon_read(0, &event_count); + + count = (u_int)event_count; + delta = (int)(count - prev_count); + prev_count = count; + return (delta); + } +#endif /* I586_PMC_GUPROF */ +#endif /* I586_CPU or I686_CPU */ + /* * Read the current value of the 8254 timer counter 0. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); - count = low | (inb(TIMER_CNTR0) << 8); + high = inb(TIMER_CNTR0); + count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT; /* * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets. @@ -140,10 +215,75 @@ cputime() delta = prev_count - count; prev_count = count; if ((int) delta <= 0) - return (delta + timer0_max_count); + return (delta + (timer0_max_count << CPUTIME_CLOCK_I8254_SHIFT)); return (delta); } -#else /* not GUPROF */ + +/* + * The start and stop routines need not be here since we turn off profiling + * before calling them. They are here for convenience. 
+ */ + +void +startguprof(gp) + struct gmonparam *gp; +{ + if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) { + cputime_clock = CPUTIME_CLOCK_I8254; +#if defined(I586_CPU) || defined(I686_CPU) + if (i586_ctr_freq != 0) + cputime_clock = CPUTIME_CLOCK_I586_CTR; +#endif + } + gp->profrate = timer_freq << CPUTIME_CLOCK_I8254_SHIFT; +#if defined(I586_CPU) || defined(I686_CPU) + if (cputime_clock == CPUTIME_CLOCK_I586_CTR) + gp->profrate = i586_ctr_freq; +#ifdef I586_PMC_GUPROF + else if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { + if (perfmon_avail() && + perfmon_setup(0, cputime_clock_pmc_conf) == 0) { + if (perfmon_start(0) != 0) + perfmon_fini(0); + else { + /* XXX 1 event == 1 us. */ + gp->profrate = 1000000; + + saved_gmp = *gp; + + /* Zap overheads. They are invalid. */ + gp->cputime_overhead = 0; + gp->mcount_overhead = 0; + gp->mcount_post_overhead = 0; + gp->mcount_pre_overhead = 0; + gp->mexitcount_overhead = 0; + gp->mexitcount_post_overhead = 0; + gp->mexitcount_pre_overhead = 0; + + cputime_clock_pmc_init = TRUE; + } + } + } +#endif /* I586_PMC_GUPROF */ +#endif /* I586_CPU or I686_CPU */ + cputime_bias = 0; + cputime(); +} + +void +stopguprof(gp) + struct gmonparam *gp; +{ +#if defined(PERFMON) && defined(I586_PMC_GUPROF) + if (cputime_clock_pmc_init) { + *gp = saved_gmp; + perfmon_fini(0); + cputime_clock_pmc_init = FALSE; + } +#endif +} + +#else /* !GUPROF */ #ifdef __GNUC__ asm(" .text diff --git a/sys/amd64/include/profile.h b/sys/amd64/include/profile.h index 4b9d51b..08d0dbd 100644 --- a/sys/amd64/include/profile.h +++ b/sys/amd64/include/profile.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)profile.h 8.1 (Berkeley) 6/11/93 - * $Id: profile.h,v 1.6 1996/01/01 17:11:21 bde Exp $ + * $Id: profile.h,v 1.7 1996/08/28 20:15:25 bde Exp $ */ #ifndef _MACHINE_PROFILE_H_ @@ -97,18 +97,44 @@ typedef u_int fptrint_t; */ typedef int fptrdiff_t; -__BEGIN_DECLS #ifdef KERNEL + void mcount __P((fptrint_t frompc, fptrint_t selfpc)); + +#ifdef GUPROF +struct gmonparam; + +void nullfunc_loop_profiled __P((void)); +void nullfunc_profiled __P((void)); +void startguprof __P((struct gmonparam *p)); +void stopguprof __P((struct gmonparam *p)); #else +#define startguprof(p) +#define stopguprof(p) +#endif /* GUPROF */ + +#else /* !KERNEL */ + +#include <sys/cdefs.h> + +__BEGIN_DECLS void mcount __P((void)) __asm("mcount"); static void _mcount __P((fptrint_t frompc, fptrint_t selfpc)); -#endif +__END_DECLS + +#endif /* KERNEL */ #ifdef GUPROF -u_int cputime __P((void)); +/* XXX doesn't quite work outside kernel yet. */ +extern int cputime_bias; + +__BEGIN_DECLS +int cputime __P((void)); +void empty_loop __P((void)); void mexitcount __P((fptrint_t selfpc)); -#endif +void nullfunc __P((void)); +void nullfunc_loop __P((void)); __END_DECLS +#endif #endif /* !_MACHINE_PROFILE_H_ */ diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 67afe44..06d34ff 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -1,7 +1,7 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. 
# -# $Id: files.i386,v 1.139 1996/08/27 19:45:54 pst Exp $ +# $Id: files.i386,v 1.140 1996/09/11 19:53:30 phk Exp $ # aic7xxx_asm optional ahc device-driver \ dependency "$S/dev/aic7xxx/aic7xxx_asm.c" \ @@ -51,6 +51,7 @@ i386/i386/machdep.c standard i386/i386/math_emulate.c optional math_emulate i386/i386/mem.c standard i386/i386/microtime.s standard +i386/i386/perfmon.c optional perfmon profiling-routine i386/i386/perfmon.c optional perfmon i386/i386/pmap.c standard i386/i386/procfs_machdep.c standard diff --git a/sys/conf/options.i386 b/sys/conf/options.i386 index 3d3a3f700..7c09359 100644 --- a/sys/conf/options.i386 +++ b/sys/conf/options.i386 @@ -1,4 +1,4 @@ -# $Id: options.i386,v 1.22 1996/10/09 18:36:44 bde Exp $ +# $Id: options.i386,v 1.23 1996/10/09 19:47:07 bde Exp $ BOUNCEPAGES opt_bounce.h USER_LDT MATH_EMULATE opt_math_emulate.h @@ -18,6 +18,8 @@ COMCONSOLE opt_comconsole.h COM_ESP opt_sio.h COM_MULTIPORT opt_sio.h DSI_SOFT_MODEM opt_sio.h +I586_CTR_GUPROF opt_i586_guprof.h +I586_PMC_GUPROF opt_i586_guprof.h FAT_CURSOR opt_pcvt.h PCVT_FREEBSD opt_pcvt.h PCVT_SCANSET opt_pcvt.h diff --git a/sys/i386/conf/files.i386 b/sys/i386/conf/files.i386 index 67afe44..06d34ff 100644 --- a/sys/i386/conf/files.i386 +++ b/sys/i386/conf/files.i386 @@ -1,7 +1,7 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. 
# -# $Id: files.i386,v 1.139 1996/08/27 19:45:54 pst Exp $ +# $Id: files.i386,v 1.140 1996/09/11 19:53:30 phk Exp $ # aic7xxx_asm optional ahc device-driver \ dependency "$S/dev/aic7xxx/aic7xxx_asm.c" \ @@ -51,6 +51,7 @@ i386/i386/machdep.c standard i386/i386/math_emulate.c optional math_emulate i386/i386/mem.c standard i386/i386/microtime.s standard +i386/i386/perfmon.c optional perfmon profiling-routine i386/i386/perfmon.c optional perfmon i386/i386/pmap.c standard i386/i386/procfs_machdep.c standard diff --git a/sys/i386/conf/options.i386 b/sys/i386/conf/options.i386 index 3d3a3f700..7c09359 100644 --- a/sys/i386/conf/options.i386 +++ b/sys/i386/conf/options.i386 @@ -1,4 +1,4 @@ -# $Id: options.i386,v 1.22 1996/10/09 18:36:44 bde Exp $ +# $Id: options.i386,v 1.23 1996/10/09 19:47:07 bde Exp $ BOUNCEPAGES opt_bounce.h USER_LDT MATH_EMULATE opt_math_emulate.h @@ -18,6 +18,8 @@ COMCONSOLE opt_comconsole.h COM_ESP opt_sio.h COM_MULTIPORT opt_sio.h DSI_SOFT_MODEM opt_sio.h +I586_CTR_GUPROF opt_i586_guprof.h +I586_PMC_GUPROF opt_i586_guprof.h FAT_CURSOR opt_pcvt.h PCVT_FREEBSD opt_pcvt.h PCVT_SCANSET opt_pcvt.h diff --git a/sys/i386/include/profile.h b/sys/i386/include/profile.h index 4b9d51b..08d0dbd 100644 --- a/sys/i386/include/profile.h +++ b/sys/i386/include/profile.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)profile.h 8.1 (Berkeley) 6/11/93 - * $Id: profile.h,v 1.6 1996/01/01 17:11:21 bde Exp $ + * $Id: profile.h,v 1.7 1996/08/28 20:15:25 bde Exp $ */ #ifndef _MACHINE_PROFILE_H_ @@ -97,18 +97,44 @@ typedef u_int fptrint_t; */ typedef int fptrdiff_t; -__BEGIN_DECLS #ifdef KERNEL + void mcount __P((fptrint_t frompc, fptrint_t selfpc)); + +#ifdef GUPROF +struct gmonparam; + +void nullfunc_loop_profiled __P((void)); +void nullfunc_profiled __P((void)); +void startguprof __P((struct gmonparam *p)); +void stopguprof __P((struct gmonparam *p)); #else +#define startguprof(p) +#define stopguprof(p) +#endif /* GUPROF */ + +#else /* !KERNEL */ + +#include <sys/cdefs.h> + +__BEGIN_DECLS void mcount __P((void)) __asm("mcount"); static void _mcount __P((fptrint_t frompc, fptrint_t selfpc)); -#endif +__END_DECLS + +#endif /* KERNEL */ #ifdef GUPROF -u_int cputime __P((void)); +/* XXX doesn't quite work outside kernel yet. */ +extern int cputime_bias; + +__BEGIN_DECLS +int cputime __P((void)); +void empty_loop __P((void)); void mexitcount __P((fptrint_t selfpc)); -#endif +void nullfunc __P((void)); +void nullfunc_loop __P((void)); __END_DECLS +#endif #endif /* !_MACHINE_PROFILE_H_ */ diff --git a/sys/i386/isa/prof_machdep.c b/sys/i386/isa/prof_machdep.c index f140b84..62c8df5 100644 --- a/sys/i386/isa/prof_machdep.c +++ b/sys/i386/isa/prof_machdep.c @@ -1,17 +1,64 @@ -/* - * NEED A COPYRIGHT NOPTICE HERE +/*- + * Copyright (c) 1996 Bruce D. Evans. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
* - * $Id$ + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: prof_machdep.c,v 1.2 1996/04/08 16:41:06 wollman Exp $ */ + +#ifdef GUPROF +#include "opt_cpu.h" +#include "opt_i586_guprof.h" +#include "opt_perfmon.h" + #include <sys/param.h> #include <sys/systm.h> +#include <sys/gmon.h> + #include <machine/clock.h> +#include <machine/perfmon.h> +#include <machine/profile.h> +#endif + #include <i386/isa/isa.h> #include <i386/isa/timerreg.h> #ifdef GUPROF -extern u_int cputime __P((void)); +#define CPUTIME_CLOCK_UNINITIALIZED 0 +#define CPUTIME_CLOCK_I8254 1 +#define CPUTIME_CLOCK_I586_CTR 2 +#define CPUTIME_CLOCK_I586_PMC 3 +#define CPUTIME_CLOCK_I8254_SHIFT 7 + +int cputime_bias = 1; /* initialize for locality of reference */ + +static int cputime_clock = CPUTIME_CLOCK_UNINITIALIZED; +#ifdef I586_PMC_GUPROF +static u_int cputime_clock_pmc_conf = I586_PMC_GUPROF; +static int cputime_clock_pmc_init; +static struct gmonparam saved_gmp; #endif +#endif /* GUPROF */ #ifdef __GNUC__ asm(" @@ -52,13 +99,13 @@ Lgot_frompc: # movl (%esp),%eax - pushf + pushfl pushl %eax pushl %edx cli call _mcount addl $8,%esp - popf + popfl Lmcount_exit: ret "); @@ -94,12 +141,12 @@ mexitcount: pushl %edx pushl %eax movl 8(%esp),%eax - pushf + pushfl pushl %eax 
cli call _mexitcount addl $4,%esp - popf + popfl popl %eax popl %edx Lmexitcount_exit: @@ -113,20 +160,48 @@ Lmexitcount_exit: * Return the time elapsed since the last call. The units are machine- * dependent. */ -u_int +int cputime() { u_int count; - u_int delta; - u_char low; + int delta; +#ifdef I586_PMC_GUPROF + u_quad_t event_count; +#endif + u_char high, low; static u_int prev_count; +#if defined(I586_CPU) || defined(I686_CPU) + if (cputime_clock == CPUTIME_CLOCK_I586_CTR) { + count = (u_int)rdtsc(); + delta = (int)(count - prev_count); + prev_count = count; + return (delta); + } +#ifdef I586_PMC_GUPROF + if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { + /* + * XXX permon_read() should be inlined so that the + * perfmon module doesn't need to be compiled with + * profiling disabled and so that it is fast. + */ + perfmon_read(0, &event_count); + + count = (u_int)event_count; + delta = (int)(count - prev_count); + prev_count = count; + return (delta); + } +#endif /* I586_PMC_GUPROF */ +#endif /* I586_CPU or I686_CPU */ + /* * Read the current value of the 8254 timer counter 0. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); - count = low | (inb(TIMER_CNTR0) << 8); + high = inb(TIMER_CNTR0); + count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT; /* * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets. @@ -140,10 +215,75 @@ cputime() delta = prev_count - count; prev_count = count; if ((int) delta <= 0) - return (delta + timer0_max_count); + return (delta + (timer0_max_count << CPUTIME_CLOCK_I8254_SHIFT)); return (delta); } -#else /* not GUPROF */ + +/* + * The start and stop routines need not be here since we turn off profiling + * before calling them. They are here for convenience. 
+ */ + +void +startguprof(gp) + struct gmonparam *gp; +{ + if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) { + cputime_clock = CPUTIME_CLOCK_I8254; +#if defined(I586_CPU) || defined(I686_CPU) + if (i586_ctr_freq != 0) + cputime_clock = CPUTIME_CLOCK_I586_CTR; +#endif + } + gp->profrate = timer_freq << CPUTIME_CLOCK_I8254_SHIFT; +#if defined(I586_CPU) || defined(I686_CPU) + if (cputime_clock == CPUTIME_CLOCK_I586_CTR) + gp->profrate = i586_ctr_freq; +#ifdef I586_PMC_GUPROF + else if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { + if (perfmon_avail() && + perfmon_setup(0, cputime_clock_pmc_conf) == 0) { + if (perfmon_start(0) != 0) + perfmon_fini(0); + else { + /* XXX 1 event == 1 us. */ + gp->profrate = 1000000; + + saved_gmp = *gp; + + /* Zap overheads. They are invalid. */ + gp->cputime_overhead = 0; + gp->mcount_overhead = 0; + gp->mcount_post_overhead = 0; + gp->mcount_pre_overhead = 0; + gp->mexitcount_overhead = 0; + gp->mexitcount_post_overhead = 0; + gp->mexitcount_pre_overhead = 0; + + cputime_clock_pmc_init = TRUE; + } + } + } +#endif /* I586_PMC_GUPROF */ +#endif /* I586_CPU or I686_CPU */ + cputime_bias = 0; + cputime(); +} + +void +stopguprof(gp) + struct gmonparam *gp; +{ +#if defined(PERFMON) && defined(I586_PMC_GUPROF) + if (cputime_clock_pmc_init) { + *gp = saved_gmp; + perfmon_fini(0); + cputime_clock_pmc_init = FALSE; + } +#endif +} + +#else /* !GUPROF */ #ifdef __GNUC__ asm(" .text diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c index 0727f9b..d17a3b0 100644 --- a/sys/kern/subr_prof.c +++ b/sys/kern/subr_prof.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)subr_prof.c 8.3 (Berkeley) 9/23/93 - * $Id: subr_prof.c,v 1.15 1995/12/26 01:21:39 bde Exp $ + * $Id: subr_prof.c,v 1.16 1995/12/29 15:29:08 bde Exp $ */ #include <sys/param.h> @@ -56,6 +56,22 @@ struct gmonparam _gmonparam = { GMON_PROF_OFF }; extern char btext[]; extern char etext[]; +#ifdef GUPROF +void +nullfunc_loop_profiled() +{ + int i; + + for (i = 0; i < CALIB_SCALE; i++) + nullfunc_profiled(); +} + +void +nullfunc_profiled() +{ +} +#endif /* GUPROF */ + static void kmstartup(dummy) void *dummy; @@ -63,8 +79,14 @@ kmstartup(dummy) char *cp; struct gmonparam *p = &_gmonparam; #ifdef GUPROF - fptrint_t kmstartup_addr; + int cputime_overhead; + int empty_loop_time; int i; + fptrint_t kmstartup_addr; + int mcount_overhead; + int mexitcount_overhead; + int nullfunc_loop_overhead; + int nullfunc_loop_profiled_time; #endif /* @@ -74,7 +96,7 @@ kmstartup(dummy) p->lowpc = ROUNDDOWN((u_long)btext, HISTFRACTION * sizeof(HISTCOUNTER)); p->highpc = ROUNDUP((u_long)etext, HISTFRACTION * sizeof(HISTCOUNTER)); p->textsize = p->highpc - p->lowpc; - printf("Profiling kernel, textsize=%d [%x..%x]\n", + printf("Profiling kernel, textsize=%lu [%x..%x]\n", p->textsize, p->lowpc, p->highpc); p->kcountsize = p->textsize / HISTFRACTION; p->hashfraction = HASHFRACTION; @@ -99,41 +121,56 @@ kmstartup(dummy) p->froms = (u_short *)cp; #ifdef GUPROF - /* - * Initialize pointers to overhead counters. - */ + /* Initialize pointers to overhead counters. */ p->cputime_count = &KCOUNT(p, PC_TO_I(p, cputime)); p->mcount_count = &KCOUNT(p, PC_TO_I(p, mcount)); p->mexitcount_count = &KCOUNT(p, PC_TO_I(p, mexitcount)); /* - * Determine overheads. + * Disable interrupts to avoid interference while we calibrate + * things. */ disable_intr(); - p->state = GMON_PROF_HIRES; - p->cputime_overhead = 0; - (void)cputime(); + /* + * Determine overheads. + * XXX this needs to be repeated for each useful timer/counter. 
+ */ + cputime_overhead = 0; + startguprof(p); for (i = 0; i < CALIB_SCALE; i++) - p->cputime_overhead += cputime(); + cputime_overhead += cputime(); + + empty_loop(); + startguprof(p); + empty_loop(); + empty_loop_time = cputime(); + + nullfunc_loop_profiled(); + + /* + * Start profiling. There won't be any normal function calls since + * interrupts are disabled, but we will call the profiling routines + * directly to determine their overheads. + */ + p->state = GMON_PROF_HIRES; + + startguprof(p); + nullfunc_loop_profiled(); - (void)cputime(); + startguprof(p); for (i = 0; i < CALIB_SCALE; i++) #if defined(i386) && __GNUC__ >= 2 - /* - * Underestimate slightly by always calling __mcount, never - * mcount. - */ asm("pushl %0; call __mcount; popl %%ecx" : - : "i" (kmstartup) + : "i" (profil) : "ax", "bx", "cx", "dx", "memory"); #else #error #endif - p->mcount_overhead = KCOUNT(p, PC_TO_I(p, kmstartup)); + mcount_overhead = KCOUNT(p, PC_TO_I(p, profil)); - (void)cputime(); + startguprof(p); for (i = 0; i < CALIB_SCALE; i++) #if defined(i386) && __GNUC__ >= 2 asm("call mexitcount; 1:" @@ -142,25 +179,96 @@ kmstartup(dummy) #else #error #endif - p->mexitcount_overhead = KCOUNT(p, PC_TO_I(p, kmstartup_addr)); + mexitcount_overhead = KCOUNT(p, PC_TO_I(p, kmstartup_addr)); p->state = GMON_PROF_OFF; + stopguprof(p); + enable_intr(); - p->mcount_overhead_sub = p->mcount_overhead - p->cputime_overhead; - p->mexitcount_overhead_sub = p->mexitcount_overhead - - p->cputime_overhead; - printf("Profiling overheads: %u+%u %u+%u\n", - p->cputime_overhead, p->mcount_overhead_sub, - p->cputime_overhead, p->mexitcount_overhead_sub); - p->cputime_overhead_frac = p->cputime_overhead % CALIB_SCALE; - p->cputime_overhead /= CALIB_SCALE; - p->mcount_overhead_frac = p->mcount_overhead_sub % CALIB_SCALE; - p->mcount_overhead_sub /= CALIB_SCALE; - p->mcount_overhead /= CALIB_SCALE; - p->mexitcount_overhead_frac = p->mexitcount_overhead_sub % CALIB_SCALE; - p->mexitcount_overhead_sub /= 
CALIB_SCALE; - p->mexitcount_overhead /= CALIB_SCALE; + nullfunc_loop_profiled_time = 0; + for (i = 0; i < 28; i += sizeof(HISTCOUNTER)) { + int x; + + x = KCOUNT(p, PC_TO_I(p, + (fptrint_t)nullfunc_loop_profiled + i)); + nullfunc_loop_profiled_time += x; + printf("leaf[%d] = %d sum %d\n", + i, x, nullfunc_loop_profiled_time); + } +#define CALIB_DOSCALE(count) (((count) + CALIB_SCALE / 3) / CALIB_SCALE) +#define c2n(count, freq) ((int)((count) * 1000000000LL / freq)) + printf("cputime %d, empty_loop %d, nullfunc_loop_profiled %d, mcount %d, mexitcount %d\n", + CALIB_DOSCALE(c2n(cputime_overhead, p->profrate)), + CALIB_DOSCALE(c2n(empty_loop_time, p->profrate)), + CALIB_DOSCALE(c2n(nullfunc_loop_profiled_time, p->profrate)), + CALIB_DOSCALE(c2n(mcount_overhead, p->profrate)), + CALIB_DOSCALE(c2n(mexitcount_overhead, p->profrate))); + cputime_overhead -= empty_loop_time; + mcount_overhead -= empty_loop_time; + mexitcount_overhead -= empty_loop_time; + + /*- + * Profiling overheads are determined by the times between the + * following events: + * MC1: mcount() is called + * MC2: cputime() (called from mcount()) latches the timer + * MC3: mcount() completes + * ME1: mexitcount() is called + * ME2: cputime() (called from mexitcount()) latches the timer + * ME3: mexitcount() completes. + * The times between the events vary slightly depending on instruction + * combination and cache misses, etc. Attempt to determine the + * minimum times. These can be subtracted from the profiling times + * without much risk of reducing the profiling times below what they + * would be when profiling is not configured. Abbreviate: + * ab = minimum time between MC1 and MC3 + * a = minumum time between MC1 and MC2 + * b = minimum time between MC2 and MC3 + * cd = minimum time between ME1 and ME3 + * c = minimum time between ME1 and ME2 + * d = minimum time between ME2 and ME3. 
+ * These satisfy the relations: + * ab <= mcount_overhead (just measured) + * a + b <= ab + * cd <= mexitcount_overhead (just measured) + * c + d <= cd + * a + d <= nullfunc_loop_profiled_time (just measured) + * a >= 0, b >= 0, c >= 0, d >= 0. + * Assume that ab and cd are equal to the minimums. + */ + p->cputime_overhead = CALIB_DOSCALE(cputime_overhead); + p->mcount_overhead = CALIB_DOSCALE(mcount_overhead - cputime_overhead); + p->mexitcount_overhead = CALIB_DOSCALE(mexitcount_overhead + - cputime_overhead); + nullfunc_loop_overhead = nullfunc_loop_profiled_time - empty_loop_time; + p->mexitcount_post_overhead = CALIB_DOSCALE((mcount_overhead + - nullfunc_loop_overhead) + / 4); + p->mexitcount_pre_overhead = p->mexitcount_overhead + + p->cputime_overhead + - p->mexitcount_post_overhead; + p->mcount_pre_overhead = CALIB_DOSCALE(nullfunc_loop_overhead) + - p->mexitcount_post_overhead; + p->mcount_post_overhead = p->mcount_overhead + + p->cputime_overhead + - p->mcount_pre_overhead; + printf( +"Profiling overheads: mcount: %d+%d, %d+%d; mexitcount: %d+%d, %d+%d nsec\n", + c2n(p->cputime_overhead, p->profrate), + c2n(p->mcount_overhead, p->profrate), + c2n(p->mcount_pre_overhead, p->profrate), + c2n(p->mcount_post_overhead, p->profrate), + c2n(p->cputime_overhead, p->profrate), + c2n(p->mexitcount_overhead, p->profrate), + c2n(p->mexitcount_pre_overhead, p->profrate), + c2n(p->mexitcount_post_overhead, p->profrate)); + printf( +"Profiling overheads: mcount: %d+%d, %d+%d; mexitcount: %d+%d, %d+%d cycles\n", + p->cputime_overhead, p->mcount_overhead, + p->mcount_pre_overhead, p->mcount_post_overhead, + p->cputime_overhead, p->mexitcount_overhead, + p->mexitcount_pre_overhead, p->mexitcount_post_overhead); #endif /* GUPROF */ } @@ -189,16 +297,20 @@ sysctl_kern_prof SYSCTL_HANDLER_ARGS if (!req->newptr) return (0); if (state == GMON_PROF_OFF) { - stopprofclock(&proc0); gp->state = state; + stopprofclock(&proc0); + stopguprof(gp); } else if (state == GMON_PROF_ON) { + 
gp->state = GMON_PROF_OFF; + stopguprof(gp); gp->profrate = profhz; - gp->state = state; startprofclock(&proc0); + gp->state = state; #ifdef GUPROF } else if (state == GMON_PROF_HIRES) { - gp->profrate = 1193182; /* XXX */ + gp->state = GMON_PROF_OFF; stopprofclock(&proc0); + startguprof(gp); gp->state = state; #endif } else if (state != gp->state) diff --git a/sys/libkern/mcount.c b/sys/libkern/mcount.c index 30cda2d..e7105d0 100644 --- a/sys/libkern/mcount.c +++ b/sys/libkern/mcount.c @@ -36,13 +36,12 @@ static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93"; #endif static const char rcsid[] = - "$Id: mcount.c,v 1.7 1996/05/02 14:20:33 phk Exp $"; + "$Id: mcount.c,v 1.8 1996/08/28 20:15:12 bde Exp $"; #endif #include <sys/param.h> #include <sys/gmon.h> #ifdef KERNEL -#include <sys/systm.h> #include <vm/vm.h> #include <vm/vm_param.h> #include <vm/pmap.h> @@ -71,7 +70,7 @@ _MCOUNT_DECL(frompc, selfpc) /* _mcount; may be static, inline, etc */ register fptrint_t frompc, selfpc; { #ifdef GUPROF - u_int delta; + int delta; #endif register fptrdiff_t frompci; register u_short *frompcindex; @@ -115,50 +114,33 @@ _MCOUNT_DECL(frompc, selfpc) /* _mcount; may be static, inline, etc */ #endif /* KERNEL */ #ifdef GUPROF - if (p->state != GMON_PROF_HIRES) - goto skip_guprof_stuff; - /* - * Look at the clock and add the count of clock cycles since the - * clock was last looked at to a counter for frompc. This - * solidifies the count for the function containing frompc and - * effectively starts another clock for the current function. - * The count for the new clock will be solidified when another - * function call is made or the function returns. - * - * We use the usual sampling counters since they can be located - * efficiently. 4-byte counters are usually necessary. - * - * There are many complications for subtracting the profiling - * overheads from the counts for normal functions and adding - * them to the counts for mcount(), mexitcount() and cputime(). 
- * We attempt to handle fractional cycles, but the overheads - * are usually underestimated because they are calibrated for - * a simpler than usual setup. - */ - delta = cputime() - p->mcount_overhead; - p->cputime_overhead_resid += p->cputime_overhead_frac; - p->mcount_overhead_resid += p->mcount_overhead_frac; - if ((int)delta < 0) - *p->mcount_count += delta + p->mcount_overhead - - p->cputime_overhead; - else if (delta != 0) { - if (p->cputime_overhead_resid >= CALIB_SCALE) { - p->cputime_overhead_resid -= CALIB_SCALE; - ++*p->cputime_count; - --delta; - } - if (delta != 0) { - if (p->mcount_overhead_resid >= CALIB_SCALE) { - p->mcount_overhead_resid -= CALIB_SCALE; - ++*p->mcount_count; - --delta; - } - KCOUNT(p, frompci) += delta; - } - *p->mcount_count += p->mcount_overhead_sub; + if (p->state == GMON_PROF_HIRES) { + /* + * Count the time since cputime() was previously called + * against `frompc'. Compensate for overheads. + * + * cputime() sets its prev_count variable to the count when + * it is called. This in effect starts a counter for + * the next period of execution (normally from now until + * the next call to mcount() or mexitcount()). We set + * cputime_bias to compensate for our own overhead. + * + * We use the usual sampling counters since they can be + * located efficiently. 4-byte counters are usually + * necessary. gprof will add up the scattered counts + * just like it does for statistical profiling. All + * counts are signed so that underflow in the subtractions + * doesn't matter much (negative counts are normally + * compensated for by larger counts elsewhere). Underflow + * shouldn't occur, but may be caused by slightly wrong + * calibrations or from not clearing cputime_bias. 
+ */ + delta = cputime() - cputime_bias - p->mcount_pre_overhead; + cputime_bias = p->mcount_post_overhead; + KCOUNT(p, frompci) += delta; + *p->cputime_count += p->cputime_overhead; + *p->mcount_count += p->mcount_overhead; } - *p->cputime_count += p->cputime_overhead; -skip_guprof_stuff: #endif /* GUPROF */ #ifdef KERNEL @@ -290,36 +272,40 @@ mexitcount(selfpc) p = &_gmonparam; selfpcdiff = selfpc - (fptrint_t)p->lowpc; if (selfpcdiff < p->textsize) { - u_int delta; + int delta; /* - * Solidify the count for the current function. + * Count the time since cputime() was previously called + * against `selfpc'. Compensate for overheads. */ - delta = cputime() - p->mexitcount_overhead; - p->cputime_overhead_resid += p->cputime_overhead_frac; - p->mexitcount_overhead_resid += p->mexitcount_overhead_frac; - if ((int)delta < 0) - *p->mexitcount_count += delta + p->mexitcount_overhead - - p->cputime_overhead; - else if (delta != 0) { - if (p->cputime_overhead_resid >= CALIB_SCALE) { - p->cputime_overhead_resid -= CALIB_SCALE; - ++*p->cputime_count; - --delta; - } - if (delta != 0) { - if (p->mexitcount_overhead_resid - >= CALIB_SCALE) { - p->mexitcount_overhead_resid - -= CALIB_SCALE; - ++*p->mexitcount_count; - --delta; - } - KCOUNT(p, selfpcdiff) += delta; - } - *p->mexitcount_count += p->mexitcount_overhead_sub; - } + delta = cputime() - cputime_bias - p->mexitcount_pre_overhead; + cputime_bias = p->mexitcount_post_overhead; + KCOUNT(p, selfpcdiff) += delta; *p->cputime_count += p->cputime_overhead; + *p->mexitcount_count += p->mexitcount_overhead; } } + +void +empty_loop() +{ + int i; + + for (i = 0; i < CALIB_SCALE; i++) + ; +} + +void +nullfunc() +{ +} + +void +nullfunc_loop() +{ + int i; + + for (i = 0; i < CALIB_SCALE; i++) + nullfunc(); +} #endif /* GUPROF */ diff --git a/sys/sys/gmon.h b/sys/sys/gmon.h index 619e94c..6d5334c 100644 --- a/sys/sys/gmon.h +++ b/sys/sys/gmon.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)gmon.h 8.2 (Berkeley) 1/4/94 - * $Id: gmon.h,v 1.7 1995/08/29 03:09:14 bde Exp $ + * $Id: gmon.h,v 1.8 1995/12/29 15:29:26 bde Exp $ */ #ifndef _SYS_GMON_H_ @@ -57,7 +57,7 @@ struct gmonhdr { * Type of histogram counters used in the kernel. */ #ifdef GPROF4 -#define HISTCOUNTER unsigned +#define HISTCOUNTER int #else #define HISTCOUNTER unsigned short #endif @@ -174,22 +174,17 @@ struct gmonparam { fptrint_t highpc; u_long textsize; u_long hashfraction; - u_long profrate; + int profrate; /* XXX wrong type to match gmonhdr */ HISTCOUNTER *cputime_count; - u_int cputime_overhead; - u_int cputime_overhead_frac; - u_int cputime_overhead_resid; - u_int cputime_overhead_sub; + int cputime_overhead; HISTCOUNTER *mcount_count; - u_int mcount_overhead; - u_int mcount_overhead_frac; - u_int mcount_overhead_resid; - u_int mcount_overhead_sub; + int mcount_overhead; + int mcount_post_overhead; + int mcount_pre_overhead; HISTCOUNTER *mexitcount_count; - u_int mexitcount_overhead; - u_int mexitcount_overhead_frac; - u_int mexitcount_overhead_resid; - u_int mexitcount_overhead_sub; + int mexitcount_overhead; + int mexitcount_post_overhead; + int mexitcount_pre_overhead; }; extern struct gmonparam _gmonparam; |