diff options
author | bde <bde@FreeBSD.org> | 1996-10-17 19:32:31 +0000 |
---|---|---|
committer | bde <bde@FreeBSD.org> | 1996-10-17 19:32:31 +0000 |
commit | a0f16401c5c8aa8537932ccc296e9253fca3debd (patch) | |
tree | 2f3c01bb1684b82e42c63c0a17d658ba5ded662b /sys/i386 | |
parent | d0d507caa9d6718e7f6699bae600f120362f59b7 (diff) | |
download | FreeBSD-src-a0f16401c5c8aa8537932ccc296e9253fca3debd.zip FreeBSD-src-a0f16401c5c8aa8537932ccc296e9253fca3debd.tar.gz |
Improved non-statistical (GUPROF) profiling:
- use a more accurate and more efficient method of compensating for
overheads. The old method counted too much time against leaf
functions.
- normally use the Pentium timestamp counter if available.
On Pentiums, the times are now accurate to within a couple of cpu
clock cycles per function call in the (unlikely) event that there
are no cache misses in or caused by the profiling code.
- optionally use an arbitrary Pentium event counter if available.
- optionally regress to using the i8254 counter.
- scaled the i8254 counter by a factor of 128. Now the i8254 counters
overflow slightly faster than the TSC counters for a 150MHz Pentium :-)
(after about 16 seconds). This is to avoid fractional overheads.
files.i386:
perfmon.c temporarily has to be classified as a profiling-routine
because a couple of functions in it may be called from profiling code.
options.i386:
- I586_CTR_GUPROF is currently unused (oops).
- I586_PMC_GUPROF should be something like 0x70000 to enable (but not
use unless prof_machdep.c is changed) support for Pentium event
counters. 7 is a control mode and the counter number 0 is somewhere
in the 0000 bits (see perfmon.h for the encoding).
profile.h:
- added declarations.
- cleaned up separation of user mode declarations.
prof_machdep.c:
Mostly clock-select changes. The default clock can be changed by
editing kmem. There should be a sysctl for this.
subr_prof.c:
- added copyright.
- calibrate overheads for the new method.
- documented new method.
- fixed races and machine dependencies in start/stop code.
mcount.c:
Use the new overhead compensation method.
gmon.h:
- changed GPROF4 counter type from unsigned to int. Oops, this should
be machine-dependent and/or int32_t.
- reorganized overhead counters.
Submitted by: Pentium event counter changes mostly by wollman
Diffstat (limited to 'sys/i386')
-rw-r--r-- | sys/i386/conf/files.i386 | 3 | ||||
-rw-r--r-- | sys/i386/conf/options.i386 | 4 | ||||
-rw-r--r-- | sys/i386/include/profile.h | 36 | ||||
-rw-r--r-- | sys/i386/isa/prof_machdep.c | 168 |
4 files changed, 190 insertions, 21 deletions
diff --git a/sys/i386/conf/files.i386 b/sys/i386/conf/files.i386 index 67afe44..06d34ff 100644 --- a/sys/i386/conf/files.i386 +++ b/sys/i386/conf/files.i386 @@ -1,7 +1,7 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # -# $Id: files.i386,v 1.139 1996/08/27 19:45:54 pst Exp $ +# $Id: files.i386,v 1.140 1996/09/11 19:53:30 phk Exp $ # aic7xxx_asm optional ahc device-driver \ dependency "$S/dev/aic7xxx/aic7xxx_asm.c" \ @@ -51,6 +51,7 @@ i386/i386/machdep.c standard i386/i386/math_emulate.c optional math_emulate i386/i386/mem.c standard i386/i386/microtime.s standard +i386/i386/perfmon.c optional perfmon profiling-routine i386/i386/perfmon.c optional perfmon i386/i386/pmap.c standard i386/i386/procfs_machdep.c standard diff --git a/sys/i386/conf/options.i386 b/sys/i386/conf/options.i386 index 3d3a3f700..7c09359 100644 --- a/sys/i386/conf/options.i386 +++ b/sys/i386/conf/options.i386 @@ -1,4 +1,4 @@ -# $Id: options.i386,v 1.22 1996/10/09 18:36:44 bde Exp $ +# $Id: options.i386,v 1.23 1996/10/09 19:47:07 bde Exp $ BOUNCEPAGES opt_bounce.h USER_LDT MATH_EMULATE opt_math_emulate.h @@ -18,6 +18,8 @@ COMCONSOLE opt_comconsole.h COM_ESP opt_sio.h COM_MULTIPORT opt_sio.h DSI_SOFT_MODEM opt_sio.h +I586_CTR_GUPROF opt_i586_guprof.h +I586_PMC_GUPROF opt_i586_guprof.h FAT_CURSOR opt_pcvt.h PCVT_FREEBSD opt_pcvt.h PCVT_SCANSET opt_pcvt.h diff --git a/sys/i386/include/profile.h b/sys/i386/include/profile.h index 4b9d51b..08d0dbd 100644 --- a/sys/i386/include/profile.h +++ b/sys/i386/include/profile.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. 
* * @(#)profile.h 8.1 (Berkeley) 6/11/93 - * $Id: profile.h,v 1.6 1996/01/01 17:11:21 bde Exp $ + * $Id: profile.h,v 1.7 1996/08/28 20:15:25 bde Exp $ */ #ifndef _MACHINE_PROFILE_H_ @@ -97,18 +97,44 @@ typedef u_int fptrint_t; */ typedef int fptrdiff_t; -__BEGIN_DECLS #ifdef KERNEL + void mcount __P((fptrint_t frompc, fptrint_t selfpc)); + +#ifdef GUPROF +struct gmonparam; + +void nullfunc_loop_profiled __P((void)); +void nullfunc_profiled __P((void)); +void startguprof __P((struct gmonparam *p)); +void stopguprof __P((struct gmonparam *p)); #else +#define startguprof(p) +#define stopguprof(p) +#endif /* GUPROF */ + +#else /* !KERNEL */ + +#include <sys/cdefs.h> + +__BEGIN_DECLS void mcount __P((void)) __asm("mcount"); static void _mcount __P((fptrint_t frompc, fptrint_t selfpc)); -#endif +__END_DECLS + +#endif /* KERNEL */ #ifdef GUPROF -u_int cputime __P((void)); +/* XXX doesn't quite work outside kernel yet. */ +extern int cputime_bias; + +__BEGIN_DECLS +int cputime __P((void)); +void empty_loop __P((void)); void mexitcount __P((fptrint_t selfpc)); -#endif +void nullfunc __P((void)); +void nullfunc_loop __P((void)); __END_DECLS +#endif #endif /* !_MACHINE_PROFILE_H_ */ diff --git a/sys/i386/isa/prof_machdep.c b/sys/i386/isa/prof_machdep.c index f140b84..62c8df5 100644 --- a/sys/i386/isa/prof_machdep.c +++ b/sys/i386/isa/prof_machdep.c @@ -1,17 +1,64 @@ -/* - * NEED A COPYRIGHT NOPTICE HERE +/*- + * Copyright (c) 1996 Bruce D. Evans. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
* - * $Id$ + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: prof_machdep.c,v 1.2 1996/04/08 16:41:06 wollman Exp $ */ + +#ifdef GUPROF +#include "opt_cpu.h" +#include "opt_i586_guprof.h" +#include "opt_perfmon.h" + #include <sys/param.h> #include <sys/systm.h> +#include <sys/gmon.h> + #include <machine/clock.h> +#include <machine/perfmon.h> +#include <machine/profile.h> +#endif + #include <i386/isa/isa.h> #include <i386/isa/timerreg.h> #ifdef GUPROF -extern u_int cputime __P((void)); +#define CPUTIME_CLOCK_UNINITIALIZED 0 +#define CPUTIME_CLOCK_I8254 1 +#define CPUTIME_CLOCK_I586_CTR 2 +#define CPUTIME_CLOCK_I586_PMC 3 +#define CPUTIME_CLOCK_I8254_SHIFT 7 + +int cputime_bias = 1; /* initialize for locality of reference */ + +static int cputime_clock = CPUTIME_CLOCK_UNINITIALIZED; +#ifdef I586_PMC_GUPROF +static u_int cputime_clock_pmc_conf = I586_PMC_GUPROF; +static int cputime_clock_pmc_init; +static struct gmonparam saved_gmp; #endif +#endif /* GUPROF */ #ifdef __GNUC__ asm(" @@ -52,13 +99,13 @@ Lgot_frompc: # movl (%esp),%eax - pushf + pushfl pushl %eax pushl %edx cli call _mcount addl $8,%esp - popf + popfl Lmcount_exit: ret "); @@ -94,12 +141,12 @@ mexitcount: pushl %edx pushl %eax movl 8(%esp),%eax - pushf + pushfl pushl %eax 
cli call _mexitcount addl $4,%esp - popf + popfl popl %eax popl %edx Lmexitcount_exit: @@ -113,20 +160,48 @@ Lmexitcount_exit: * Return the time elapsed since the last call. The units are machine- * dependent. */ -u_int +int cputime() { u_int count; - u_int delta; - u_char low; + int delta; +#ifdef I586_PMC_GUPROF + u_quad_t event_count; +#endif + u_char high, low; static u_int prev_count; +#if defined(I586_CPU) || defined(I686_CPU) + if (cputime_clock == CPUTIME_CLOCK_I586_CTR) { + count = (u_int)rdtsc(); + delta = (int)(count - prev_count); + prev_count = count; + return (delta); + } +#ifdef I586_PMC_GUPROF + if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { + /* + * XXX permon_read() should be inlined so that the + * perfmon module doesn't need to be compiled with + * profiling disabled and so that it is fast. + */ + perfmon_read(0, &event_count); + + count = (u_int)event_count; + delta = (int)(count - prev_count); + prev_count = count; + return (delta); + } +#endif /* I586_PMC_GUPROF */ +#endif /* I586_CPU or I686_CPU */ + /* * Read the current value of the 8254 timer counter 0. */ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); low = inb(TIMER_CNTR0); - count = low | (inb(TIMER_CNTR0) << 8); + high = inb(TIMER_CNTR0); + count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT; /* * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets. @@ -140,10 +215,75 @@ cputime() delta = prev_count - count; prev_count = count; if ((int) delta <= 0) - return (delta + timer0_max_count); + return (delta + (timer0_max_count << CPUTIME_CLOCK_I8254_SHIFT)); return (delta); } -#else /* not GUPROF */ + +/* + * The start and stop routines need not be here since we turn off profiling + * before calling them. They are here for convenience. 
+ */ + +void +startguprof(gp) + struct gmonparam *gp; +{ + if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) { + cputime_clock = CPUTIME_CLOCK_I8254; +#if defined(I586_CPU) || defined(I686_CPU) + if (i586_ctr_freq != 0) + cputime_clock = CPUTIME_CLOCK_I586_CTR; +#endif + } + gp->profrate = timer_freq << CPUTIME_CLOCK_I8254_SHIFT; +#if defined(I586_CPU) || defined(I686_CPU) + if (cputime_clock == CPUTIME_CLOCK_I586_CTR) + gp->profrate = i586_ctr_freq; +#ifdef I586_PMC_GUPROF + else if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { + if (perfmon_avail() && + perfmon_setup(0, cputime_clock_pmc_conf) == 0) { + if (perfmon_start(0) != 0) + perfmon_fini(0); + else { + /* XXX 1 event == 1 us. */ + gp->profrate = 1000000; + + saved_gmp = *gp; + + /* Zap overheads. They are invalid. */ + gp->cputime_overhead = 0; + gp->mcount_overhead = 0; + gp->mcount_post_overhead = 0; + gp->mcount_pre_overhead = 0; + gp->mexitcount_overhead = 0; + gp->mexitcount_post_overhead = 0; + gp->mexitcount_pre_overhead = 0; + + cputime_clock_pmc_init = TRUE; + } + } + } +#endif /* I586_PMC_GUPROF */ +#endif /* I586_CPU or I686_CPU */ + cputime_bias = 0; + cputime(); +} + +void +stopguprof(gp) + struct gmonparam *gp; +{ +#if defined(PERFMON) && defined(I586_PMC_GUPROF) + if (cputime_clock_pmc_init) { + *gp = saved_gmp; + perfmon_fini(0); + cputime_clock_pmc_init = FALSE; + } +#endif +} + +#else /* !GUPROF */ #ifdef __GNUC__ asm(" .text |