diff options
author | bde <bde@FreeBSD.org> | 1995-12-29 15:30:05 +0000 |
---|---|---|
committer | bde <bde@FreeBSD.org> | 1995-12-29 15:30:05 +0000 |
commit | 586cc683d875b37dce82c825feb9ccc7d884b35e (patch) | |
tree | 9ce1e55534d3d930aead3ff55aeb7fcedbc086a4 /sys | |
parent | ff6f507f6bbb3fda77fb14c7201db37bafea7a3f (diff) | |
download | FreeBSD-src-586cc683d875b37dce82c825feb9ccc7d884b35e.zip FreeBSD-src-586cc683d875b37dce82c825feb9ccc7d884b35e.tar.gz |
Implemented non-statistical kernel profiling. This is based on
looking at a high resolution clock for each of the following events:
function call, function return, interrupt entry, interrupt exit,
and interesting branches. The differences between the times of
these events are added at appropriate places in an ordinary histogram
(as if very fast statistical profiling sampled the pc at those
places) so that ordinary gprof can be used to analyze the times.
gmon.h:
Histogram counters need to be 4 bytes for microsecond resolutions.
They will need to be larger for the 586 clock.
The comments were vax-centric and wrong even on vaxes. Does anyone
disagree?
gprof4.c:
The standard gprof should support counters of all integral sizes
and the size of the counter should be in the gmon header. This
hack will do until then. (Use gprof4 -u to examine the results
of non-statistical profiling.)
config/*:
Non-statistical profiling is configured with `config -pp'.
`config -p' still gives ordinary profiling.
kgmon/*:
Non-statistical profiling is enabled with `kgmon -B'. `kgmon -b'
still enables ordinary profiling (and disables non-statistical
profiling) if non-statistical profiling is configured.
Diffstat (limited to 'sys')
-rw-r--r-- | sys/amd64/amd64/prof_machdep.c | 153 | ||||
-rw-r--r-- | sys/amd64/include/asmacros.h | 92 | ||||
-rw-r--r-- | sys/amd64/include/profile.h | 54 | ||||
-rw-r--r-- | sys/conf/files.i386 | 3 | ||||
-rw-r--r-- | sys/i386/conf/files.i386 | 3 | ||||
-rw-r--r-- | sys/i386/include/asmacros.h | 92 | ||||
-rw-r--r-- | sys/i386/include/profile.h | 54 | ||||
-rw-r--r-- | sys/i386/isa/prof_machdep.c | 153 | ||||
-rw-r--r-- | sys/kern/subr_prof.c | 95 | ||||
-rw-r--r-- | sys/libkern/mcount.c | 182 | ||||
-rw-r--r-- | sys/sys/gmon.h | 64 |
11 files changed, 665 insertions, 280 deletions
diff --git a/sys/amd64/amd64/prof_machdep.c b/sys/amd64/amd64/prof_machdep.c new file mode 100644 index 0000000..2aa6787 --- /dev/null +++ b/sys/amd64/amd64/prof_machdep.c @@ -0,0 +1,153 @@ +#include <sys/param.h> +#include <sys/systm.h> +#include <machine/clock.h> +#include <i386/isa/isa.h> +#include <i386/isa/timerreg.h> + +#ifdef GUPROF +extern u_int cputime __P((void)); +#endif + +#ifdef __GNUC__ +asm(" +GM_STATE = 0 +GMON_PROF_OFF = 3 + + .text + .align 4,0x90 + .globl __mcount +__mcount: + # + # Check that we are profiling. Do it early for speed. + # + cmpl $GMON_PROF_OFF,__gmonparam+GM_STATE + je Lmcount_exit + # + # __mcount is the same as mcount except the caller hasn't changed + # the stack except to call here, so the caller's raddr is above + # our raddr. + # + movl 4(%esp),%edx + jmp Lgot_frompc + + .align 4,0x90 + .globl mcount +mcount: + cmpl $GMON_PROF_OFF,__gmonparam+GM_STATE + je Lmcount_exit + # + # The caller's stack frame has already been built, so %ebp is + # the caller's frame pointer. The caller's raddr is in the + # caller's frame following the caller's caller's frame pointer. + # + movl 4(%ebp),%edx +Lgot_frompc: + # + # Our raddr is the caller's pc. + # + movl (%esp),%eax + + pushf + pushl %eax + pushl %edx + cli + call _mcount + addl $8,%esp + popf +Lmcount_exit: + ret +"); +#else /* !__GNUC__ */ +#error +#endif /* __GNUC__ */ + +#ifdef GUPROF +/* + * mexitcount saves the return register(s), loads selfpc and calls + * mexitcount(selfpc) to do the work. Someday it should be in a machine + * dependent file together with cputime(), __mcount and mcount. cputime() + * can't just be put in machdep.c because it has to be compiled without -pg. + */ +#ifdef __GNUC__ +asm(" + .text +# +# Dummy label to be seen when gprof -u hides mexitcount. 
+# + .align 4,0x90 + .globl __mexitcount +__mexitcount: + nop + +GMON_PROF_HIRES = 4 + + .align 4,0x90 + .globl mexitcount +mexitcount: + cmpl $GMON_PROF_HIRES,__gmonparam+GM_STATE + jne Lmexitcount_exit + pushl %edx + pushl %eax + movl 8(%esp),%eax + pushf + pushl %eax + cli + call _mexitcount + addl $4,%esp + popf + popl %eax + popl %edx +Lmexitcount_exit: + ret +"); +#else /* !__GNUC__ */ +#error +#endif /* __GNUC__ */ + +/* + * Return the time elapsed since the last call. The units are machine- + * dependent. + */ +u_int +cputime() +{ + u_int count; + u_int delta; + u_char low; + static u_int prev_count; + + /* + * Read the current value of the 8254 timer counter 0. + */ + outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); + low = inb(TIMER_CNTR0); + count = low | (inb(TIMER_CNTR0) << 8); + + /* + * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets. + * While profiling is enabled, this routine is called at least twice + * per timer reset (for mcounting and mexitcounting hardclock()), + * so at most one reset has occurred since the last call, and one + * has occurred iff the current count is larger than the previous + * count. This allows counter underflow to be detected faster + * than in microtime(). + */ + delta = prev_count - count; + prev_count = count; + if ((int) delta <= 0) + return (delta + timer0_max_count); + return (delta); +} +#else /* not GUPROF */ +#ifdef __GNUC__ +asm(" + .text + .align 4,0x90 + .globl mexitcount +mexitcount: + ret +"); +#else /* !__GNUC__ */ +#error +#endif /* __GNUC__ */ +#endif /* GUPROF */ diff --git a/sys/amd64/include/asmacros.h b/sys/amd64/include/asmacros.h index b2a6dc8..8776ccf 100644 --- a/sys/amd64/include/asmacros.h +++ b/sys/amd64/include/asmacros.h @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id: asmacros.h,v 1.4 1994/08/19 11:20:11 jkh Exp $ + * $Id: asmacros.h,v 1.5 1994/09/08 12:25:18 bde Exp $ */ #ifndef _MACHINE_ASMACROS_H_ @@ -38,47 +38,83 @@ #ifdef KERNEL +/* XXX too much duplication in various asm*.h's and gprof.h's */ + #define ALIGN_DATA .align 2 /* 4 byte alignment, zero filled */ #define ALIGN_TEXT .align 2,0x90 /* 4-byte alignment, nop filled */ #define SUPERALIGN_TEXT .align 4,0x90 /* 16-byte alignment (better for 486), nop filled */ -#define GEN_ENTRY(name) ALIGN_TEXT; .globl name; name: -#define NON_GPROF_ENTRY(name) GEN_ENTRY(_/**/name) - -/* These three are place holders for future changes to the profiling code */ -#define MCOUNT_LABEL(name) -#define MEXITCOUNT -#define FAKE_MCOUNT(caller) +#define GEN_ENTRY(name) ALIGN_TEXT; .globl _/**/name; _/**/name: +#define NON_GPROF_ENTRY(name) GEN_ENTRY(name) #ifdef GPROF /* - * ALTENTRY() must be before a corresponding ENTRY() so that it can jump - * over the mcounting. - */ -#define ALTENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; jmp 2f -#define ENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; 2: -/* - * The call to mcount supports the usual (bad) conventions. We allocate - * some data and pass a pointer to it although the FreeBSD doesn't use - * the data. We set up a frame before calling mcount because that is - * the standard convention although it makes work for both mcount and - * callers. + * __mcount is like mcount except that doesn't require its caller to set + * up a frame pointer. It must be called before pushing anything onto the + * stack. gcc should eventually generate code to call __mcount in most + * cases. This would make -pg in combination with -fomit-frame-pointer + * useful. gcc has a configuration variable PROFILE_BEFORE_PROLOGUE to + * allow profiling before setting up the frame pointer, but this is + * inadequate for good handling of special cases, e.g., -fpic works best + * with profiling after the prologue. 
+ * + * Neither __mcount nor mcount requires %eax to point to 4 bytes of data, + * so don't waste space allocating the data or time setting it up. Changes + * to avoid the wastage in gcc-2.4.5-compiled code are available. + * + * mexitcount is a new profiling feature to allow accurate timing of all + * functions if an accurate clock is available. Changes to gcc-2.4.5 to + * support it are are available. The changes currently don't allow not + * generating mexitcounts for non-kernel code. It is best to call + * mexitcount right at the end of a function like the MEXITCOUNT macro + * does, but the changes to gcc only implement calling it as the first + * thing in the epilogue to avoid problems with -fpic. + * + * mcount and __mexitcount may clobber the call-used registers and %ef. + * mexitcount may clobber %ecx and %ef. + * + * Cross-jumping makes accurate timing more difficult. It is handled in + * many cases by calling mexitcount before jumping. It is not handled + * for some conditional jumps (e.g., in bcopyx) or for some fault-handling + * jumps. It is handled for some fault-handling jumps by not sharing the + * exit routine. + * + * ALTENTRY() must be before a corresponding ENTRY() so that it can jump to + * the main entry point. Note that alt entries are counted twice. They + * have to be counted as ordinary entries for gprof to get the call times + * right for the ordinary entries. + * + * High local labels are used in macros to avoid clashes with local labels + * in functions. + * + * "ret" is used instead of "RET" because there are a lot of "ret"s. + * 0xc3 is the opcode for "ret" (#define ret ... ret fails because this + * file is preprocessed in traditional mode). "ret" clobbers eflags + * but this doesn't matter. 
*/ -#define MCOUNT .data; ALIGN_DATA; 1:; .long 0; .text; \ - pushl %ebp; movl %esp,%ebp; \ - movl $1b,%eax; call mcount; popl %ebp -#else +#define ALTENTRY(name) GEN_ENTRY(name) ; MCOUNT ; MEXITCOUNT ; jmp 9f +#define ENTRY(name) GEN_ENTRY(name) ; 9: ; MCOUNT +#define FAKE_MCOUNT(caller) pushl caller ; call __mcount ; popl %ecx +#define MCOUNT call __mcount +#define MCOUNT_LABEL(name) GEN_ENTRY(name) ; nop ; ALIGN_TEXT +#define MEXITCOUNT call mexitcount +#define ret MEXITCOUNT ; .byte 0xc3 +#else /* not GPROF */ /* * ALTENTRY() has to align because it is before a corresponding ENTRY(). * ENTRY() has to align to because there may be no ALTENTRY() before it. - * If there is a previous ALTENTRY() then the alignment code is empty. + * If there is a previous ALTENTRY() then the alignment code for ENTRY() + * is empty. */ -#define ALTENTRY(name) GEN_ENTRY(_/**/name) -#define ENTRY(name) GEN_ENTRY(_/**/name) +#define ALTENTRY(name) GEN_ENTRY(name) +#define ENTRY(name) GEN_ENTRY(name) +#define FAKE_MCOUNT(caller) #define MCOUNT +#define MCOUNT_LABEL(name) +#define MEXITCOUNT +#endif /* GPROF */ -#endif - +/* XXX NOP and FASTER_NOP are misleadingly named */ #ifdef DUMMY_NOPS /* this will break some older machines */ #define FASTER_NOP #define NOP diff --git a/sys/amd64/include/profile.h b/sys/amd64/include/profile.h index 9fe27ec..c55d629 100644 --- a/sys/amd64/include/profile.h +++ b/sys/amd64/include/profile.h @@ -31,35 +31,59 @@ * SUCH DAMAGE. 
* * @(#)profile.h 8.1 (Berkeley) 6/11/93 - * $Id: profile.h,v 1.3 1994/08/21 04:55:29 paul Exp $ + * $Id: profile.h,v 1.4 1994/09/15 16:27:14 paul Exp $ */ -#ifndef _I386_MACHINE_PROFILE_H_ -#define _I386_MACHINE_PROFILE_H_ +#ifndef _MACHINE_PROFILE_H_ +#define _MACHINE_PROFILE_H_ +#if 0 #define _MCOUNT_DECL static inline void _mcount #define MCOUNT \ extern void mcount() asm("mcount"); void mcount() { \ - int selfpc, frompcindex; \ + fptrint_t selfpc, frompc; \ /* \ - * find the return address for mcount, \ + * Find the return address for mcount, \ * and the return address for mcount's caller. \ * \ - * selfpc = pc pushed by mcount call \ + * selfpc = pc pushed by call to mcount \ */ \ asm("movl 4(%%ebp),%0" : "=r" (selfpc)); \ /* \ - * frompcindex = pc pushed by jsr into self. \ - * In GCC the caller's stack frame has already been built so we \ - * have to chase a6 to find caller's raddr. \ + * frompc = pc pushed by call to mcount's caller. \ + * The caller's stack frame has already been built, so %ebp is \ + * the caller's frame pointer. The caller's raddr is in the \ + * caller's frame following the caller's caller's frame pointer. \ */ \ - asm("movl (%%ebp),%0" : "=r" (frompcindex)); \ - frompcindex = ((int *)frompcindex)[1]; \ - _mcount(frompcindex, selfpc); \ + asm("movl (%%ebp),%0" : "=r" (frompc)); \ + frompc = ((fptrint_t *)frompc)[1]; \ + _mcount(frompc, selfpc); \ } +#else +#define _MCOUNT_DECL void mcount +#define MCOUNT +#endif -#define MCOUNT_ENTER save_eflags = read_eflags(); disable_intr() -#define MCOUNT_EXIT write_eflags(save_eflags) +#define MCOUNT_ENTER { save_eflags = read_eflags(); disable_intr(); } +#define MCOUNT_EXIT (write_eflags(save_eflags)) -#endif +#define CALIB_SCALE 1000 +#define KCOUNT(p,index) ((p)->kcount[(index) \ + / (HISTFRACTION * sizeof(*(p)->kcount))]) +#define PC_TO_I(p, pc) ((fptrint_t)(pc) - (fptrint_t)(p)->lowpc) + +/* An unsigned integral type that can hold function pointers. 
*/ +typedef u_int fptrint_t; + +/* + * An unsigned integral type that can hold non-negative difference between + * function pointers. + */ +typedef int fptrdiff_t; + +u_int cputime __P((void)); +void mcount __P((fptrint_t frompc, fptrint_t selfpc)); +void mexitcount __P((fptrint_t selfpc)); + +#endif /* !MACHINE_PROFILE_H */ diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index fda656d..1633ed8 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -1,7 +1,7 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # -# $Id: files.i386,v 1.120 1995/12/26 12:50:01 bde Exp $ +# $Id: files.i386,v 1.121 1995/12/26 13:57:56 bde Exp $ # aic7xxx_asm optional ahc device-driver \ dependency "$S/dev/aic7xxx/aic7xxx_asm.c" \ @@ -115,6 +115,7 @@ i386/isa/pcvt/pcvt_kbd.c optional vt device-driver i386/isa/pcvt/pcvt_out.c optional vt device-driver i386/isa/pcvt/pcvt_sup.c optional vt device-driver i386/isa/pcvt/pcvt_vtf.c optional vt device-driver +i386/isa/prof_machdep.c optional profiling-routine i386/isa/psm.c optional psm device-driver i386/isa/random_machdep.c standard i386/isa/rc.c optional rc device-driver diff --git a/sys/i386/conf/files.i386 b/sys/i386/conf/files.i386 index fda656d..1633ed8 100644 --- a/sys/i386/conf/files.i386 +++ b/sys/i386/conf/files.i386 @@ -1,7 +1,7 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. 
# -# $Id: files.i386,v 1.120 1995/12/26 12:50:01 bde Exp $ +# $Id: files.i386,v 1.121 1995/12/26 13:57:56 bde Exp $ # aic7xxx_asm optional ahc device-driver \ dependency "$S/dev/aic7xxx/aic7xxx_asm.c" \ @@ -115,6 +115,7 @@ i386/isa/pcvt/pcvt_kbd.c optional vt device-driver i386/isa/pcvt/pcvt_out.c optional vt device-driver i386/isa/pcvt/pcvt_sup.c optional vt device-driver i386/isa/pcvt/pcvt_vtf.c optional vt device-driver +i386/isa/prof_machdep.c optional profiling-routine i386/isa/psm.c optional psm device-driver i386/isa/random_machdep.c standard i386/isa/rc.c optional rc device-driver diff --git a/sys/i386/include/asmacros.h b/sys/i386/include/asmacros.h index b2a6dc8..8776ccf 100644 --- a/sys/i386/include/asmacros.h +++ b/sys/i386/include/asmacros.h @@ -30,7 +30,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: asmacros.h,v 1.4 1994/08/19 11:20:11 jkh Exp $ + * $Id: asmacros.h,v 1.5 1994/09/08 12:25:18 bde Exp $ */ #ifndef _MACHINE_ASMACROS_H_ @@ -38,47 +38,83 @@ #ifdef KERNEL +/* XXX too much duplication in various asm*.h's and gprof.h's */ + #define ALIGN_DATA .align 2 /* 4 byte alignment, zero filled */ #define ALIGN_TEXT .align 2,0x90 /* 4-byte alignment, nop filled */ #define SUPERALIGN_TEXT .align 4,0x90 /* 16-byte alignment (better for 486), nop filled */ -#define GEN_ENTRY(name) ALIGN_TEXT; .globl name; name: -#define NON_GPROF_ENTRY(name) GEN_ENTRY(_/**/name) - -/* These three are place holders for future changes to the profiling code */ -#define MCOUNT_LABEL(name) -#define MEXITCOUNT -#define FAKE_MCOUNT(caller) +#define GEN_ENTRY(name) ALIGN_TEXT; .globl _/**/name; _/**/name: +#define NON_GPROF_ENTRY(name) GEN_ENTRY(name) #ifdef GPROF /* - * ALTENTRY() must be before a corresponding ENTRY() so that it can jump - * over the mcounting. 
- */ -#define ALTENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; jmp 2f -#define ENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; 2: -/* - * The call to mcount supports the usual (bad) conventions. We allocate - * some data and pass a pointer to it although the FreeBSD doesn't use - * the data. We set up a frame before calling mcount because that is - * the standard convention although it makes work for both mcount and - * callers. + * __mcount is like mcount except that doesn't require its caller to set + * up a frame pointer. It must be called before pushing anything onto the + * stack. gcc should eventually generate code to call __mcount in most + * cases. This would make -pg in combination with -fomit-frame-pointer + * useful. gcc has a configuration variable PROFILE_BEFORE_PROLOGUE to + * allow profiling before setting up the frame pointer, but this is + * inadequate for good handling of special cases, e.g., -fpic works best + * with profiling after the prologue. + * + * Neither __mcount nor mcount requires %eax to point to 4 bytes of data, + * so don't waste space allocating the data or time setting it up. Changes + * to avoid the wastage in gcc-2.4.5-compiled code are available. + * + * mexitcount is a new profiling feature to allow accurate timing of all + * functions if an accurate clock is available. Changes to gcc-2.4.5 to + * support it are are available. The changes currently don't allow not + * generating mexitcounts for non-kernel code. It is best to call + * mexitcount right at the end of a function like the MEXITCOUNT macro + * does, but the changes to gcc only implement calling it as the first + * thing in the epilogue to avoid problems with -fpic. + * + * mcount and __mexitcount may clobber the call-used registers and %ef. + * mexitcount may clobber %ecx and %ef. + * + * Cross-jumping makes accurate timing more difficult. It is handled in + * many cases by calling mexitcount before jumping. 
It is not handled + * for some conditional jumps (e.g., in bcopyx) or for some fault-handling + * jumps. It is handled for some fault-handling jumps by not sharing the + * exit routine. + * + * ALTENTRY() must be before a corresponding ENTRY() so that it can jump to + * the main entry point. Note that alt entries are counted twice. They + * have to be counted as ordinary entries for gprof to get the call times + * right for the ordinary entries. + * + * High local labels are used in macros to avoid clashes with local labels + * in functions. + * + * "ret" is used instead of "RET" because there are a lot of "ret"s. + * 0xc3 is the opcode for "ret" (#define ret ... ret fails because this + * file is preprocessed in traditional mode). "ret" clobbers eflags + * but this doesn't matter. */ -#define MCOUNT .data; ALIGN_DATA; 1:; .long 0; .text; \ - pushl %ebp; movl %esp,%ebp; \ - movl $1b,%eax; call mcount; popl %ebp -#else +#define ALTENTRY(name) GEN_ENTRY(name) ; MCOUNT ; MEXITCOUNT ; jmp 9f +#define ENTRY(name) GEN_ENTRY(name) ; 9: ; MCOUNT +#define FAKE_MCOUNT(caller) pushl caller ; call __mcount ; popl %ecx +#define MCOUNT call __mcount +#define MCOUNT_LABEL(name) GEN_ENTRY(name) ; nop ; ALIGN_TEXT +#define MEXITCOUNT call mexitcount +#define ret MEXITCOUNT ; .byte 0xc3 +#else /* not GPROF */ /* * ALTENTRY() has to align because it is before a corresponding ENTRY(). * ENTRY() has to align to because there may be no ALTENTRY() before it. - * If there is a previous ALTENTRY() then the alignment code is empty. + * If there is a previous ALTENTRY() then the alignment code for ENTRY() + * is empty. 
*/ -#define ALTENTRY(name) GEN_ENTRY(_/**/name) -#define ENTRY(name) GEN_ENTRY(_/**/name) +#define ALTENTRY(name) GEN_ENTRY(name) +#define ENTRY(name) GEN_ENTRY(name) +#define FAKE_MCOUNT(caller) #define MCOUNT +#define MCOUNT_LABEL(name) +#define MEXITCOUNT +#endif /* GPROF */ -#endif - +/* XXX NOP and FASTER_NOP are misleadingly named */ #ifdef DUMMY_NOPS /* this will break some older machines */ #define FASTER_NOP #define NOP diff --git a/sys/i386/include/profile.h b/sys/i386/include/profile.h index 9fe27ec..c55d629 100644 --- a/sys/i386/include/profile.h +++ b/sys/i386/include/profile.h @@ -31,35 +31,59 @@ * SUCH DAMAGE. * * @(#)profile.h 8.1 (Berkeley) 6/11/93 - * $Id: profile.h,v 1.3 1994/08/21 04:55:29 paul Exp $ + * $Id: profile.h,v 1.4 1994/09/15 16:27:14 paul Exp $ */ -#ifndef _I386_MACHINE_PROFILE_H_ -#define _I386_MACHINE_PROFILE_H_ +#ifndef _MACHINE_PROFILE_H_ +#define _MACHINE_PROFILE_H_ +#if 0 #define _MCOUNT_DECL static inline void _mcount #define MCOUNT \ extern void mcount() asm("mcount"); void mcount() { \ - int selfpc, frompcindex; \ + fptrint_t selfpc, frompc; \ /* \ - * find the return address for mcount, \ + * Find the return address for mcount, \ * and the return address for mcount's caller. \ * \ - * selfpc = pc pushed by mcount call \ + * selfpc = pc pushed by call to mcount \ */ \ asm("movl 4(%%ebp),%0" : "=r" (selfpc)); \ /* \ - * frompcindex = pc pushed by jsr into self. \ - * In GCC the caller's stack frame has already been built so we \ - * have to chase a6 to find caller's raddr. \ + * frompc = pc pushed by call to mcount's caller. \ + * The caller's stack frame has already been built, so %ebp is \ + * the caller's frame pointer. The caller's raddr is in the \ + * caller's frame following the caller's caller's frame pointer. 
\ */ \ - asm("movl (%%ebp),%0" : "=r" (frompcindex)); \ - frompcindex = ((int *)frompcindex)[1]; \ - _mcount(frompcindex, selfpc); \ + asm("movl (%%ebp),%0" : "=r" (frompc)); \ + frompc = ((fptrint_t *)frompc)[1]; \ + _mcount(frompc, selfpc); \ } +#else +#define _MCOUNT_DECL void mcount +#define MCOUNT +#endif -#define MCOUNT_ENTER save_eflags = read_eflags(); disable_intr() -#define MCOUNT_EXIT write_eflags(save_eflags) +#define MCOUNT_ENTER { save_eflags = read_eflags(); disable_intr(); } +#define MCOUNT_EXIT (write_eflags(save_eflags)) -#endif +#define CALIB_SCALE 1000 +#define KCOUNT(p,index) ((p)->kcount[(index) \ + / (HISTFRACTION * sizeof(*(p)->kcount))]) +#define PC_TO_I(p, pc) ((fptrint_t)(pc) - (fptrint_t)(p)->lowpc) + +/* An unsigned integral type that can hold function pointers. */ +typedef u_int fptrint_t; + +/* + * An unsigned integral type that can hold non-negative difference between + * function pointers. + */ +typedef int fptrdiff_t; + +u_int cputime __P((void)); +void mcount __P((fptrint_t frompc, fptrint_t selfpc)); +void mexitcount __P((fptrint_t selfpc)); + +#endif /* !MACHINE_PROFILE_H */ diff --git a/sys/i386/isa/prof_machdep.c b/sys/i386/isa/prof_machdep.c new file mode 100644 index 0000000..2aa6787 --- /dev/null +++ b/sys/i386/isa/prof_machdep.c @@ -0,0 +1,153 @@ +#include <sys/param.h> +#include <sys/systm.h> +#include <machine/clock.h> +#include <i386/isa/isa.h> +#include <i386/isa/timerreg.h> + +#ifdef GUPROF +extern u_int cputime __P((void)); +#endif + +#ifdef __GNUC__ +asm(" +GM_STATE = 0 +GMON_PROF_OFF = 3 + + .text + .align 4,0x90 + .globl __mcount +__mcount: + # + # Check that we are profiling. Do it early for speed. + # + cmpl $GMON_PROF_OFF,__gmonparam+GM_STATE + je Lmcount_exit + # + # __mcount is the same as mcount except the caller hasn't changed + # the stack except to call here, so the caller's raddr is above + # our raddr. 
+ # + movl 4(%esp),%edx + jmp Lgot_frompc + + .align 4,0x90 + .globl mcount +mcount: + cmpl $GMON_PROF_OFF,__gmonparam+GM_STATE + je Lmcount_exit + # + # The caller's stack frame has already been built, so %ebp is + # the caller's frame pointer. The caller's raddr is in the + # caller's frame following the caller's caller's frame pointer. + # + movl 4(%ebp),%edx +Lgot_frompc: + # + # Our raddr is the caller's pc. + # + movl (%esp),%eax + + pushf + pushl %eax + pushl %edx + cli + call _mcount + addl $8,%esp + popf +Lmcount_exit: + ret +"); +#else /* !__GNUC__ */ +#error +#endif /* __GNUC__ */ + +#ifdef GUPROF +/* + * mexitcount saves the return register(s), loads selfpc and calls + * mexitcount(selfpc) to do the work. Someday it should be in a machine + * dependent file together with cputime(), __mcount and mcount. cputime() + * can't just be put in machdep.c because it has to be compiled without -pg. + */ +#ifdef __GNUC__ +asm(" + .text +# +# Dummy label to be seen when gprof -u hides mexitcount. +# + .align 4,0x90 + .globl __mexitcount +__mexitcount: + nop + +GMON_PROF_HIRES = 4 + + .align 4,0x90 + .globl mexitcount +mexitcount: + cmpl $GMON_PROF_HIRES,__gmonparam+GM_STATE + jne Lmexitcount_exit + pushl %edx + pushl %eax + movl 8(%esp),%eax + pushf + pushl %eax + cli + call _mexitcount + addl $4,%esp + popf + popl %eax + popl %edx +Lmexitcount_exit: + ret +"); +#else /* !__GNUC__ */ +#error +#endif /* __GNUC__ */ + +/* + * Return the time elapsed since the last call. The units are machine- + * dependent. + */ +u_int +cputime() +{ + u_int count; + u_int delta; + u_char low; + static u_int prev_count; + + /* + * Read the current value of the 8254 timer counter 0. + */ + outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH); + low = inb(TIMER_CNTR0); + count = low | (inb(TIMER_CNTR0) << 8); + + /* + * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets. 
+ * While profiling is enabled, this routine is called at least twice + * per timer reset (for mcounting and mexitcounting hardclock()), + * so at most one reset has occurred since the last call, and one + * has occurred iff the current count is larger than the previous + * count. This allows counter underflow to be detected faster + * than in microtime(). + */ + delta = prev_count - count; + prev_count = count; + if ((int) delta <= 0) + return (delta + timer0_max_count); + return (delta); +} +#else /* not GUPROF */ +#ifdef __GNUC__ +asm(" + .text + .align 4,0x90 + .globl mexitcount +mexitcount: + ret +"); +#else /* !__GNUC__ */ +#error +#endif /* __GNUC__ */ +#endif /* GUPROF */ diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c index b9c1ae2..0727f9b 100644 --- a/sys/kern/subr_prof.c +++ b/sys/kern/subr_prof.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)subr_prof.c 8.3 (Berkeley) 9/23/93 - * $Id: subr_prof.c,v 1.14 1995/12/14 08:31:44 phk Exp $ + * $Id: subr_prof.c,v 1.15 1995/12/26 01:21:39 bde Exp $ */ #include <sys/param.h> @@ -62,6 +62,11 @@ kmstartup(dummy) { char *cp; struct gmonparam *p = &_gmonparam; +#ifdef GUPROF + fptrint_t kmstartup_addr; + int i; +#endif + /* * Round lowpc and highpc to multiples of the density we're using * so the rest of the scaling (here and in gprof) stays in ints. @@ -89,9 +94,74 @@ kmstartup(dummy) bzero(cp, p->kcountsize + p->tossize + p->fromssize); p->tos = (struct tostruct *)cp; cp += p->tossize; - p->kcount = (u_short *)cp; + p->kcount = (HISTCOUNTER *)cp; cp += p->kcountsize; p->froms = (u_short *)cp; + +#ifdef GUPROF + /* + * Initialize pointers to overhead counters. + */ + p->cputime_count = &KCOUNT(p, PC_TO_I(p, cputime)); + p->mcount_count = &KCOUNT(p, PC_TO_I(p, mcount)); + p->mexitcount_count = &KCOUNT(p, PC_TO_I(p, mexitcount)); + + /* + * Determine overheads. 
+ */ + disable_intr(); + p->state = GMON_PROF_HIRES; + + p->cputime_overhead = 0; + (void)cputime(); + for (i = 0; i < CALIB_SCALE; i++) + p->cputime_overhead += cputime(); + + (void)cputime(); + for (i = 0; i < CALIB_SCALE; i++) +#if defined(i386) && __GNUC__ >= 2 + /* + * Underestimate slightly by always calling __mcount, never + * mcount. + */ + asm("pushl %0; call __mcount; popl %%ecx" + : + : "i" (kmstartup) + : "ax", "bx", "cx", "dx", "memory"); +#else +#error +#endif + p->mcount_overhead = KCOUNT(p, PC_TO_I(p, kmstartup)); + + (void)cputime(); + for (i = 0; i < CALIB_SCALE; i++) +#if defined(i386) && __GNUC__ >= 2 + asm("call mexitcount; 1:" + : : : "ax", "bx", "cx", "dx", "memory"); + asm("movl $1b,%0" : "=rm" (kmstartup_addr)); +#else +#error +#endif + p->mexitcount_overhead = KCOUNT(p, PC_TO_I(p, kmstartup_addr)); + + p->state = GMON_PROF_OFF; + enable_intr(); + + p->mcount_overhead_sub = p->mcount_overhead - p->cputime_overhead; + p->mexitcount_overhead_sub = p->mexitcount_overhead + - p->cputime_overhead; + printf("Profiling overheads: %u+%u %u+%u\n", + p->cputime_overhead, p->mcount_overhead_sub, + p->cputime_overhead, p->mexitcount_overhead_sub); + p->cputime_overhead_frac = p->cputime_overhead % CALIB_SCALE; + p->cputime_overhead /= CALIB_SCALE; + p->mcount_overhead_frac = p->mcount_overhead_sub % CALIB_SCALE; + p->mcount_overhead_sub /= CALIB_SCALE; + p->mcount_overhead /= CALIB_SCALE; + p->mexitcount_overhead_frac = p->mexitcount_overhead_sub % CALIB_SCALE; + p->mexitcount_overhead_sub /= CALIB_SCALE; + p->mexitcount_overhead /= CALIB_SCALE; +#endif /* GUPROF */ } /* @@ -104,6 +174,7 @@ sysctl_kern_prof SYSCTL_HANDLER_ARGS u_int namelen = arg2; struct gmonparam *gp = &_gmonparam; int error; + int state; /* all sysctl names at this level are terminal */ if (namelen != 1) @@ -111,13 +182,27 @@ sysctl_kern_prof SYSCTL_HANDLER_ARGS switch (name[0]) { case GPROF_STATE: - error = sysctl_handle_int(oidp, &gp->state, 0, req); + state = gp->state; + error = 
sysctl_handle_int(oidp, &state, 0, req); if (error) return (error); - if (gp->state == GMON_PROF_OFF) + if (!req->newptr) + return (0); + if (state == GMON_PROF_OFF) { stopprofclock(&proc0); - else + gp->state = state; + } else if (state == GMON_PROF_ON) { + gp->profrate = profhz; + gp->state = state; startprofclock(&proc0); +#ifdef GUPROF + } else if (state == GMON_PROF_HIRES) { + gp->profrate = 1193182; /* XXX */ + stopprofclock(&proc0); + gp->state = state; +#endif + } else if (state != gp->state) + return (EINVAL); return (0); case GPROF_COUNT: return (sysctl_handle_opaque(oidp, diff --git a/sys/libkern/mcount.c b/sys/libkern/mcount.c index fc3625b..ed0e68d 100644 --- a/sys/libkern/mcount.c +++ b/sys/libkern/mcount.c @@ -1,180 +1,4 @@ -/*- - * Copyright (c) 1983, 1992, 1993 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $Id: mcount.c,v 1.3 1994/09/15 15:41:48 paul Exp $ - */ +#define GPROF4 1 /* XXX can't get at kernel options */ +#define GUPROF 1 /* XXX can't get at kernel options */ -#include <sys/param.h> -#include <sys/gmon.h> -#ifdef KERNEL -#include <i386/include/cpufunc.h> -#endif - -/* - * mcount is called on entry to each function compiled with the profiling - * switch set. _mcount(), which is declared in a machine-dependent way - * with _MCOUNT_DECL, does the actual work and is either inlined into a - * C routine or called by an assembly stub. In any case, this magic is - * taken care of by the MCOUNT definition in <machine/profile.h>. - * - * _mcount updates data structures that represent traversals of the - * program's call graph edges. frompc and selfpc are the return - * address and function address that represents the given call graph edge. - * - * Note: the original BSD code used the same variable (frompcindex) for - * both frompcindex and frompc. Any reasonable, modern compiler will - * perform this optimization. 
- */ -_MCOUNT_DECL(frompc, selfpc) /* _mcount; may be static, inline, etc */ - register u_long frompc, selfpc; -{ - register u_short *frompcindex; - register struct tostruct *top, *prevtop; - register struct gmonparam *p; - register long toindex; -#ifdef KERNEL - register int s; - u_long save_eflags; -#endif - - p = &_gmonparam; - /* - * check that we are profiling - * and that we aren't recursively invoked. - */ - if (p->state != GMON_PROF_ON) - return; -#ifdef KERNEL - MCOUNT_ENTER; -#else - p->state = GMON_PROF_BUSY; -#endif - /* - * check that frompcindex is a reasonable pc value. - * for example: signal catchers get called from the stack, - * not from text space. too bad. - */ - frompc -= p->lowpc; - if (frompc > p->textsize) - goto done; - - frompcindex = &p->froms[frompc / (p->hashfraction * sizeof(*p->froms))]; - toindex = *frompcindex; - if (toindex == 0) { - /* - * first time traversing this arc - */ - toindex = ++p->tos[0].link; - if (toindex >= p->tolimit) - /* halt further profiling */ - goto overflow; - - *frompcindex = toindex; - top = &p->tos[toindex]; - top->selfpc = selfpc; - top->count = 1; - top->link = 0; - goto done; - } - top = &p->tos[toindex]; - if (top->selfpc == selfpc) { - /* - * arc at front of chain; usual case. - */ - top->count++; - goto done; - } - /* - * have to go looking down chain for it. - * top points to what we are looking at, - * prevtop points to previous top. - * we know it is not at the head of the chain. - */ - for (; /* goto done */; ) { - if (top->link == 0) { - /* - * top is end of the chain and none of the chain - * had top->selfpc == selfpc. - * so we allocate a new tostruct - * and link it to the head of the chain. - */ - toindex = ++p->tos[0].link; - if (toindex >= p->tolimit) - goto overflow; - - top = &p->tos[toindex]; - top->selfpc = selfpc; - top->count = 1; - top->link = *frompcindex; - *frompcindex = toindex; - goto done; - } - /* - * otherwise, check the next arc on the chain. 
- */ - prevtop = top; - top = &p->tos[top->link]; - if (top->selfpc == selfpc) { - /* - * there it is. - * increment its count - * move it to the head of the chain. - */ - top->count++; - toindex = prevtop->link; - prevtop->link = top->link; - top->link = *frompcindex; - *frompcindex = toindex; - goto done; - } - - } -done: -#ifdef KERNEL - MCOUNT_EXIT; -#else - p->state = GMON_PROF_ON; -#endif - return; -overflow: - p->state = GMON_PROF_ERROR; -#ifdef KERNEL - MCOUNT_EXIT; -#endif - return; -} - -/* - * Actual definition of mcount function. Defined in <machine/profile.h>, - * which is included by <sys/gmon.h>. - */ -MCOUNT +#include "../lib/libc/gmon/mcount.c" /* XXX */ diff --git a/sys/sys/gmon.h b/sys/sys/gmon.h index 9b3882d..619e94c 100644 --- a/sys/sys/gmon.h +++ b/sys/sys/gmon.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)gmon.h 8.2 (Berkeley) 1/4/94 - * $Id: gmon.h,v 1.6 1995/05/30 08:14:22 rgrimes Exp $ + * $Id: gmon.h,v 1.7 1995/08/29 03:09:14 bde Exp $ */ #ifndef _SYS_GMON_H_ @@ -49,18 +49,33 @@ struct gmonhdr { int version; /* version number */ int profrate; /* profiling clock rate */ int spare[3]; /* reserved */ + /* XXX should record counter size and density */ }; #define GMONVERSION 0x00051879 /* - * histogram counters are unsigned shorts (according to the kernel). + * Type of histogram counters used in the kernel. */ +#ifdef GPROF4 +#define HISTCOUNTER unsigned +#else #define HISTCOUNTER unsigned short +#endif /* - * fraction of text space to allocate for histogram counters here, 1/2 + * Fraction of text space to allocate for histogram counters. + * We allocate counters at the same or higher density as function + * addresses, so that each counter belongs to a unique function. + * A lower density of counters would give less resolution but a + * higher density would be wasted. + * + * Assume that function addresses are at least 4-byte-aligned. 
+ * It would be better to get the linker to align functions more + * strictly so that we could use smaller tables. */ -#define HISTFRACTION 2 +#define FUNCTION_ALIGNMENT 4 +#define HISTFRACTION (FUNCTION_ALIGNMENT / sizeof(HISTCOUNTER) == 0 \ + ? 1 : FUNCTION_ALIGNMENT / sizeof(HISTCOUNTER)) /* * Fraction of text space to allocate for from hash buckets. @@ -90,7 +105,23 @@ struct gmonhdr { * profiling data structures without (in practice) sacrificing * any granularity. */ -#define HASHFRACTION 2 +/* + * XXX I think the above analysis completely misses the point. I think + * the point is that addresses in different functions must hash to + * different values. Since the hash is essentially division by + * sizeof(unsigned short), the correct formula is: + * + * HASHFRACTION = MIN_FUNCTION_ALIGNMENT / sizeof(unsigned short) + * + * Note that the unsigned short here has nothing to do with the one for + * HISTFRACTION. + * + * Hash collisions from a two call sequence don't matter. They get + * handled like collisions for calls to different addresses from the + * same address through a function pointer. + */ +#define HASHFRACTION (FUNCTION_ALIGNMENT / sizeof(unsigned short) == 0 \ + ? 1 : FUNCTION_ALIGNMENT / sizeof(unsigned short)) /* * percent of text space to allocate for tostructs with a minimum. 
@@ -132,17 +163,33 @@ struct rawarc { */ struct gmonparam { int state; - u_short *kcount; + HISTCOUNTER *kcount; u_long kcountsize; u_short *froms; u_long fromssize; struct tostruct *tos; u_long tossize; long tolimit; - u_long lowpc; - u_long highpc; + fptrint_t lowpc; + fptrint_t highpc; u_long textsize; u_long hashfraction; + u_long profrate; + HISTCOUNTER *cputime_count; + u_int cputime_overhead; + u_int cputime_overhead_frac; + u_int cputime_overhead_resid; + u_int cputime_overhead_sub; + HISTCOUNTER *mcount_count; + u_int mcount_overhead; + u_int mcount_overhead_frac; + u_int mcount_overhead_resid; + u_int mcount_overhead_sub; + HISTCOUNTER *mexitcount_count; + u_int mexitcount_overhead; + u_int mexitcount_overhead_frac; + u_int mexitcount_overhead_resid; + u_int mexitcount_overhead_sub; }; extern struct gmonparam _gmonparam; @@ -153,6 +200,7 @@ extern struct gmonparam _gmonparam; #define GMON_PROF_BUSY 1 #define GMON_PROF_ERROR 2 #define GMON_PROF_OFF 3 +#define GMON_PROF_HIRES 4 /* * Sysctl definitions for extracting profiling information from the kernel. |