author    | bde <bde@FreeBSD.org> | 1995-12-29 15:30:05 +0000
committer | bde <bde@FreeBSD.org> | 1995-12-29 15:30:05 +0000
commit    | 586cc683d875b37dce82c825feb9ccc7d884b35e (patch)
tree      | 9ce1e55534d3d930aead3ff55aeb7fcedbc086a4 /sys/amd64
parent    | ff6f507f6bbb3fda77fb14c7201db37bafea7a3f (diff)
Implemented non-statistical kernel profiling. This is based on
looking at a high resolution clock for each of the following events:
function call, function return, interrupt entry, interrupt exit,
and interesting branches. The differences between the times of
these events are added at appropriate places in an ordinary histogram
(as if very fast statistical profiling sampled the pc at those
places) so that ordinary gprof can be used to analyze the times.
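In outline, each event reads the clock and charges the time elapsed since the previous event to the histogram bucket for the current pc. The following is a minimal C sketch of that accumulation only, not the committed code; the names (charge, hires_clock, NBUCKETS) are illustrative assumptions, and the bucket arithmetic mirrors the KCOUNT() convention added in profile.h below.

#include <stddef.h>

#define NBUCKETS	4096	/* assumed number of histogram buckets */
#define HISTFRACTION	2	/* histogram scale, as in gmon's KCOUNT() */

static unsigned int	hist[NBUCKETS];	/* 4-byte counters; see gmon.h note below */
static unsigned long	lowpc;		/* lowest profiled text address */
static unsigned int	prev_time;	/* clock reading at the previous event */

/* Stand-in for the machine-dependent high resolution clock. */
static unsigned int
hires_clock(void)
{
	static unsigned int fake;

	return (fake += 7);	/* monotonic dummy so the sketch runs */
}

/*
 * Called at each event (function call/return, interrupt entry/exit,
 * interesting branch): charge the elapsed time to the bucket for `pc',
 * as if a very fast statistical profiler had sampled the pc there.
 */
static void
charge(unsigned long pc)
{
	unsigned int now = hires_clock();
	size_t i = (pc - lowpc) / (HISTFRACTION * sizeof(hist[0]));

	if (i < NBUCKETS)
		hist[i] += now - prev_time;
	prev_time = now;
}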
gmon.h:
Histogram counters need to be 4 bytes for microsecond resolutions.
They will need to be larger for the 586 clock.
The comments were vax-centric and wrong even on vaxes. Does anyone
disagree?
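The sizing is simple arithmetic: with microsecond units, a 16-bit counter wraps after 2^16 us (about 65 ms) charged to a single bucket, while a 32-bit counter lasts 2^32 us (about 71.6 minutes) per bucket. A sketch of the implied counter widths; the type names here are illustrative, and the 64-bit line is an assumption about what a 586 cycle-rate clock would need:

/* 2^16 us ~= 65 ms per bucket: wraps almost at once at usec resolution. */
typedef unsigned short		histcounter16_t;

/* 2^32 us ~= 71.6 min per bucket: adequate for microsecond units. */
typedef unsigned int		histcounter32_t;

/* A 586 cycle counter ticks ~100x faster than 1 MHz, so 32 bits wrap in
 * tens of seconds; a larger counter is needed. */
typedef unsigned long long	histcounter64_t;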
gprof4.c:
The standard gprof should support counters of all integral sizes
and the size of the counter should be in the gmon header. This
hack will do until then. (Use gprof4 -u to examine the results
of non-statistical profiling.)
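What the suggested header change might look like: the 4.4BSD-era gmon header records the pc range, buffer size, version and profiling clock rate, but not the width of each counter, which is why gprof4 has to be told out of band. The cnt_size field below is the proposed addition and is an assumption for illustration, not the actual format:

/* Sketch of the suggestion; `cnt_size' does not exist in the real header. */
struct gmonhdr_sketch {
	unsigned long	lpc;		/* base pc address of sample buffer */
	unsigned long	hpc;		/* max pc address of sampled buffer */
	int		ncnt;		/* size of sample buffer (plus header) */
	int		version;	/* version number */
	int		profrate;	/* profiling clock rate */
	int		cnt_size;	/* proposed: sizeof one histogram counter */
};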
config/*:
Non-statistical profiling is configured with `config -pp'.
`config -p' still gives ordinary profiling.
kgmon/*:
Non-statistical profiling is enabled with `kgmon -B'. `kgmon -b'
still enables ordinary profiling (and disables non-statistical
profiling) if non-statistical profiling is configured.
Diffstat (limited to 'sys/amd64')
-rw-r--r-- | sys/amd64/amd64/prof_machdep.c | 153
-rw-r--r-- | sys/amd64/include/asmacros.h   |  92
-rw-r--r-- | sys/amd64/include/profile.h    |  54
3 files changed, 256 insertions, 43 deletions
diff --git a/sys/amd64/amd64/prof_machdep.c b/sys/amd64/amd64/prof_machdep.c
new file mode 100644
index 0000000..2aa6787
--- /dev/null
+++ b/sys/amd64/amd64/prof_machdep.c
@@ -0,0 +1,153 @@
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <machine/clock.h>
+#include <i386/isa/isa.h>
+#include <i386/isa/timerreg.h>
+
+#ifdef GUPROF
+extern u_int	cputime __P((void));
+#endif
+
+#ifdef __GNUC__
+asm("
+GM_STATE	=	0
+GMON_PROF_OFF	=	3
+
+	.text
+	.align	4,0x90
+	.globl	__mcount
+__mcount:
+	#
+	# Check that we are profiling.  Do it early for speed.
+	#
+	cmpl	$GMON_PROF_OFF,__gmonparam+GM_STATE
+	je	Lmcount_exit
+	#
+	# __mcount is the same as mcount except the caller hasn't changed
+	# the stack except to call here, so the caller's raddr is above
+	# our raddr.
+	#
+	movl	4(%esp),%edx
+	jmp	Lgot_frompc
+
+	.align	4,0x90
+	.globl	mcount
+mcount:
+	cmpl	$GMON_PROF_OFF,__gmonparam+GM_STATE
+	je	Lmcount_exit
+	#
+	# The caller's stack frame has already been built, so %ebp is
+	# the caller's frame pointer.  The caller's raddr is in the
+	# caller's frame following the caller's caller's frame pointer.
+	#
+	movl	4(%ebp),%edx
+Lgot_frompc:
+	#
+	# Our raddr is the caller's pc.
+	#
+	movl	(%esp),%eax
+
+	pushf
+	pushl	%eax
+	pushl	%edx
+	cli
+	call	_mcount
+	addl	$8,%esp
+	popf
+Lmcount_exit:
+	ret
+");
+#else /* !__GNUC__ */
+#error
+#endif /* __GNUC__ */
+
+#ifdef GUPROF
+/*
+ * mexitcount saves the return register(s), loads selfpc and calls
+ * mexitcount(selfpc) to do the work.  Someday it should be in a machine
+ * dependent file together with cputime(), __mcount and mcount.  cputime()
+ * can't just be put in machdep.c because it has to be compiled without -pg.
+ */
+#ifdef __GNUC__
+asm("
+	.text
+#
+# Dummy label to be seen when gprof -u hides mexitcount.
+#
+	.align	4,0x90
+	.globl	__mexitcount
+__mexitcount:
+	nop
+
+GMON_PROF_HIRES	=	4
+
+	.align	4,0x90
+	.globl	mexitcount
+mexitcount:
+	cmpl	$GMON_PROF_HIRES,__gmonparam+GM_STATE
+	jne	Lmexitcount_exit
+	pushl	%edx
+	pushl	%eax
+	movl	8(%esp),%eax
+	pushf
+	pushl	%eax
+	cli
+	call	_mexitcount
+	addl	$4,%esp
+	popf
+	popl	%eax
+	popl	%edx
+Lmexitcount_exit:
+	ret
+");
+#else /* !__GNUC__ */
+#error
+#endif /* __GNUC__ */
+
+/*
+ * Return the time elapsed since the last call.  The units are machine-
+ * dependent.
+ */
+u_int
+cputime()
+{
+	u_int count;
+	u_int delta;
+	u_char low;
+	static u_int prev_count;
+
+	/*
+	 * Read the current value of the 8254 timer counter 0.
+	 */
+	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
+	low = inb(TIMER_CNTR0);
+	count = low | (inb(TIMER_CNTR0) << 8);
+
+	/*
+	 * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets.
+	 * While profiling is enabled, this routine is called at least twice
+	 * per timer reset (for mcounting and mexitcounting hardclock()),
+	 * so at most one reset has occurred since the last call, and one
+	 * has occurred iff the current count is larger than the previous
+	 * count.  This allows counter underflow to be detected faster
+	 * than in microtime().
+	 */
+	delta = prev_count - count;
+	prev_count = count;
+	if ((int) delta <= 0)
+		return (delta + timer0_max_count);
+	return (delta);
+}
+#else /* not GUPROF */
+#ifdef __GNUC__
+asm("
+	.text
+	.align	4,0x90
+	.globl	mexitcount
+mexitcount:
+	ret
+");
+#else /* !__GNUC__ */
+#error
+#endif /* __GNUC__ */
+#endif /* GUPROF */
diff --git a/sys/amd64/include/asmacros.h b/sys/amd64/include/asmacros.h
index b2a6dc8..8776ccf 100644
--- a/sys/amd64/include/asmacros.h
+++ b/sys/amd64/include/asmacros.h
@@ -30,7 +30,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
- * $Id: asmacros.h,v 1.4 1994/08/19 11:20:11 jkh Exp $
+ * $Id: asmacros.h,v 1.5 1994/09/08 12:25:18 bde Exp $
  */
 
 #ifndef _MACHINE_ASMACROS_H_
@@ -38,47 +38,83 @@
 
 #ifdef KERNEL
 
+/* XXX too much duplication in various asm*.h's and gprof.h's */
+
 #define ALIGN_DATA	.align	2	/* 4 byte alignment, zero filled */
 #define ALIGN_TEXT	.align	2,0x90	/* 4-byte alignment, nop filled */
 #define SUPERALIGN_TEXT	.align	4,0x90	/* 16-byte alignment (better for 486), nop filled */
 
-#define GEN_ENTRY(name)		ALIGN_TEXT; .globl name; name:
-#define NON_GPROF_ENTRY(name)	GEN_ENTRY(_/**/name)
-
-/* These three are place holders for future changes to the profiling code */
-#define MCOUNT_LABEL(name)
-#define MEXITCOUNT
-#define FAKE_MCOUNT(caller)
+#define GEN_ENTRY(name)		ALIGN_TEXT; .globl _/**/name; _/**/name:
+#define NON_GPROF_ENTRY(name)	GEN_ENTRY(name)
 
 #ifdef GPROF
 /*
- * ALTENTRY() must be before a corresponding ENTRY() so that it can jump
- * over the mcounting.
- */
-#define ALTENTRY(name)	GEN_ENTRY(_/**/name); MCOUNT; jmp 2f
-#define ENTRY(name)	GEN_ENTRY(_/**/name); MCOUNT; 2:
-/*
- * The call to mcount supports the usual (bad) conventions.  We allocate
- * some data and pass a pointer to it although the FreeBSD doesn't use
- * the data.  We set up a frame before calling mcount because that is
- * the standard convention although it makes work for both mcount and
- * callers.
+ * __mcount is like mcount except that it doesn't require its caller to set
+ * up a frame pointer.  It must be called before pushing anything onto the
+ * stack.  gcc should eventually generate code to call __mcount in most
+ * cases.  This would make -pg in combination with -fomit-frame-pointer
+ * useful.  gcc has a configuration variable PROFILE_BEFORE_PROLOGUE to
+ * allow profiling before setting up the frame pointer, but this is
+ * inadequate for good handling of special cases, e.g., -fpic works best
+ * with profiling after the prologue.
+ *
+ * Neither __mcount nor mcount requires %eax to point to 4 bytes of data,
+ * so don't waste space allocating the data or time setting it up.  Changes
+ * to avoid the wastage in gcc-2.4.5-compiled code are available.
+ *
+ * mexitcount is a new profiling feature to allow accurate timing of all
+ * functions if an accurate clock is available.  Changes to gcc-2.4.5 to
+ * support it are available.  The changes currently don't allow not
+ * generating mexitcounts for non-kernel code.  It is best to call
+ * mexitcount right at the end of a function like the MEXITCOUNT macro
+ * does, but the changes to gcc only implement calling it as the first
+ * thing in the epilogue to avoid problems with -fpic.
+ *
+ * mcount and __mexitcount may clobber the call-used registers and %ef.
+ * mexitcount may clobber %ecx and %ef.
+ *
+ * Cross-jumping makes accurate timing more difficult.  It is handled in
+ * many cases by calling mexitcount before jumping.  It is not handled
+ * for some conditional jumps (e.g., in bcopyx) or for some fault-handling
+ * jumps.  It is handled for some fault-handling jumps by not sharing the
+ * exit routine.
+ *
+ * ALTENTRY() must be before a corresponding ENTRY() so that it can jump to
+ * the main entry point.  Note that alt entries are counted twice.  They
+ * have to be counted as ordinary entries for gprof to get the call times
+ * right for the ordinary entries.
+ *
+ * High local labels are used in macros to avoid clashes with local labels
+ * in functions.
+ *
+ * "ret" is used instead of "RET" because there are a lot of "ret"s.
+ * 0xc3 is the opcode for "ret" (#define ret ... ret fails because this
+ * file is preprocessed in traditional mode).  "ret" clobbers eflags
+ * but this doesn't matter.
  */
-#define MCOUNT		.data; ALIGN_DATA; 1:; .long 0; .text; \
-			pushl %ebp; movl %esp,%ebp; \
-			movl $1b,%eax; call mcount; popl %ebp
-#else
+#define ALTENTRY(name)		GEN_ENTRY(name) ; MCOUNT ; MEXITCOUNT ; jmp 9f
+#define ENTRY(name)		GEN_ENTRY(name) ; 9: ; MCOUNT
+#define FAKE_MCOUNT(caller)	pushl caller ; call __mcount ; popl %ecx
+#define MCOUNT			call __mcount
+#define MCOUNT_LABEL(name)	GEN_ENTRY(name) ; nop ; ALIGN_TEXT
+#define MEXITCOUNT		call mexitcount
+#define ret			MEXITCOUNT ; .byte 0xc3
+#else /* not GPROF */
 /*
  * ALTENTRY() has to align because it is before a corresponding ENTRY().
  * ENTRY() has to align to because there may be no ALTENTRY() before it.
- * If there is a previous ALTENTRY() then the alignment code is empty.
+ * If there is a previous ALTENTRY() then the alignment code for ENTRY()
+ * is empty.
  */
-#define ALTENTRY(name)	GEN_ENTRY(_/**/name)
-#define ENTRY(name)	GEN_ENTRY(_/**/name)
+#define ALTENTRY(name)	GEN_ENTRY(name)
+#define ENTRY(name)	GEN_ENTRY(name)
+#define FAKE_MCOUNT(caller)
 #define MCOUNT
+#define MCOUNT_LABEL(name)
+#define MEXITCOUNT
+#endif /* GPROF */
 
-#endif
-
+/* XXX NOP and FASTER_NOP are misleadingly named */
 #ifdef DUMMY_NOPS	/* this will break some older machines */
 #define FASTER_NOP
 #define NOP
diff --git a/sys/amd64/include/profile.h b/sys/amd64/include/profile.h
index 9fe27ec..c55d629 100644
--- a/sys/amd64/include/profile.h
+++ b/sys/amd64/include/profile.h
@@ -31,35 +31,59 @@
  * SUCH DAMAGE.
  *
  *	@(#)profile.h	8.1 (Berkeley) 6/11/93
- *	$Id: profile.h,v 1.3 1994/08/21 04:55:29 paul Exp $
+ *	$Id: profile.h,v 1.4 1994/09/15 16:27:14 paul Exp $
  */
 
-#ifndef _I386_MACHINE_PROFILE_H_
-#define _I386_MACHINE_PROFILE_H_
+#ifndef _MACHINE_PROFILE_H_
+#define _MACHINE_PROFILE_H_
 
+#if 0
 #define	_MCOUNT_DECL static inline void _mcount
 
 #define	MCOUNT \
 extern void mcount() asm("mcount"); void mcount() { \
-	int selfpc, frompcindex; \
+	fptrint_t selfpc, frompc; \
 	/* \
-	 * find the return address for mcount, \
+	 * Find the return address for mcount, \
 	 * and the return address for mcount's caller. \
 	 * \
-	 * selfpc = pc pushed by mcount call \
+	 * selfpc = pc pushed by call to mcount \
 	 */ \
 	asm("movl 4(%%ebp),%0" : "=r" (selfpc)); \
 	/* \
-	 * frompcindex = pc pushed by jsr into self. \
-	 * In GCC the caller's stack frame has already been built so we \
-	 * have to chase a6 to find caller's raddr. \
+	 * frompc = pc pushed by call to mcount's caller. \
+	 * The caller's stack frame has already been built, so %ebp is \
+	 * the caller's frame pointer.  The caller's raddr is in the \
+	 * caller's frame following the caller's caller's frame pointer. \
 	 */ \
-	asm("movl (%%ebp),%0" : "=r" (frompcindex)); \
-	frompcindex = ((int *)frompcindex)[1]; \
-	_mcount(frompcindex, selfpc); \
+	asm("movl (%%ebp),%0" : "=r" (frompc)); \
+	frompc = ((fptrint_t *)frompc)[1]; \
+	_mcount(frompc, selfpc); \
 }
+#else
+#define	_MCOUNT_DECL void mcount
+#define	MCOUNT
+#endif
 
-#define MCOUNT_ENTER	save_eflags = read_eflags(); disable_intr()
-#define MCOUNT_EXIT	write_eflags(save_eflags)
+#define	MCOUNT_ENTER	{ save_eflags = read_eflags(); disable_intr(); }
+#define	MCOUNT_EXIT	(write_eflags(save_eflags))
 
-#endif
+#define	CALIB_SCALE	1000
+#define	KCOUNT(p,index)	((p)->kcount[(index) \
+			 / (HISTFRACTION * sizeof(*(p)->kcount))])
+#define	PC_TO_I(p, pc)	((fptrint_t)(pc) - (fptrint_t)(p)->lowpc)
+
+/* An unsigned integral type that can hold function pointers. */
+typedef	u_int	fptrint_t;
+
+/*
+ * An unsigned integral type that can hold non-negative difference between
+ * function pointers.
+ */
+typedef	int	fptrdiff_t;
+
+u_int	cputime __P((void));
+void	mcount __P((fptrint_t frompc, fptrint_t selfpc));
+void	mexitcount __P((fptrint_t selfpc));
+
+#endif /* !MACHINE_PROFILE_H */
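To make the underflow handling in cputime() above concrete: the 8254 counts down, so the elapsed tick count is prev_count - count, and since at most one reset can have occurred between calls, a non-positive signed difference means the counter reloaded once and one full period must be added back. A standalone sketch of just that arithmetic; the reload value here is assumed (the kernel derives timer0_max_count from the clock rate):

#include <stdio.h>

#define TIMER0_MAX_COUNT	11932U	/* assumed reload value, ~10 ms at 1.193 MHz */

/* Elapsed ticks between two down-counter readings, at most one reset. */
static unsigned int
elapsed(unsigned int prev_count, unsigned int count)
{
	unsigned int delta = prev_count - count;

	if ((int)delta <= 0)		/* counter reset since the last reading */
		return (delta + TIMER0_MAX_COUNT);
	return (delta);
}

int
main(void)
{
	printf("%u\n", elapsed(1000, 990));	/* no reset: 10 ticks */
	printf("%u\n", elapsed(5, 11927));	/* one reset: 5 + (11932 - 11927) = 10 */
	return (0);
}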