summaryrefslogtreecommitdiffstats
path: root/sys/i386
diff options
context:
space:
mode:
authorbde <bde@FreeBSD.org>1995-12-29 15:30:05 +0000
committerbde <bde@FreeBSD.org>1995-12-29 15:30:05 +0000
commit586cc683d875b37dce82c825feb9ccc7d884b35e (patch)
tree9ce1e55534d3d930aead3ff55aeb7fcedbc086a4 /sys/i386
parentff6f507f6bbb3fda77fb14c7201db37bafea7a3f (diff)
downloadFreeBSD-src-586cc683d875b37dce82c825feb9ccc7d884b35e.zip
FreeBSD-src-586cc683d875b37dce82c825feb9ccc7d884b35e.tar.gz
Implemented non-statistical kernel profiling. This is based on
looking at a high resolution clock for each of the following events: function call, function return, interrupt entry, interrupt exit, and interesting branches. The differences between the times of these events are added at appropriate places in an ordinary histogram (as if very fast statistical profiling sampled the pc at those places) so that ordinary gprof can be used to analyze the times. gmon.h: Histogram counters need to be 4 bytes for microsecond resolutions. They will need to be larger for the 586 clock. The comments were vax-centric and wrong even on vaxes. Does anyone disagree? gprof4.c: The standard gprof should support counters of all integral sizes and the size of the counter should be in the gmon header. This hack will do until then. (Use gprof4 -u to examine the results of non-statistical profiling.) config/*: Non-statistical profiling is configured with `config -pp'. `config -p' still gives ordinary profiling. kgmon/*: Non-statistical profiling is enabled with `kgmon -B'. `kgmon -b' still enables ordinary profiling (and disables non-statistical profiling) if non-statistical profiling is configured.
Diffstat (limited to 'sys/i386')
-rw-r--r--sys/i386/conf/files.i3863
-rw-r--r--sys/i386/include/asmacros.h92
-rw-r--r--sys/i386/include/profile.h54
-rw-r--r--sys/i386/isa/prof_machdep.c153
4 files changed, 258 insertions, 44 deletions
diff --git a/sys/i386/conf/files.i386 b/sys/i386/conf/files.i386
index fda656d..1633ed8 100644
--- a/sys/i386/conf/files.i386
+++ b/sys/i386/conf/files.i386
@@ -1,7 +1,7 @@
# This file tells config what files go into building a kernel,
# files marked standard are always included.
#
-# $Id: files.i386,v 1.120 1995/12/26 12:50:01 bde Exp $
+# $Id: files.i386,v 1.121 1995/12/26 13:57:56 bde Exp $
#
aic7xxx_asm optional ahc device-driver \
dependency "$S/dev/aic7xxx/aic7xxx_asm.c" \
@@ -115,6 +115,7 @@ i386/isa/pcvt/pcvt_kbd.c optional vt device-driver
i386/isa/pcvt/pcvt_out.c optional vt device-driver
i386/isa/pcvt/pcvt_sup.c optional vt device-driver
i386/isa/pcvt/pcvt_vtf.c optional vt device-driver
+i386/isa/prof_machdep.c optional profiling-routine
i386/isa/psm.c optional psm device-driver
i386/isa/random_machdep.c standard
i386/isa/rc.c optional rc device-driver
diff --git a/sys/i386/include/asmacros.h b/sys/i386/include/asmacros.h
index b2a6dc8..8776ccf 100644
--- a/sys/i386/include/asmacros.h
+++ b/sys/i386/include/asmacros.h
@@ -30,7 +30,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: asmacros.h,v 1.4 1994/08/19 11:20:11 jkh Exp $
+ * $Id: asmacros.h,v 1.5 1994/09/08 12:25:18 bde Exp $
*/
#ifndef _MACHINE_ASMACROS_H_
@@ -38,47 +38,83 @@
#ifdef KERNEL
+/* XXX too much duplication in various asm*.h's and gprof.h's */
+
#define ALIGN_DATA .align 2 /* 4 byte alignment, zero filled */
#define ALIGN_TEXT .align 2,0x90 /* 4-byte alignment, nop filled */
#define SUPERALIGN_TEXT .align 4,0x90 /* 16-byte alignment (better for 486), nop filled */
-#define GEN_ENTRY(name) ALIGN_TEXT; .globl name; name:
-#define NON_GPROF_ENTRY(name) GEN_ENTRY(_/**/name)
-
-/* These three are place holders for future changes to the profiling code */
-#define MCOUNT_LABEL(name)
-#define MEXITCOUNT
-#define FAKE_MCOUNT(caller)
+#define GEN_ENTRY(name) ALIGN_TEXT; .globl _/**/name; _/**/name:
+#define NON_GPROF_ENTRY(name) GEN_ENTRY(name)
#ifdef GPROF
/*
- * ALTENTRY() must be before a corresponding ENTRY() so that it can jump
- * over the mcounting.
- */
-#define ALTENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; jmp 2f
-#define ENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; 2:
-/*
- * The call to mcount supports the usual (bad) conventions. We allocate
- * some data and pass a pointer to it although the FreeBSD doesn't use
- * the data. We set up a frame before calling mcount because that is
- * the standard convention although it makes work for both mcount and
- * callers.
+ * __mcount is like mcount except that doesn't require its caller to set
+ * up a frame pointer. It must be called before pushing anything onto the
+ * stack. gcc should eventually generate code to call __mcount in most
+ * cases. This would make -pg in combination with -fomit-frame-pointer
+ * useful. gcc has a configuration variable PROFILE_BEFORE_PROLOGUE to
+ * allow profiling before setting up the frame pointer, but this is
+ * inadequate for good handling of special cases, e.g., -fpic works best
+ * with profiling after the prologue.
+ *
+ * Neither __mcount nor mcount requires %eax to point to 4 bytes of data,
+ * so don't waste space allocating the data or time setting it up. Changes
+ * to avoid the wastage in gcc-2.4.5-compiled code are available.
+ *
+ * mexitcount is a new profiling feature to allow accurate timing of all
+ * functions if an accurate clock is available. Changes to gcc-2.4.5 to
+ * support it are are available. The changes currently don't allow not
+ * generating mexitcounts for non-kernel code. It is best to call
+ * mexitcount right at the end of a function like the MEXITCOUNT macro
+ * does, but the changes to gcc only implement calling it as the first
+ * thing in the epilogue to avoid problems with -fpic.
+ *
+ * mcount and __mexitcount may clobber the call-used registers and %ef.
+ * mexitcount may clobber %ecx and %ef.
+ *
+ * Cross-jumping makes accurate timing more difficult. It is handled in
+ * many cases by calling mexitcount before jumping. It is not handled
+ * for some conditional jumps (e.g., in bcopyx) or for some fault-handling
+ * jumps. It is handled for some fault-handling jumps by not sharing the
+ * exit routine.
+ *
+ * ALTENTRY() must be before a corresponding ENTRY() so that it can jump to
+ * the main entry point. Note that alt entries are counted twice. They
+ * have to be counted as ordinary entries for gprof to get the call times
+ * right for the ordinary entries.
+ *
+ * High local labels are used in macros to avoid clashes with local labels
+ * in functions.
+ *
+ * "ret" is used instead of "RET" because there are a lot of "ret"s.
+ * 0xc3 is the opcode for "ret" (#define ret ... ret fails because this
+ * file is preprocessed in traditional mode). "ret" clobbers eflags
+ * but this doesn't matter.
*/
-#define MCOUNT .data; ALIGN_DATA; 1:; .long 0; .text; \
- pushl %ebp; movl %esp,%ebp; \
- movl $1b,%eax; call mcount; popl %ebp
-#else
+#define ALTENTRY(name) GEN_ENTRY(name) ; MCOUNT ; MEXITCOUNT ; jmp 9f
+#define ENTRY(name) GEN_ENTRY(name) ; 9: ; MCOUNT
+#define FAKE_MCOUNT(caller) pushl caller ; call __mcount ; popl %ecx
+#define MCOUNT call __mcount
+#define MCOUNT_LABEL(name) GEN_ENTRY(name) ; nop ; ALIGN_TEXT
+#define MEXITCOUNT call mexitcount
+#define ret MEXITCOUNT ; .byte 0xc3
+#else /* not GPROF */
/*
* ALTENTRY() has to align because it is before a corresponding ENTRY().
* ENTRY() has to align to because there may be no ALTENTRY() before it.
- * If there is a previous ALTENTRY() then the alignment code is empty.
+ * If there is a previous ALTENTRY() then the alignment code for ENTRY()
+ * is empty.
*/
-#define ALTENTRY(name) GEN_ENTRY(_/**/name)
-#define ENTRY(name) GEN_ENTRY(_/**/name)
+#define ALTENTRY(name) GEN_ENTRY(name)
+#define ENTRY(name) GEN_ENTRY(name)
+#define FAKE_MCOUNT(caller)
#define MCOUNT
+#define MCOUNT_LABEL(name)
+#define MEXITCOUNT
+#endif /* GPROF */
-#endif
-
+/* XXX NOP and FASTER_NOP are misleadingly named */
#ifdef DUMMY_NOPS /* this will break some older machines */
#define FASTER_NOP
#define NOP
diff --git a/sys/i386/include/profile.h b/sys/i386/include/profile.h
index 9fe27ec..c55d629 100644
--- a/sys/i386/include/profile.h
+++ b/sys/i386/include/profile.h
@@ -31,35 +31,59 @@
* SUCH DAMAGE.
*
* @(#)profile.h 8.1 (Berkeley) 6/11/93
- * $Id: profile.h,v 1.3 1994/08/21 04:55:29 paul Exp $
+ * $Id: profile.h,v 1.4 1994/09/15 16:27:14 paul Exp $
*/
-#ifndef _I386_MACHINE_PROFILE_H_
-#define _I386_MACHINE_PROFILE_H_
+#ifndef _MACHINE_PROFILE_H_
+#define _MACHINE_PROFILE_H_
+#if 0
#define _MCOUNT_DECL static inline void _mcount
#define MCOUNT \
extern void mcount() asm("mcount"); void mcount() { \
- int selfpc, frompcindex; \
+ fptrint_t selfpc, frompc; \
/* \
- * find the return address for mcount, \
+ * Find the return address for mcount, \
* and the return address for mcount's caller. \
* \
- * selfpc = pc pushed by mcount call \
+ * selfpc = pc pushed by call to mcount \
*/ \
asm("movl 4(%%ebp),%0" : "=r" (selfpc)); \
/* \
- * frompcindex = pc pushed by jsr into self. \
- * In GCC the caller's stack frame has already been built so we \
- * have to chase a6 to find caller's raddr. \
+ * frompc = pc pushed by call to mcount's caller. \
+ * The caller's stack frame has already been built, so %ebp is \
+ * the caller's frame pointer. The caller's raddr is in the \
+ * caller's frame following the caller's caller's frame pointer. \
*/ \
- asm("movl (%%ebp),%0" : "=r" (frompcindex)); \
- frompcindex = ((int *)frompcindex)[1]; \
- _mcount(frompcindex, selfpc); \
+ asm("movl (%%ebp),%0" : "=r" (frompc)); \
+ frompc = ((fptrint_t *)frompc)[1]; \
+ _mcount(frompc, selfpc); \
}
+#else
+#define _MCOUNT_DECL void mcount
+#define MCOUNT
+#endif
-#define MCOUNT_ENTER save_eflags = read_eflags(); disable_intr()
-#define MCOUNT_EXIT write_eflags(save_eflags)
+#define MCOUNT_ENTER { save_eflags = read_eflags(); disable_intr(); }
+#define MCOUNT_EXIT (write_eflags(save_eflags))
-#endif
+#define CALIB_SCALE 1000
+#define KCOUNT(p,index) ((p)->kcount[(index) \
+ / (HISTFRACTION * sizeof(*(p)->kcount))])
+#define PC_TO_I(p, pc) ((fptrint_t)(pc) - (fptrint_t)(p)->lowpc)
+
+/* An unsigned integral type that can hold function pointers. */
+typedef u_int fptrint_t;
+
+/*
+ * An unsigned integral type that can hold non-negative difference between
+ * function pointers.
+ */
+typedef int fptrdiff_t;
+
+u_int cputime __P((void));
+void mcount __P((fptrint_t frompc, fptrint_t selfpc));
+void mexitcount __P((fptrint_t selfpc));
+
+#endif /* !MACHINE_PROFILE_H */
diff --git a/sys/i386/isa/prof_machdep.c b/sys/i386/isa/prof_machdep.c
new file mode 100644
index 0000000..2aa6787
--- /dev/null
+++ b/sys/i386/isa/prof_machdep.c
@@ -0,0 +1,153 @@
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <machine/clock.h>
+#include <i386/isa/isa.h>
+#include <i386/isa/timerreg.h>
+
+#ifdef GUPROF
+extern u_int cputime __P((void));
+#endif
+
+#ifdef __GNUC__
+asm("
+GM_STATE = 0
+GMON_PROF_OFF = 3
+
+ .text
+ .align 4,0x90
+ .globl __mcount
+__mcount:
+ #
+ # Check that we are profiling. Do it early for speed.
+ #
+ cmpl $GMON_PROF_OFF,__gmonparam+GM_STATE
+ je Lmcount_exit
+ #
+ # __mcount is the same as mcount except the caller hasn't changed
+ # the stack except to call here, so the caller's raddr is above
+ # our raddr.
+ #
+ movl 4(%esp),%edx
+ jmp Lgot_frompc
+
+ .align 4,0x90
+ .globl mcount
+mcount:
+ cmpl $GMON_PROF_OFF,__gmonparam+GM_STATE
+ je Lmcount_exit
+ #
+ # The caller's stack frame has already been built, so %ebp is
+ # the caller's frame pointer. The caller's raddr is in the
+ # caller's frame following the caller's caller's frame pointer.
+ #
+ movl 4(%ebp),%edx
+Lgot_frompc:
+ #
+ # Our raddr is the caller's pc.
+ #
+ movl (%esp),%eax
+
+ pushf
+ pushl %eax
+ pushl %edx
+ cli
+ call _mcount
+ addl $8,%esp
+ popf
+Lmcount_exit:
+ ret
+");
+#else /* !__GNUC__ */
+#error
+#endif /* __GNUC__ */
+
+#ifdef GUPROF
+/*
+ * mexitcount saves the return register(s), loads selfpc and calls
+ * mexitcount(selfpc) to do the work. Someday it should be in a machine
+ * dependent file together with cputime(), __mcount and mcount. cputime()
+ * can't just be put in machdep.c because it has to be compiled without -pg.
+ */
+#ifdef __GNUC__
+asm("
+ .text
+#
+# Dummy label to be seen when gprof -u hides mexitcount.
+#
+ .align 4,0x90
+ .globl __mexitcount
+__mexitcount:
+ nop
+
+GMON_PROF_HIRES = 4
+
+ .align 4,0x90
+ .globl mexitcount
+mexitcount:
+ cmpl $GMON_PROF_HIRES,__gmonparam+GM_STATE
+ jne Lmexitcount_exit
+ pushl %edx
+ pushl %eax
+ movl 8(%esp),%eax
+ pushf
+ pushl %eax
+ cli
+ call _mexitcount
+ addl $4,%esp
+ popf
+ popl %eax
+ popl %edx
+Lmexitcount_exit:
+ ret
+");
+#else /* !__GNUC__ */
+#error
+#endif /* __GNUC__ */
+
+/*
+ * Return the time elapsed since the last call. The units are machine-
+ * dependent.
+ */
+u_int
+cputime()
+{
+ u_int count;
+ u_int delta;
+ u_char low;
+ static u_int prev_count;
+
+ /*
+ * Read the current value of the 8254 timer counter 0.
+ */
+ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
+ low = inb(TIMER_CNTR0);
+ count = low | (inb(TIMER_CNTR0) << 8);
+
+ /*
+ * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets.
+ * While profiling is enabled, this routine is called at least twice
+ * per timer reset (for mcounting and mexitcounting hardclock()),
+ * so at most one reset has occurred since the last call, and one
+ * has occurred iff the current count is larger than the previous
+ * count. This allows counter underflow to be detected faster
+ * than in microtime().
+ */
+ delta = prev_count - count;
+ prev_count = count;
+ if ((int) delta <= 0)
+ return (delta + timer0_max_count);
+ return (delta);
+}
+#else /* not GUPROF */
+#ifdef __GNUC__
+asm("
+ .text
+ .align 4,0x90
+ .globl mexitcount
+mexitcount:
+ ret
+");
+#else /* !__GNUC__ */
+#error
+#endif /* __GNUC__ */
+#endif /* GUPROF */
OpenPOWER on IntegriCloud