summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/libc/gmon/mcount.c156
-rw-r--r--sys/amd64/amd64/prof_machdep.c153
-rw-r--r--sys/amd64/include/asmacros.h92
-rw-r--r--sys/amd64/include/profile.h54
-rw-r--r--sys/conf/files.i3863
-rw-r--r--sys/i386/conf/files.i3863
-rw-r--r--sys/i386/include/asmacros.h92
-rw-r--r--sys/i386/include/profile.h54
-rw-r--r--sys/i386/isa/prof_machdep.c153
-rw-r--r--sys/kern/subr_prof.c95
-rw-r--r--sys/libkern/mcount.c182
-rw-r--r--sys/sys/gmon.h64
-rw-r--r--usr.bin/Makefile3
-rw-r--r--usr.bin/gprof4/Makefile14
-rw-r--r--usr.sbin/config/config.85
-rw-r--r--usr.sbin/config/mkmakefile.c8
-rw-r--r--usr.sbin/kgmon/kgmon.811
-rw-r--r--usr.sbin/kgmon/kgmon.c37
18 files changed, 876 insertions, 303 deletions
diff --git a/lib/libc/gmon/mcount.c b/lib/libc/gmon/mcount.c
index 63fbf88..59d8de4 100644
--- a/lib/libc/gmon/mcount.c
+++ b/lib/libc/gmon/mcount.c
@@ -38,7 +38,14 @@ static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93";
#include <sys/param.h>
#include <sys/gmon.h>
#ifdef KERNEL
-#include <i386/include/cpufunc.h>
+#include <sys/systm.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+void bintr __P((void));
+void btrap __P((void));
+void eintr __P((void));
+void user __P((void));
#endif
/*
@@ -57,39 +64,127 @@ static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93";
* perform this optimization.
*/
_MCOUNT_DECL(frompc, selfpc) /* _mcount; may be static, inline, etc */
- register u_long frompc, selfpc;
+ register fptrint_t frompc, selfpc;
{
+#ifdef GUPROF
+ u_int delta;
+#endif
+ register fptrdiff_t frompci;
register u_short *frompcindex;
register struct tostruct *top, *prevtop;
register struct gmonparam *p;
register long toindex;
#ifdef KERNEL
- register int s;
- u_long save_eflags;
+ register int s; /* XXX */
+ u_long save_eflags; /* XXX */
#endif
p = &_gmonparam;
+#ifndef GUPROF /* XXX */
/*
* check that we are profiling
* and that we aren't recursively invoked.
*/
if (p->state != GMON_PROF_ON)
return;
+#endif
#ifdef KERNEL
MCOUNT_ENTER;
#else
p->state = GMON_PROF_BUSY;
#endif
+ frompci = frompc - p->lowpc;
+
+#ifdef KERNEL
+ /*
+ * When we are called from an exception handler, frompci may be
+ * for a user address. Convert such frompci's to the index of
+ * user() to merge all user counts.
+ */
+ if (frompci >= p->textsize) {
+ if (frompci + p->lowpc
+ >= (fptrint_t)(VM_MAXUSER_ADDRESS + UPAGES * NBPG))
+ goto done;
+ frompci = (fptrint_t)user - p->lowpc;
+ if (frompci >= p->textsize)
+ goto done;
+ }
+#endif /* KERNEL */
+
+#ifdef GUPROF
+ if (p->state != GMON_PROF_HIRES)
+ goto skip_guprof_stuff;
+ /*
+ * Look at the clock and add the count of clock cycles since the
+ * clock was last looked at to a counter for frompc. This
+ * solidifies the count for the function containing frompc and
+ * effectively starts another clock for the current function.
+ * The count for the new clock will be solidified when another
+ * function call is made or the function returns.
+ *
+ * We use the usual sampling counters since they can be located
+ * efficiently. 4-byte counters are usually necessary.
+ *
+ * There are many complications for subtracting the profiling
+ * overheads from the counts for normal functions and adding
+ * them to the counts for mcount(), mexitcount() and cputime().
+ * We attempt to handle fractional cycles, but the overheads
+ * are usually underestimated because they are calibrated for
+ * a simpler than usual setup.
+ */
+ delta = cputime() - p->mcount_overhead;
+ p->cputime_overhead_resid += p->cputime_overhead_frac;
+ p->mcount_overhead_resid += p->mcount_overhead_frac;
+ if ((int)delta < 0)
+ *p->mcount_count += delta + p->mcount_overhead
+ - p->cputime_overhead;
+ else if (delta != 0) {
+ if (p->cputime_overhead_resid >= CALIB_SCALE) {
+ p->cputime_overhead_resid -= CALIB_SCALE;
+ ++*p->cputime_count;
+ --delta;
+ }
+ if (delta != 0) {
+ if (p->mcount_overhead_resid >= CALIB_SCALE) {
+ p->mcount_overhead_resid -= CALIB_SCALE;
+ ++*p->mcount_count;
+ --delta;
+ }
+ KCOUNT(p, frompci) += delta;
+ }
+ *p->mcount_count += p->mcount_overhead_sub;
+ }
+ *p->cputime_count += p->cputime_overhead;
+skip_guprof_stuff:
+#endif /* GUPROF */
+
+#ifdef KERNEL
+ /*
+ * When we are called from an exception handler, frompc is faked
+ * to be for where the exception occurred. We've just solidified
+ * the count for there. Now convert frompci to the index of btrap()
+ * for trap handlers and bintr() for interrupt handlers to make
+ * exceptions appear in the call graph as calls from btrap() and
+ * bintr() instead of calls from all over.
+ */
+ if ((fptrint_t)selfpc >= (fptrint_t)btrap
+ && (fptrint_t)selfpc < (fptrint_t)eintr) {
+ if ((fptrint_t)selfpc >= (fptrint_t)bintr)
+ frompci = (fptrint_t)bintr - p->lowpc;
+ else
+ frompci = (fptrint_t)btrap - p->lowpc;
+ }
+#endif /* KERNEL */
+
/*
- * check that frompcindex is a reasonable pc value.
+ * check that frompc is a reasonable pc value.
* for example: signal catchers get called from the stack,
* not from text space. too bad.
*/
- frompc -= p->lowpc;
- if (frompc > p->textsize)
+ if (frompci >= p->textsize)
goto done;
- frompcindex = &p->froms[frompc / (p->hashfraction * sizeof(*p->froms))];
+ frompcindex = &p->froms[frompci / (p->hashfraction * sizeof(*p->froms))];
toindex = *frompcindex;
if (toindex == 0) {
/*
@@ -180,3 +275,48 @@ overflow:
* which is included by <sys/gmon.h>.
*/
MCOUNT
+
+#ifdef GUPROF
+void
+mexitcount(selfpc)
+ fptrint_t selfpc;
+{
+ struct gmonparam *p;
+ fptrint_t selfpcdiff;
+
+ p = &_gmonparam;
+ selfpcdiff = selfpc - (fptrint_t)p->lowpc;
+ if (selfpcdiff < p->textsize) {
+ u_int delta;
+
+ /*
+ * Solidify the count for the current function.
+ */
+ delta = cputime() - p->mexitcount_overhead;
+ p->cputime_overhead_resid += p->cputime_overhead_frac;
+ p->mexitcount_overhead_resid += p->mexitcount_overhead_frac;
+ if ((int)delta < 0)
+ *p->mexitcount_count += delta + p->mexitcount_overhead
+ - p->cputime_overhead;
+ else if (delta != 0) {
+ if (p->cputime_overhead_resid >= CALIB_SCALE) {
+ p->cputime_overhead_resid -= CALIB_SCALE;
+ ++*p->cputime_count;
+ --delta;
+ }
+ if (delta != 0) {
+ if (p->mexitcount_overhead_resid
+ >= CALIB_SCALE) {
+ p->mexitcount_overhead_resid
+ -= CALIB_SCALE;
+ ++*p->mexitcount_count;
+ --delta;
+ }
+ KCOUNT(p, selfpcdiff) += delta;
+ }
+ *p->mexitcount_count += p->mexitcount_overhead_sub;
+ }
+ *p->cputime_count += p->cputime_overhead;
+ }
+}
+#endif /* GUPROF */
diff --git a/sys/amd64/amd64/prof_machdep.c b/sys/amd64/amd64/prof_machdep.c
new file mode 100644
index 0000000..2aa6787
--- /dev/null
+++ b/sys/amd64/amd64/prof_machdep.c
@@ -0,0 +1,153 @@
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <machine/clock.h>
+#include <i386/isa/isa.h>
+#include <i386/isa/timerreg.h>
+
+#ifdef GUPROF
+extern u_int cputime __P((void));
+#endif
+
+#ifdef __GNUC__
+asm("
+GM_STATE = 0
+GMON_PROF_OFF = 3
+
+ .text
+ .align 4,0x90
+ .globl __mcount
+__mcount:
+ #
+ # Check that we are profiling. Do it early for speed.
+ #
+ cmpl $GMON_PROF_OFF,__gmonparam+GM_STATE
+ je Lmcount_exit
+ #
+ # __mcount is the same as mcount except the caller hasn't changed
+ # the stack except to call here, so the caller's raddr is above
+ # our raddr.
+ #
+ movl 4(%esp),%edx
+ jmp Lgot_frompc
+
+ .align 4,0x90
+ .globl mcount
+mcount:
+ cmpl $GMON_PROF_OFF,__gmonparam+GM_STATE
+ je Lmcount_exit
+ #
+ # The caller's stack frame has already been built, so %ebp is
+ # the caller's frame pointer. The caller's raddr is in the
+ # caller's frame following the caller's caller's frame pointer.
+ #
+ movl 4(%ebp),%edx
+Lgot_frompc:
+ #
+ # Our raddr is the caller's pc.
+ #
+ movl (%esp),%eax
+
+ pushf
+ pushl %eax
+ pushl %edx
+ cli
+ call _mcount
+ addl $8,%esp
+ popf
+Lmcount_exit:
+ ret
+");
+#else /* !__GNUC__ */
+#error
+#endif /* __GNUC__ */
+
+#ifdef GUPROF
+/*
+ * mexitcount saves the return register(s), loads selfpc and calls
+ * mexitcount(selfpc) to do the work. Someday it should be in a machine
+ * dependent file together with cputime(), __mcount and mcount. cputime()
+ * can't just be put in machdep.c because it has to be compiled without -pg.
+ */
+#ifdef __GNUC__
+asm("
+ .text
+#
+# Dummy label to be seen when gprof -u hides mexitcount.
+#
+ .align 4,0x90
+ .globl __mexitcount
+__mexitcount:
+ nop
+
+GMON_PROF_HIRES = 4
+
+ .align 4,0x90
+ .globl mexitcount
+mexitcount:
+ cmpl $GMON_PROF_HIRES,__gmonparam+GM_STATE
+ jne Lmexitcount_exit
+ pushl %edx
+ pushl %eax
+ movl 8(%esp),%eax
+ pushf
+ pushl %eax
+ cli
+ call _mexitcount
+ addl $4,%esp
+ popf
+ popl %eax
+ popl %edx
+Lmexitcount_exit:
+ ret
+");
+#else /* !__GNUC__ */
+#error
+#endif /* __GNUC__ */
+
+/*
+ * Return the time elapsed since the last call. The units are machine-
+ * dependent.
+ */
+u_int
+cputime()
+{
+ u_int count;
+ u_int delta;
+ u_char low;
+ static u_int prev_count;
+
+ /*
+ * Read the current value of the 8254 timer counter 0.
+ */
+ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
+ low = inb(TIMER_CNTR0);
+ count = low | (inb(TIMER_CNTR0) << 8);
+
+ /*
+ * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets.
+ * While profiling is enabled, this routine is called at least twice
+ * per timer reset (for mcounting and mexitcounting hardclock()),
+ * so at most one reset has occurred since the last call, and one
+ * has occurred iff the current count is larger than the previous
+ * count. This allows counter underflow to be detected faster
+ * than in microtime().
+ */
+ delta = prev_count - count;
+ prev_count = count;
+ if ((int) delta <= 0)
+ return (delta + timer0_max_count);
+ return (delta);
+}
+#else /* not GUPROF */
+#ifdef __GNUC__
+asm("
+ .text
+ .align 4,0x90
+ .globl mexitcount
+mexitcount:
+ ret
+");
+#else /* !__GNUC__ */
+#error
+#endif /* __GNUC__ */
+#endif /* GUPROF */
diff --git a/sys/amd64/include/asmacros.h b/sys/amd64/include/asmacros.h
index b2a6dc8..8776ccf 100644
--- a/sys/amd64/include/asmacros.h
+++ b/sys/amd64/include/asmacros.h
@@ -30,7 +30,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: asmacros.h,v 1.4 1994/08/19 11:20:11 jkh Exp $
+ * $Id: asmacros.h,v 1.5 1994/09/08 12:25:18 bde Exp $
*/
#ifndef _MACHINE_ASMACROS_H_
@@ -38,47 +38,83 @@
#ifdef KERNEL
+/* XXX too much duplication in various asm*.h's and gprof.h's */
+
#define ALIGN_DATA .align 2 /* 4 byte alignment, zero filled */
#define ALIGN_TEXT .align 2,0x90 /* 4-byte alignment, nop filled */
#define SUPERALIGN_TEXT .align 4,0x90 /* 16-byte alignment (better for 486), nop filled */
-#define GEN_ENTRY(name) ALIGN_TEXT; .globl name; name:
-#define NON_GPROF_ENTRY(name) GEN_ENTRY(_/**/name)
-
-/* These three are place holders for future changes to the profiling code */
-#define MCOUNT_LABEL(name)
-#define MEXITCOUNT
-#define FAKE_MCOUNT(caller)
+#define GEN_ENTRY(name) ALIGN_TEXT; .globl _/**/name; _/**/name:
+#define NON_GPROF_ENTRY(name) GEN_ENTRY(name)
#ifdef GPROF
/*
- * ALTENTRY() must be before a corresponding ENTRY() so that it can jump
- * over the mcounting.
- */
-#define ALTENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; jmp 2f
-#define ENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; 2:
-/*
- * The call to mcount supports the usual (bad) conventions. We allocate
- * some data and pass a pointer to it although the FreeBSD doesn't use
- * the data. We set up a frame before calling mcount because that is
- * the standard convention although it makes work for both mcount and
- * callers.
+ * __mcount is like mcount except that it doesn't require its caller to set
+ * up a frame pointer. It must be called before pushing anything onto the
+ * stack. gcc should eventually generate code to call __mcount in most
+ * cases. This would make -pg in combination with -fomit-frame-pointer
+ * useful. gcc has a configuration variable PROFILE_BEFORE_PROLOGUE to
+ * allow profiling before setting up the frame pointer, but this is
+ * inadequate for good handling of special cases, e.g., -fpic works best
+ * with profiling after the prologue.
+ *
+ * Neither __mcount nor mcount requires %eax to point to 4 bytes of data,
+ * so don't waste space allocating the data or time setting it up. Changes
+ * to avoid the wastage in gcc-2.4.5-compiled code are available.
+ *
+ * mexitcount is a new profiling feature to allow accurate timing of all
+ * functions if an accurate clock is available. Changes to gcc-2.4.5 to
+ * support it are available. The changes currently don't allow not
+ * generating mexitcounts for non-kernel code. It is best to call
+ * mexitcount right at the end of a function like the MEXITCOUNT macro
+ * does, but the changes to gcc only implement calling it as the first
+ * thing in the epilogue to avoid problems with -fpic.
+ *
+ * mcount and __mexitcount may clobber the call-used registers and %ef.
+ * mexitcount may clobber %ecx and %ef.
+ *
+ * Cross-jumping makes accurate timing more difficult. It is handled in
+ * many cases by calling mexitcount before jumping. It is not handled
+ * for some conditional jumps (e.g., in bcopyx) or for some fault-handling
+ * jumps. It is handled for some fault-handling jumps by not sharing the
+ * exit routine.
+ *
+ * ALTENTRY() must be before a corresponding ENTRY() so that it can jump to
+ * the main entry point. Note that alt entries are counted twice. They
+ * have to be counted as ordinary entries for gprof to get the call times
+ * right for the ordinary entries.
+ *
+ * High local labels are used in macros to avoid clashes with local labels
+ * in functions.
+ *
+ * "ret" is used instead of "RET" because there are a lot of "ret"s.
+ * 0xc3 is the opcode for "ret" (#define ret ... ret fails because this
+ * file is preprocessed in traditional mode). "ret" clobbers eflags
+ * but this doesn't matter.
*/
-#define MCOUNT .data; ALIGN_DATA; 1:; .long 0; .text; \
- pushl %ebp; movl %esp,%ebp; \
- movl $1b,%eax; call mcount; popl %ebp
-#else
+#define ALTENTRY(name) GEN_ENTRY(name) ; MCOUNT ; MEXITCOUNT ; jmp 9f
+#define ENTRY(name) GEN_ENTRY(name) ; 9: ; MCOUNT
+#define FAKE_MCOUNT(caller) pushl caller ; call __mcount ; popl %ecx
+#define MCOUNT call __mcount
+#define MCOUNT_LABEL(name) GEN_ENTRY(name) ; nop ; ALIGN_TEXT
+#define MEXITCOUNT call mexitcount
+#define ret MEXITCOUNT ; .byte 0xc3
+#else /* not GPROF */
/*
* ALTENTRY() has to align because it is before a corresponding ENTRY().
* ENTRY() has to align to because there may be no ALTENTRY() before it.
- * If there is a previous ALTENTRY() then the alignment code is empty.
+ * If there is a previous ALTENTRY() then the alignment code for ENTRY()
+ * is empty.
*/
-#define ALTENTRY(name) GEN_ENTRY(_/**/name)
-#define ENTRY(name) GEN_ENTRY(_/**/name)
+#define ALTENTRY(name) GEN_ENTRY(name)
+#define ENTRY(name) GEN_ENTRY(name)
+#define FAKE_MCOUNT(caller)
#define MCOUNT
+#define MCOUNT_LABEL(name)
+#define MEXITCOUNT
+#endif /* GPROF */
-#endif
-
+/* XXX NOP and FASTER_NOP are misleadingly named */
#ifdef DUMMY_NOPS /* this will break some older machines */
#define FASTER_NOP
#define NOP
diff --git a/sys/amd64/include/profile.h b/sys/amd64/include/profile.h
index 9fe27ec..c55d629 100644
--- a/sys/amd64/include/profile.h
+++ b/sys/amd64/include/profile.h
@@ -31,35 +31,59 @@
* SUCH DAMAGE.
*
* @(#)profile.h 8.1 (Berkeley) 6/11/93
- * $Id: profile.h,v 1.3 1994/08/21 04:55:29 paul Exp $
+ * $Id: profile.h,v 1.4 1994/09/15 16:27:14 paul Exp $
*/
-#ifndef _I386_MACHINE_PROFILE_H_
-#define _I386_MACHINE_PROFILE_H_
+#ifndef _MACHINE_PROFILE_H_
+#define _MACHINE_PROFILE_H_
+#if 0
#define _MCOUNT_DECL static inline void _mcount
#define MCOUNT \
extern void mcount() asm("mcount"); void mcount() { \
- int selfpc, frompcindex; \
+ fptrint_t selfpc, frompc; \
/* \
- * find the return address for mcount, \
+ * Find the return address for mcount, \
* and the return address for mcount's caller. \
* \
- * selfpc = pc pushed by mcount call \
+ * selfpc = pc pushed by call to mcount \
*/ \
asm("movl 4(%%ebp),%0" : "=r" (selfpc)); \
/* \
- * frompcindex = pc pushed by jsr into self. \
- * In GCC the caller's stack frame has already been built so we \
- * have to chase a6 to find caller's raddr. \
+ * frompc = pc pushed by call to mcount's caller. \
+ * The caller's stack frame has already been built, so %ebp is \
+ * the caller's frame pointer. The caller's raddr is in the \
+ * caller's frame following the caller's caller's frame pointer. \
*/ \
- asm("movl (%%ebp),%0" : "=r" (frompcindex)); \
- frompcindex = ((int *)frompcindex)[1]; \
- _mcount(frompcindex, selfpc); \
+ asm("movl (%%ebp),%0" : "=r" (frompc)); \
+ frompc = ((fptrint_t *)frompc)[1]; \
+ _mcount(frompc, selfpc); \
}
+#else
+#define _MCOUNT_DECL void mcount
+#define MCOUNT
+#endif
-#define MCOUNT_ENTER save_eflags = read_eflags(); disable_intr()
-#define MCOUNT_EXIT write_eflags(save_eflags)
+#define MCOUNT_ENTER { save_eflags = read_eflags(); disable_intr(); }
+#define MCOUNT_EXIT (write_eflags(save_eflags))
-#endif
+#define CALIB_SCALE 1000
+#define KCOUNT(p,index) ((p)->kcount[(index) \
+ / (HISTFRACTION * sizeof(*(p)->kcount))])
+#define PC_TO_I(p, pc) ((fptrint_t)(pc) - (fptrint_t)(p)->lowpc)
+
+/* An unsigned integral type that can hold function pointers. */
+typedef u_int fptrint_t;
+
+/*
+ * An integral type that can hold the non-negative difference between
+ * function pointers. XXX declared as (signed) int, not an unsigned type.
+ */
+typedef int fptrdiff_t;
+
+u_int cputime __P((void));
+void mcount __P((fptrint_t frompc, fptrint_t selfpc));
+void mexitcount __P((fptrint_t selfpc));
+
+#endif /* !_MACHINE_PROFILE_H_ */
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index fda656d..1633ed8 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -1,7 +1,7 @@
# This file tells config what files go into building a kernel,
# files marked standard are always included.
#
-# $Id: files.i386,v 1.120 1995/12/26 12:50:01 bde Exp $
+# $Id: files.i386,v 1.121 1995/12/26 13:57:56 bde Exp $
#
aic7xxx_asm optional ahc device-driver \
dependency "$S/dev/aic7xxx/aic7xxx_asm.c" \
@@ -115,6 +115,7 @@ i386/isa/pcvt/pcvt_kbd.c optional vt device-driver
i386/isa/pcvt/pcvt_out.c optional vt device-driver
i386/isa/pcvt/pcvt_sup.c optional vt device-driver
i386/isa/pcvt/pcvt_vtf.c optional vt device-driver
+i386/isa/prof_machdep.c optional profiling-routine
i386/isa/psm.c optional psm device-driver
i386/isa/random_machdep.c standard
i386/isa/rc.c optional rc device-driver
diff --git a/sys/i386/conf/files.i386 b/sys/i386/conf/files.i386
index fda656d..1633ed8 100644
--- a/sys/i386/conf/files.i386
+++ b/sys/i386/conf/files.i386
@@ -1,7 +1,7 @@
# This file tells config what files go into building a kernel,
# files marked standard are always included.
#
-# $Id: files.i386,v 1.120 1995/12/26 12:50:01 bde Exp $
+# $Id: files.i386,v 1.121 1995/12/26 13:57:56 bde Exp $
#
aic7xxx_asm optional ahc device-driver \
dependency "$S/dev/aic7xxx/aic7xxx_asm.c" \
@@ -115,6 +115,7 @@ i386/isa/pcvt/pcvt_kbd.c optional vt device-driver
i386/isa/pcvt/pcvt_out.c optional vt device-driver
i386/isa/pcvt/pcvt_sup.c optional vt device-driver
i386/isa/pcvt/pcvt_vtf.c optional vt device-driver
+i386/isa/prof_machdep.c optional profiling-routine
i386/isa/psm.c optional psm device-driver
i386/isa/random_machdep.c standard
i386/isa/rc.c optional rc device-driver
diff --git a/sys/i386/include/asmacros.h b/sys/i386/include/asmacros.h
index b2a6dc8..8776ccf 100644
--- a/sys/i386/include/asmacros.h
+++ b/sys/i386/include/asmacros.h
@@ -30,7 +30,7 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
- * $Id: asmacros.h,v 1.4 1994/08/19 11:20:11 jkh Exp $
+ * $Id: asmacros.h,v 1.5 1994/09/08 12:25:18 bde Exp $
*/
#ifndef _MACHINE_ASMACROS_H_
@@ -38,47 +38,83 @@
#ifdef KERNEL
+/* XXX too much duplication in various asm*.h's and gprof.h's */
+
#define ALIGN_DATA .align 2 /* 4 byte alignment, zero filled */
#define ALIGN_TEXT .align 2,0x90 /* 4-byte alignment, nop filled */
#define SUPERALIGN_TEXT .align 4,0x90 /* 16-byte alignment (better for 486), nop filled */
-#define GEN_ENTRY(name) ALIGN_TEXT; .globl name; name:
-#define NON_GPROF_ENTRY(name) GEN_ENTRY(_/**/name)
-
-/* These three are place holders for future changes to the profiling code */
-#define MCOUNT_LABEL(name)
-#define MEXITCOUNT
-#define FAKE_MCOUNT(caller)
+#define GEN_ENTRY(name) ALIGN_TEXT; .globl _/**/name; _/**/name:
+#define NON_GPROF_ENTRY(name) GEN_ENTRY(name)
#ifdef GPROF
/*
- * ALTENTRY() must be before a corresponding ENTRY() so that it can jump
- * over the mcounting.
- */
-#define ALTENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; jmp 2f
-#define ENTRY(name) GEN_ENTRY(_/**/name); MCOUNT; 2:
-/*
- * The call to mcount supports the usual (bad) conventions. We allocate
- * some data and pass a pointer to it although the FreeBSD doesn't use
- * the data. We set up a frame before calling mcount because that is
- * the standard convention although it makes work for both mcount and
- * callers.
+ * __mcount is like mcount except that it doesn't require its caller to set
+ * up a frame pointer. It must be called before pushing anything onto the
+ * stack. gcc should eventually generate code to call __mcount in most
+ * cases. This would make -pg in combination with -fomit-frame-pointer
+ * useful. gcc has a configuration variable PROFILE_BEFORE_PROLOGUE to
+ * allow profiling before setting up the frame pointer, but this is
+ * inadequate for good handling of special cases, e.g., -fpic works best
+ * with profiling after the prologue.
+ *
+ * Neither __mcount nor mcount requires %eax to point to 4 bytes of data,
+ * so don't waste space allocating the data or time setting it up. Changes
+ * to avoid the wastage in gcc-2.4.5-compiled code are available.
+ *
+ * mexitcount is a new profiling feature to allow accurate timing of all
+ * functions if an accurate clock is available. Changes to gcc-2.4.5 to
+ * support it are available. The changes currently don't allow not
+ * generating mexitcounts for non-kernel code. It is best to call
+ * mexitcount right at the end of a function like the MEXITCOUNT macro
+ * does, but the changes to gcc only implement calling it as the first
+ * thing in the epilogue to avoid problems with -fpic.
+ *
+ * mcount and __mexitcount may clobber the call-used registers and %ef.
+ * mexitcount may clobber %ecx and %ef.
+ *
+ * Cross-jumping makes accurate timing more difficult. It is handled in
+ * many cases by calling mexitcount before jumping. It is not handled
+ * for some conditional jumps (e.g., in bcopyx) or for some fault-handling
+ * jumps. It is handled for some fault-handling jumps by not sharing the
+ * exit routine.
+ *
+ * ALTENTRY() must be before a corresponding ENTRY() so that it can jump to
+ * the main entry point. Note that alt entries are counted twice. They
+ * have to be counted as ordinary entries for gprof to get the call times
+ * right for the ordinary entries.
+ *
+ * High local labels are used in macros to avoid clashes with local labels
+ * in functions.
+ *
+ * "ret" is used instead of "RET" because there are a lot of "ret"s.
+ * 0xc3 is the opcode for "ret" (#define ret ... ret fails because this
+ * file is preprocessed in traditional mode). "ret" clobbers eflags
+ * but this doesn't matter.
*/
-#define MCOUNT .data; ALIGN_DATA; 1:; .long 0; .text; \
- pushl %ebp; movl %esp,%ebp; \
- movl $1b,%eax; call mcount; popl %ebp
-#else
+#define ALTENTRY(name) GEN_ENTRY(name) ; MCOUNT ; MEXITCOUNT ; jmp 9f
+#define ENTRY(name) GEN_ENTRY(name) ; 9: ; MCOUNT
+#define FAKE_MCOUNT(caller) pushl caller ; call __mcount ; popl %ecx
+#define MCOUNT call __mcount
+#define MCOUNT_LABEL(name) GEN_ENTRY(name) ; nop ; ALIGN_TEXT
+#define MEXITCOUNT call mexitcount
+#define ret MEXITCOUNT ; .byte 0xc3
+#else /* not GPROF */
/*
* ALTENTRY() has to align because it is before a corresponding ENTRY().
* ENTRY() has to align to because there may be no ALTENTRY() before it.
- * If there is a previous ALTENTRY() then the alignment code is empty.
+ * If there is a previous ALTENTRY() then the alignment code for ENTRY()
+ * is empty.
*/
-#define ALTENTRY(name) GEN_ENTRY(_/**/name)
-#define ENTRY(name) GEN_ENTRY(_/**/name)
+#define ALTENTRY(name) GEN_ENTRY(name)
+#define ENTRY(name) GEN_ENTRY(name)
+#define FAKE_MCOUNT(caller)
#define MCOUNT
+#define MCOUNT_LABEL(name)
+#define MEXITCOUNT
+#endif /* GPROF */
-#endif
-
+/* XXX NOP and FASTER_NOP are misleadingly named */
#ifdef DUMMY_NOPS /* this will break some older machines */
#define FASTER_NOP
#define NOP
diff --git a/sys/i386/include/profile.h b/sys/i386/include/profile.h
index 9fe27ec..c55d629 100644
--- a/sys/i386/include/profile.h
+++ b/sys/i386/include/profile.h
@@ -31,35 +31,59 @@
* SUCH DAMAGE.
*
* @(#)profile.h 8.1 (Berkeley) 6/11/93
- * $Id: profile.h,v 1.3 1994/08/21 04:55:29 paul Exp $
+ * $Id: profile.h,v 1.4 1994/09/15 16:27:14 paul Exp $
*/
-#ifndef _I386_MACHINE_PROFILE_H_
-#define _I386_MACHINE_PROFILE_H_
+#ifndef _MACHINE_PROFILE_H_
+#define _MACHINE_PROFILE_H_
+#if 0
#define _MCOUNT_DECL static inline void _mcount
#define MCOUNT \
extern void mcount() asm("mcount"); void mcount() { \
- int selfpc, frompcindex; \
+ fptrint_t selfpc, frompc; \
/* \
- * find the return address for mcount, \
+ * Find the return address for mcount, \
* and the return address for mcount's caller. \
* \
- * selfpc = pc pushed by mcount call \
+ * selfpc = pc pushed by call to mcount \
*/ \
asm("movl 4(%%ebp),%0" : "=r" (selfpc)); \
/* \
- * frompcindex = pc pushed by jsr into self. \
- * In GCC the caller's stack frame has already been built so we \
- * have to chase a6 to find caller's raddr. \
+ * frompc = pc pushed by call to mcount's caller. \
+ * The caller's stack frame has already been built, so %ebp is \
+ * the caller's frame pointer. The caller's raddr is in the \
+ * caller's frame following the caller's caller's frame pointer. \
*/ \
- asm("movl (%%ebp),%0" : "=r" (frompcindex)); \
- frompcindex = ((int *)frompcindex)[1]; \
- _mcount(frompcindex, selfpc); \
+ asm("movl (%%ebp),%0" : "=r" (frompc)); \
+ frompc = ((fptrint_t *)frompc)[1]; \
+ _mcount(frompc, selfpc); \
}
+#else
+#define _MCOUNT_DECL void mcount
+#define MCOUNT
+#endif
-#define MCOUNT_ENTER save_eflags = read_eflags(); disable_intr()
-#define MCOUNT_EXIT write_eflags(save_eflags)
+#define MCOUNT_ENTER { save_eflags = read_eflags(); disable_intr(); }
+#define MCOUNT_EXIT (write_eflags(save_eflags))
-#endif
+#define CALIB_SCALE 1000
+#define KCOUNT(p,index) ((p)->kcount[(index) \
+ / (HISTFRACTION * sizeof(*(p)->kcount))])
+#define PC_TO_I(p, pc) ((fptrint_t)(pc) - (fptrint_t)(p)->lowpc)
+
+/* An unsigned integral type that can hold function pointers. */
+typedef u_int fptrint_t;
+
+/*
+ * An integral type that can hold the non-negative difference between
+ * function pointers. XXX declared as (signed) int, not an unsigned type.
+ */
+typedef int fptrdiff_t;
+
+u_int cputime __P((void));
+void mcount __P((fptrint_t frompc, fptrint_t selfpc));
+void mexitcount __P((fptrint_t selfpc));
+
+#endif /* !_MACHINE_PROFILE_H_ */
diff --git a/sys/i386/isa/prof_machdep.c b/sys/i386/isa/prof_machdep.c
new file mode 100644
index 0000000..2aa6787
--- /dev/null
+++ b/sys/i386/isa/prof_machdep.c
@@ -0,0 +1,153 @@
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <machine/clock.h>
+#include <i386/isa/isa.h>
+#include <i386/isa/timerreg.h>
+
+#ifdef GUPROF
+extern u_int cputime __P((void));
+#endif
+
+#ifdef __GNUC__
+asm("
+GM_STATE = 0
+GMON_PROF_OFF = 3
+
+ .text
+ .align 4,0x90
+ .globl __mcount
+__mcount:
+ #
+ # Check that we are profiling. Do it early for speed.
+ #
+ cmpl $GMON_PROF_OFF,__gmonparam+GM_STATE
+ je Lmcount_exit
+ #
+ # __mcount is the same as mcount except the caller hasn't changed
+ # the stack except to call here, so the caller's raddr is above
+ # our raddr.
+ #
+ movl 4(%esp),%edx
+ jmp Lgot_frompc
+
+ .align 4,0x90
+ .globl mcount
+mcount:
+ cmpl $GMON_PROF_OFF,__gmonparam+GM_STATE
+ je Lmcount_exit
+ #
+ # The caller's stack frame has already been built, so %ebp is
+ # the caller's frame pointer. The caller's raddr is in the
+ # caller's frame following the caller's caller's frame pointer.
+ #
+ movl 4(%ebp),%edx
+Lgot_frompc:
+ #
+ # Our raddr is the caller's pc.
+ #
+ movl (%esp),%eax
+
+ pushf
+ pushl %eax
+ pushl %edx
+ cli
+ call _mcount
+ addl $8,%esp
+ popf
+Lmcount_exit:
+ ret
+");
+#else /* !__GNUC__ */
+#error
+#endif /* __GNUC__ */
+
+#ifdef GUPROF
+/*
+ * mexitcount saves the return register(s), loads selfpc and calls
+ * mexitcount(selfpc) to do the work. Someday it should be in a machine
+ * dependent file together with cputime(), __mcount and mcount. cputime()
+ * can't just be put in machdep.c because it has to be compiled without -pg.
+ */
+#ifdef __GNUC__
+asm("
+ .text
+#
+# Dummy label to be seen when gprof -u hides mexitcount.
+#
+ .align 4,0x90
+ .globl __mexitcount
+__mexitcount:
+ nop
+
+GMON_PROF_HIRES = 4
+
+ .align 4,0x90
+ .globl mexitcount
+mexitcount:
+ cmpl $GMON_PROF_HIRES,__gmonparam+GM_STATE
+ jne Lmexitcount_exit
+ pushl %edx
+ pushl %eax
+ movl 8(%esp),%eax
+ pushf
+ pushl %eax
+ cli
+ call _mexitcount
+ addl $4,%esp
+ popf
+ popl %eax
+ popl %edx
+Lmexitcount_exit:
+ ret
+");
+#else /* !__GNUC__ */
+#error
+#endif /* __GNUC__ */
+
+/*
+ * Return the time elapsed since the last call. The units are machine-
+ * dependent.
+ */
+u_int
+cputime()
+{
+ u_int count;
+ u_int delta;
+ u_char low;
+ static u_int prev_count;
+
+ /*
+ * Read the current value of the 8254 timer counter 0.
+ */
+ outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
+ low = inb(TIMER_CNTR0);
+ count = low | (inb(TIMER_CNTR0) << 8);
+
+ /*
+ * The timer counts down from TIMER_CNTR0_MAX to 0 and then resets.
+ * While profiling is enabled, this routine is called at least twice
+ * per timer reset (for mcounting and mexitcounting hardclock()),
+ * so at most one reset has occurred since the last call, and one
+ * has occurred iff the current count is larger than the previous
+ * count. This allows counter underflow to be detected faster
+ * than in microtime().
+ */
+ delta = prev_count - count;
+ prev_count = count;
+ if ((int) delta <= 0)
+ return (delta + timer0_max_count);
+ return (delta);
+}
+#else /* not GUPROF */
+#ifdef __GNUC__
+asm("
+ .text
+ .align 4,0x90
+ .globl mexitcount
+mexitcount:
+ ret
+");
+#else /* !__GNUC__ */
+#error
+#endif /* __GNUC__ */
+#endif /* GUPROF */
diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c
index b9c1ae2..0727f9b 100644
--- a/sys/kern/subr_prof.c
+++ b/sys/kern/subr_prof.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)subr_prof.c 8.3 (Berkeley) 9/23/93
- * $Id: subr_prof.c,v 1.14 1995/12/14 08:31:44 phk Exp $
+ * $Id: subr_prof.c,v 1.15 1995/12/26 01:21:39 bde Exp $
*/
#include <sys/param.h>
@@ -62,6 +62,11 @@ kmstartup(dummy)
{
char *cp;
struct gmonparam *p = &_gmonparam;
+#ifdef GUPROF
+ fptrint_t kmstartup_addr;
+ int i;
+#endif
+
/*
* Round lowpc and highpc to multiples of the density we're using
* so the rest of the scaling (here and in gprof) stays in ints.
@@ -89,9 +94,74 @@ kmstartup(dummy)
bzero(cp, p->kcountsize + p->tossize + p->fromssize);
p->tos = (struct tostruct *)cp;
cp += p->tossize;
- p->kcount = (u_short *)cp;
+ p->kcount = (HISTCOUNTER *)cp;
cp += p->kcountsize;
p->froms = (u_short *)cp;
+
+#ifdef GUPROF
+ /*
+ * Initialize pointers to overhead counters.
+ */
+ p->cputime_count = &KCOUNT(p, PC_TO_I(p, cputime));
+ p->mcount_count = &KCOUNT(p, PC_TO_I(p, mcount));
+ p->mexitcount_count = &KCOUNT(p, PC_TO_I(p, mexitcount));
+
+ /*
+ * Determine overheads.
+ */
+ disable_intr();
+ p->state = GMON_PROF_HIRES;
+
+ p->cputime_overhead = 0;
+ (void)cputime();
+ for (i = 0; i < CALIB_SCALE; i++)
+ p->cputime_overhead += cputime();
+
+ (void)cputime();
+ for (i = 0; i < CALIB_SCALE; i++)
+#if defined(i386) && __GNUC__ >= 2
+ /*
+ * Underestimate slightly by always calling __mcount, never
+ * mcount.
+ */
+ asm("pushl %0; call __mcount; popl %%ecx"
+ :
+ : "i" (kmstartup)
+ : "ax", "bx", "cx", "dx", "memory");
+#else
+#error
+#endif
+ p->mcount_overhead = KCOUNT(p, PC_TO_I(p, kmstartup));
+
+ (void)cputime();
+ for (i = 0; i < CALIB_SCALE; i++)
+#if defined(i386) && __GNUC__ >= 2
+ asm("call mexitcount; 1:"
+ : : : "ax", "bx", "cx", "dx", "memory");
+ asm("movl $1b,%0" : "=rm" (kmstartup_addr));
+#else
+#error
+#endif
+ p->mexitcount_overhead = KCOUNT(p, PC_TO_I(p, kmstartup_addr));
+
+ p->state = GMON_PROF_OFF;
+ enable_intr();
+
+ p->mcount_overhead_sub = p->mcount_overhead - p->cputime_overhead;
+ p->mexitcount_overhead_sub = p->mexitcount_overhead
+ - p->cputime_overhead;
+ printf("Profiling overheads: %u+%u %u+%u\n",
+ p->cputime_overhead, p->mcount_overhead_sub,
+ p->cputime_overhead, p->mexitcount_overhead_sub);
+ p->cputime_overhead_frac = p->cputime_overhead % CALIB_SCALE;
+ p->cputime_overhead /= CALIB_SCALE;
+ p->mcount_overhead_frac = p->mcount_overhead_sub % CALIB_SCALE;
+ p->mcount_overhead_sub /= CALIB_SCALE;
+ p->mcount_overhead /= CALIB_SCALE;
+ p->mexitcount_overhead_frac = p->mexitcount_overhead_sub % CALIB_SCALE;
+ p->mexitcount_overhead_sub /= CALIB_SCALE;
+ p->mexitcount_overhead /= CALIB_SCALE;
+#endif /* GUPROF */
}
/*
@@ -104,6 +174,7 @@ sysctl_kern_prof SYSCTL_HANDLER_ARGS
u_int namelen = arg2;
struct gmonparam *gp = &_gmonparam;
int error;
+ int state;
/* all sysctl names at this level are terminal */
if (namelen != 1)
@@ -111,13 +182,27 @@ sysctl_kern_prof SYSCTL_HANDLER_ARGS
switch (name[0]) {
case GPROF_STATE:
- error = sysctl_handle_int(oidp, &gp->state, 0, req);
+ state = gp->state;
+ error = sysctl_handle_int(oidp, &state, 0, req);
if (error)
return (error);
- if (gp->state == GMON_PROF_OFF)
+ if (!req->newptr)
+ return (0);
+ if (state == GMON_PROF_OFF) {
stopprofclock(&proc0);
- else
+ gp->state = state;
+ } else if (state == GMON_PROF_ON) {
+ gp->profrate = profhz;
+ gp->state = state;
startprofclock(&proc0);
+#ifdef GUPROF
+ } else if (state == GMON_PROF_HIRES) {
+ gp->profrate = 1193182; /* XXX */
+ stopprofclock(&proc0);
+ gp->state = state;
+#endif
+ } else if (state != gp->state)
+ return (EINVAL);
return (0);
case GPROF_COUNT:
return (sysctl_handle_opaque(oidp,
diff --git a/sys/libkern/mcount.c b/sys/libkern/mcount.c
index fc3625b..ed0e68d 100644
--- a/sys/libkern/mcount.c
+++ b/sys/libkern/mcount.c
@@ -1,180 +1,4 @@
-/*-
- * Copyright (c) 1983, 1992, 1993
- * The Regents of the University of California. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $Id: mcount.c,v 1.3 1994/09/15 15:41:48 paul Exp $
- */
+#define GPROF4 1 /* XXX can't get at kernel options */
+#define GUPROF 1 /* XXX can't get at kernel options */
-#include <sys/param.h>
-#include <sys/gmon.h>
-#ifdef KERNEL
-#include <i386/include/cpufunc.h>
-#endif
-
-/*
- * mcount is called on entry to each function compiled with the profiling
- * switch set. _mcount(), which is declared in a machine-dependent way
- * with _MCOUNT_DECL, does the actual work and is either inlined into a
- * C routine or called by an assembly stub. In any case, this magic is
- * taken care of by the MCOUNT definition in <machine/profile.h>.
- *
- * _mcount updates data structures that represent traversals of the
- * program's call graph edges. frompc and selfpc are the return
- * address and function address that represents the given call graph edge.
- *
- * Note: the original BSD code used the same variable (frompcindex) for
- * both frompcindex and frompc. Any reasonable, modern compiler will
- * perform this optimization.
- */
-_MCOUNT_DECL(frompc, selfpc) /* _mcount; may be static, inline, etc */
- register u_long frompc, selfpc;
-{
- register u_short *frompcindex;
- register struct tostruct *top, *prevtop;
- register struct gmonparam *p;
- register long toindex;
-#ifdef KERNEL
- register int s;
- u_long save_eflags;
-#endif
-
- p = &_gmonparam;
- /*
- * check that we are profiling
- * and that we aren't recursively invoked.
- */
- if (p->state != GMON_PROF_ON)
- return;
-#ifdef KERNEL
- MCOUNT_ENTER;
-#else
- p->state = GMON_PROF_BUSY;
-#endif
- /*
- * check that frompcindex is a reasonable pc value.
- * for example: signal catchers get called from the stack,
- * not from text space. too bad.
- */
- frompc -= p->lowpc;
- if (frompc > p->textsize)
- goto done;
-
- frompcindex = &p->froms[frompc / (p->hashfraction * sizeof(*p->froms))];
- toindex = *frompcindex;
- if (toindex == 0) {
- /*
- * first time traversing this arc
- */
- toindex = ++p->tos[0].link;
- if (toindex >= p->tolimit)
- /* halt further profiling */
- goto overflow;
-
- *frompcindex = toindex;
- top = &p->tos[toindex];
- top->selfpc = selfpc;
- top->count = 1;
- top->link = 0;
- goto done;
- }
- top = &p->tos[toindex];
- if (top->selfpc == selfpc) {
- /*
- * arc at front of chain; usual case.
- */
- top->count++;
- goto done;
- }
- /*
- * have to go looking down chain for it.
- * top points to what we are looking at,
- * prevtop points to previous top.
- * we know it is not at the head of the chain.
- */
- for (; /* goto done */; ) {
- if (top->link == 0) {
- /*
- * top is end of the chain and none of the chain
- * had top->selfpc == selfpc.
- * so we allocate a new tostruct
- * and link it to the head of the chain.
- */
- toindex = ++p->tos[0].link;
- if (toindex >= p->tolimit)
- goto overflow;
-
- top = &p->tos[toindex];
- top->selfpc = selfpc;
- top->count = 1;
- top->link = *frompcindex;
- *frompcindex = toindex;
- goto done;
- }
- /*
- * otherwise, check the next arc on the chain.
- */
- prevtop = top;
- top = &p->tos[top->link];
- if (top->selfpc == selfpc) {
- /*
- * there it is.
- * increment its count
- * move it to the head of the chain.
- */
- top->count++;
- toindex = prevtop->link;
- prevtop->link = top->link;
- top->link = *frompcindex;
- *frompcindex = toindex;
- goto done;
- }
-
- }
-done:
-#ifdef KERNEL
- MCOUNT_EXIT;
-#else
- p->state = GMON_PROF_ON;
-#endif
- return;
-overflow:
- p->state = GMON_PROF_ERROR;
-#ifdef KERNEL
- MCOUNT_EXIT;
-#endif
- return;
-}
-
-/*
- * Actual definition of mcount function. Defined in <machine/profile.h>,
- * which is included by <sys/gmon.h>.
- */
-MCOUNT
+#include "../lib/libc/gmon/mcount.c" /* XXX */
diff --git a/sys/sys/gmon.h b/sys/sys/gmon.h
index 9b3882d..619e94c 100644
--- a/sys/sys/gmon.h
+++ b/sys/sys/gmon.h
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)gmon.h 8.2 (Berkeley) 1/4/94
- * $Id: gmon.h,v 1.6 1995/05/30 08:14:22 rgrimes Exp $
+ * $Id: gmon.h,v 1.7 1995/08/29 03:09:14 bde Exp $
*/
#ifndef _SYS_GMON_H_
@@ -49,18 +49,33 @@ struct gmonhdr {
int version; /* version number */
int profrate; /* profiling clock rate */
int spare[3]; /* reserved */
+ /* XXX should record counter size and density */
};
#define GMONVERSION 0x00051879
/*
- * histogram counters are unsigned shorts (according to the kernel).
+ * Type of histogram counters used in the kernel.
*/
+#ifdef GPROF4
+#define HISTCOUNTER unsigned
+#else
#define HISTCOUNTER unsigned short
+#endif
/*
- * fraction of text space to allocate for histogram counters here, 1/2
+ * Fraction of text space to allocate for histogram counters.
+ * We allocate counters at the same or higher density as function
+ * addresses, so that each counter belongs to a unique function.
+ * A lower density of counters would give less resolution but a
+ * higher density would be wasted.
+ *
+ * Assume that function addresses are at least 4-byte-aligned.
+ * It would be better to get the linker to align functions more
+ * strictly so that we could use smaller tables.
*/
-#define HISTFRACTION 2
+#define FUNCTION_ALIGNMENT 4
+#define HISTFRACTION (FUNCTION_ALIGNMENT / sizeof(HISTCOUNTER) == 0 \
+ ? 1 : FUNCTION_ALIGNMENT / sizeof(HISTCOUNTER))
/*
* Fraction of text space to allocate for from hash buckets.
@@ -90,7 +105,23 @@ struct gmonhdr {
* profiling data structures without (in practice) sacrificing
* any granularity.
*/
-#define HASHFRACTION 2
+/*
+ * XXX I think the above analysis completely misses the point. I think
+ * the point is that addresses in different functions must hash to
+ * different values. Since the hash is essentially division by
+ * sizeof(unsigned short), the correct formula is:
+ *
+ * HASHFRACTION = MIN_FUNCTION_ALIGNMENT / sizeof(unsigned short)
+ *
+ * Note that the unsigned short here has nothing to do with the one for
+ * HISTFRACTION.
+ *
+ * Hash collisions from a two call sequence don't matter. They get
+ * handled like collisions for calls to different addresses from the
+ * same address through a function pointer.
+ */
+#define HASHFRACTION (FUNCTION_ALIGNMENT / sizeof(unsigned short) == 0 \
+ ? 1 : FUNCTION_ALIGNMENT / sizeof(unsigned short))
/*
* percent of text space to allocate for tostructs with a minimum.
@@ -132,17 +163,33 @@ struct rawarc {
*/
struct gmonparam {
int state;
- u_short *kcount;
+ HISTCOUNTER *kcount;
u_long kcountsize;
u_short *froms;
u_long fromssize;
struct tostruct *tos;
u_long tossize;
long tolimit;
- u_long lowpc;
- u_long highpc;
+ fptrint_t lowpc;
+ fptrint_t highpc;
u_long textsize;
u_long hashfraction;
+ u_long profrate;
+ HISTCOUNTER *cputime_count;
+ u_int cputime_overhead;
+ u_int cputime_overhead_frac;
+ u_int cputime_overhead_resid;
+ u_int cputime_overhead_sub;
+ HISTCOUNTER *mcount_count;
+ u_int mcount_overhead;
+ u_int mcount_overhead_frac;
+ u_int mcount_overhead_resid;
+ u_int mcount_overhead_sub;
+ HISTCOUNTER *mexitcount_count;
+ u_int mexitcount_overhead;
+ u_int mexitcount_overhead_frac;
+ u_int mexitcount_overhead_resid;
+ u_int mexitcount_overhead_sub;
};
extern struct gmonparam _gmonparam;
@@ -153,6 +200,7 @@ extern struct gmonparam _gmonparam;
#define GMON_PROF_BUSY 1
#define GMON_PROF_ERROR 2
#define GMON_PROF_OFF 3
+#define GMON_PROF_HIRES 4
/*
* Sysctl definitions for extracting profiling information from the kernel.
diff --git a/usr.bin/Makefile b/usr.bin/Makefile
index 4f2edb9..37844eb 100644
--- a/usr.bin/Makefile
+++ b/usr.bin/Makefile
@@ -1,5 +1,5 @@
# From: @(#)Makefile 8.3 (Berkeley) 1/7/94
-# $Id: Makefile,v 1.62 1995/10/03 12:29:28 bde Exp $
+# $Id: Makefile,v 1.63 1995/10/23 16:44:22 peter Exp $
# XXX MISSING: deroff diction graph learn plot
# spell spline struct units xsend
@@ -25,6 +25,7 @@ SUBDIR= apply ar at banner basename biff cal calendar \
unexpand unifdef uniq unvis users uudecode uuencode vacation \
vgrind vi vis w wall wc what whereis which who whois window \
write xargs xinstall xstr yacc yes ypcat ypmatch ypwhich
+SUBDIR+=gprof4
.if !exists(../secure) || defined(NOSECURE)
SUBDIR+=telnet
diff --git a/usr.bin/gprof4/Makefile b/usr.bin/gprof4/Makefile
new file mode 100644
index 0000000..f5c2c83
--- /dev/null
+++ b/usr.bin/gprof4/Makefile
@@ -0,0 +1,14 @@
+# This was cloned from the Makefile for gprof by changing PROG from gprof
+# to gprof4, adding NOMAN and .PATH, adding -DGPROF4 to CFLAGS and deleting
+# beforeinstall.
+
+# @(#)Makefile 5.17 (Berkeley) 5/11/90
+
+PROG= gprof4
+NOMAN= noman
+SRCS= gprof.c arcs.c dfn.c lookup.c ${MACHINE}.c hertz.c \
+ printgprof.c printlist.c
+CFLAGS+=-DGPROF4
+.PATH: ${.CURDIR}/../../usr.bin/gprof
+
+.include <bsd.prog.mk>
diff --git a/usr.sbin/config/config.8 b/usr.sbin/config/config.8
index 5f93a86..32cb360 100644
--- a/usr.sbin/config/config.8
+++ b/usr.sbin/config/config.8
@@ -88,6 +88,11 @@ will configure a system for profiling; for example,
.Xr kgmon 8
and
.Xr gprof 1 .
+If two or more
+.Fl p
+options are supplied,
+.Nm config
+will configure a system for high resolution profiling.
.It Fl n
If the
.Fl n
diff --git a/usr.sbin/config/mkmakefile.c b/usr.sbin/config/mkmakefile.c
index 8f6ca3a..f8e01d0 100644
--- a/usr.sbin/config/mkmakefile.c
+++ b/usr.sbin/config/mkmakefile.c
@@ -157,8 +157,10 @@ makefile()
}
fprintf(ofp, "KERN_IDENT=%s\n", raise(ident));
fprintf(ofp, "IDENT=");
- if (profiling)
+ if (profiling >= 1)
fprintf(ofp, " -DGPROF");
+ if (profiling >= 2)
+ fprintf(ofp, " -DGPROF4 -DGUPROF");
if (cputype == 0) {
printf("cpu type must be specified\n");
@@ -202,8 +204,10 @@ makefile()
fprintf(ofp, "%s=%s\n", op->op_name, op->op_value);
if (debugging)
fprintf(ofp, "DEBUG=-g\n");
- if (profiling)
+ if (profiling >= 1)
fprintf(ofp, "PROF=-pg\n");
+ if (profiling >= 2)
+ fprintf(ofp, "PROF+=-mprofiler-epilogue\n");
while (fgets(line, BUFSIZ, ifp) != 0) {
if (*line != '%') {
fprintf(ofp, "%s", line);
diff --git a/usr.sbin/kgmon/kgmon.8 b/usr.sbin/kgmon/kgmon.8
index 114fea8..4a91582 100644
--- a/usr.sbin/kgmon/kgmon.8
+++ b/usr.sbin/kgmon/kgmon.8
@@ -39,7 +39,7 @@
.Nd generate a dump of the operating system's profile buffers
.Sh SYNOPSIS
.Nm kgmon
-.Op Fl bhpr
+.Op Fl Bbhpr
.Op Fl M core
.Op Fl N system
.Sh DESCRIPTION
@@ -62,8 +62,10 @@ file suitable for later analysis by
.Pp
The options are as follows:
.Bl -tag -width Ds
+.It Fl B
+Resume the collection of high resolution profile data.
.It Fl b
-Resume the collection of profile data.
+Resume the collection of low resolution profile data.
.It Fl h
Stop the collection of profile data.
.It Fl p
@@ -86,6 +88,8 @@ default ``/kernel''.
.El
.Pp
If neither
+.Fl B
+nor
.Fl b
nor
.Fl h
@@ -96,6 +100,9 @@ flag is specified and profile data is being collected,
profiling will be momentarily suspended,
the operating system profile buffers will be dumped,
and profiling will be immediately resumed.
+.Pp
+The profile buffers should be reset when the resolution
+of the profile data is changed.
.Sh FILES
.Bl -tag -width /dev/kmemx -compact
.It Pa /kernel
diff --git a/usr.sbin/kgmon/kgmon.c b/usr.sbin/kgmon/kgmon.c
index 1a2923b..c283f16 100644
--- a/usr.sbin/kgmon/kgmon.c
+++ b/usr.sbin/kgmon/kgmon.c
@@ -69,7 +69,7 @@ struct kvmvars {
struct gmonparam gpm;
};
-int bflag, hflag, kflag, rflag, pflag;
+int Bflag, bflag, hflag, kflag, rflag, pflag;
int debug = 0;
void setprof __P((struct kvmvars *kvp, int state));
void dumpstate __P((struct kvmvars *kvp));
@@ -87,7 +87,7 @@ main(int argc, char **argv)
seteuid(getuid());
kmemf = NULL;
system = NULL;
- while ((ch = getopt(argc, argv, "M:N:bhpr")) != EOF) {
+ while ((ch = getopt(argc, argv, "M:N:Bbhpr")) != EOF) {
switch((char)ch) {
case 'M':
@@ -99,6 +99,10 @@ main(int argc, char **argv)
system = optarg;
break;
+ case 'B':
+ Bflag = 1;
+ break;
+
case 'b':
bflag = 1;
break;
@@ -117,7 +121,7 @@ main(int argc, char **argv)
default:
(void)fprintf(stderr,
- "usage: kgmon [-bhrp] [-M core] [-N system]\n");
+ "usage: kgmon [-Bbhrp] [-M core] [-N system]\n");
exit(1);
}
}
@@ -140,6 +144,8 @@ main(int argc, char **argv)
mode = getprof(&kvmvars);
if (hflag)
disp = GMON_PROF_OFF;
+ else if (Bflag)
+ disp = GMON_PROF_HIRES;
else if (bflag)
disp = GMON_PROF_ON;
else
@@ -151,7 +157,12 @@ main(int argc, char **argv)
if (accessmode == O_RDWR)
setprof(&kvmvars, disp);
(void)fprintf(stdout, "kgmon: kernel profiling is %s.\n",
- disp == GMON_PROF_OFF ? "off" : "running");
+ disp == GMON_PROF_OFF ? "off" :
+ disp == GMON_PROF_HIRES ? "running (high resolution)" :
+ disp == GMON_PROF_ON ? "running" :
+ disp == GMON_PROF_BUSY ? "busy" :
+ disp == GMON_PROF_ERROR ? "off (error)" :
+ "in an unknown state");
return (0);
}
@@ -176,8 +187,9 @@ openfiles(system, kmemf, kvp)
"kgmon: profiling not defined in kernel.\n");
exit(20);
}
- if (!(bflag || hflag || rflag ||
- (pflag && state == GMON_PROF_ON)))
+ if (!(Bflag || bflag || hflag || rflag ||
+ (pflag &&
+ (state == GMON_PROF_HIRES || state == GMON_PROF_ON))))
return (O_RDONLY);
(void)seteuid(0);
if (sysctl(mib, 3, NULL, NULL, &state, size) >= 0)
@@ -186,7 +198,8 @@ openfiles(system, kmemf, kvp)
kern_readonly(state);
return (O_RDONLY);
}
- openmode = (bflag || hflag || pflag || rflag) ? O_RDWR : O_RDONLY;
+ openmode = (Bflag || bflag || hflag || pflag || rflag)
+ ? O_RDWR : O_RDONLY;
kvp->kd = kvm_openfiles(system, kmemf, NULL, openmode, errbuf);
if (kvp->kd == NULL) {
if (openmode == O_RDWR) {
@@ -221,15 +234,17 @@ kern_readonly(mode)
{
(void)fprintf(stderr, "kgmon: kernel read-only: ");
- if (pflag && mode == GMON_PROF_ON)
+ if (pflag && (mode == GMON_PROF_HIRES || mode == GMON_PROF_ON))
(void)fprintf(stderr, "data may be inconsistent\n");
if (rflag)
(void)fprintf(stderr, "-r supressed\n");
+ if (Bflag)
+ (void)fprintf(stderr, "-B supressed\n");
if (bflag)
(void)fprintf(stderr, "-b supressed\n");
if (hflag)
(void)fprintf(stderr, "-h supressed\n");
- rflag = bflag = hflag = 0;
+ rflag = Bflag = bflag = hflag = 0;
}
/*
@@ -324,7 +339,9 @@ dumpstate(kvp)
h.hpc = kvp->gpm.highpc;
h.ncnt = kvp->gpm.kcountsize + sizeof(h);
h.version = GMONVERSION;
- h.profrate = getprofhz(kvp);
+ h.profrate = kvp->gpm.profrate;
+ if (h.profrate == 0)
+ h.profrate = getprofhz(kvp); /* ancient kernel */
fwrite((char *)&h, sizeof(h), 1, fp);
/*
OpenPOWER on IntegriCloud