summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorbde <bde@FreeBSD.org>1996-10-17 19:32:31 +0000
committerbde <bde@FreeBSD.org>1996-10-17 19:32:31 +0000
commita0f16401c5c8aa8537932ccc296e9253fca3debd (patch)
tree2f3c01bb1684b82e42c63c0a17d658ba5ded662b
parentd0d507caa9d6718e7f6699bae600f120362f59b7 (diff)
downloadFreeBSD-src-a0f16401c5c8aa8537932ccc296e9253fca3debd.zip
FreeBSD-src-a0f16401c5c8aa8537932ccc296e9253fca3debd.tar.gz
Improved non-statistical (GUPROF) profiling:
- use a more accurate and more efficient method of compensating for overheads. The old method counted too much time against leaf functions. - normally use the Pentium timestamp counter if available. On Pentiums, the times are now accurate to within a couple of cpu clock cycles per function call in the (unlikely) event that there are no cache misses in or caused by the profiling code. - optionally use an arbitrary Pentium event counter if available. - optionally regress to using the i8254 counter. - scaled the i8254 counter by a factor of 128. Now the i8254 counters overflow slightly faster than the TSC counters for a 150MHz Pentium :-) (after about 16 seconds). This is to avoid fractional overheads. files.i386: perfmon.c temporarily has to be classified as a profiling-routine because a couple of functions in it may be called from profiling code. options.i386: - I586_CTR_GUPROF is currently unused (oops). - I586_PMC_GUPROF should be something like 0x70000 to enable (but not use unless prof_machdep.c is changed) support for Pentium event counters. 7 is a control mode and the counter number 0 is somewhere in the 0000 bits (see perfmon.h for the encoding). profile.h: - added declarations. - cleaned up separation of user mode declarations. prof_machdep.c: Mostly clock-select changes. The default clock can be changed by editing kmem. There should be a sysctl for this. subr_prof.c: - added copyright. - calibrate overheads for the new method. - documented new method. - fixed races and machine dependencies in start/stop code. mcount.c: Use the new overhead compensation method. gmon.h: - changed GPROF4 counter type from unsigned to int. Oops, this should be machine-dependent and/or int32_t. - reorganized overhead counters. Submitted by: Pentium event counter changes mostly by wollman
-rw-r--r--sys/amd64/amd64/prof_machdep.c168
-rw-r--r--sys/amd64/include/profile.h36
-rw-r--r--sys/conf/files.i3863
-rw-r--r--sys/conf/options.i3864
-rw-r--r--sys/i386/conf/files.i3863
-rw-r--r--sys/i386/conf/options.i3864
-rw-r--r--sys/i386/include/profile.h36
-rw-r--r--sys/i386/isa/prof_machdep.c168
-rw-r--r--sys/kern/subr_prof.c186
-rw-r--r--sys/libkern/mcount.c130
-rw-r--r--sys/sys/gmon.h25
11 files changed, 597 insertions, 166 deletions
diff --git a/sys/amd64/amd64/prof_machdep.c b/sys/amd64/amd64/prof_machdep.c
index f140b84..62c8df5 100644
--- a/sys/amd64/amd64/prof_machdep.c
+++ b/sys/amd64/amd64/prof_machdep.c
@@ -1,17 +1,64 @@
-/*
- * NEED A COPYRIGHT NOPTICE HERE
+/*-
+ * Copyright (c) 1996 Bruce D. Evans.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
*
- * $Id$
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: prof_machdep.c,v 1.2 1996/04/08 16:41:06 wollman Exp $
*/
+
+#ifdef GUPROF
+#include "opt_cpu.h"
+#include "opt_i586_guprof.h"
+#include "opt_perfmon.h"
+
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/gmon.h>
+
#include <machine/clock.h>
+#include <machine/perfmon.h>
+#include <machine/profile.h>
+#endif
+
#include <i386/isa/isa.h>
#include <i386/isa/timerreg.h>
#ifdef GUPROF
-extern u_int cputime __P((void));
+#define CPUTIME_CLOCK_UNINITIALIZED 0
+#define CPUTIME_CLOCK_I8254 1
+#define CPUTIME_CLOCK_I586_CTR 2
+#define CPUTIME_CLOCK_I586_PMC 3
+#define CPUTIME_CLOCK_I8254_SHIFT 7
+
+int cputime_bias = 1; /* initialize for locality of reference */
+
+static int cputime_clock = CPUTIME_CLOCK_UNINITIALIZED;
+#ifdef I586_PMC_GUPROF
+static u_int cputime_clock_pmc_conf = I586_PMC_GUPROF;
+static int cputime_clock_pmc_init;
+static struct gmonparam saved_gmp;
#endif
+#endif /* GUPROF */
#ifdef __GNUC__
asm("
@@ -52,13 +99,13 @@ Lgot_frompc:
#
movl (%esp),%eax
- pushf
+ pushfl
pushl %eax
pushl %edx
cli
call _mcount
addl $8,%esp
- popf
+ popfl
Lmcount_exit:
ret
");
@@ -94,12 +141,12 @@ mexitcount:
pushl %edx
pushl %eax
movl 8(%esp),%eax
- pushf
+ pushfl
pushl %eax
cli
call _mexitcount
addl $4,%esp
- popf
+ popfl
popl %eax
popl %edx
Lmexitcount_exit:
@@ -113,20 +160,48 @@ Lmexitcount_exit:
* Return the time elapsed since the last call. The units are machine-
* dependent.
*/
-u_int
+int
cputime()
{
u_int count;
- u_int delta;
- u_char low;
+ int delta;
+#ifdef I586_PMC_GUPROF
+ u_quad_t event_count;
+#endif
+ u_char high, low;
static u_int prev_count;
+#if defined(I586_CPU) || defined(I686_CPU)
+ if (cputime_clock == CPUTIME_CLOCK_I586_CTR) {
+ count = (u_int)rdtsc();
+ delta = (int)(count - prev_count);
+ prev_count = count;
+ return (delta);
+ }
+#ifdef I586_PMC_GUPROF
+ if (cputime_clock == CPUTIME_CLOCK_I586_PMC) {
+ /*
+ * XXX perfmon_read() should be inlined so that the
+ * perfmon module doesn't need to be compiled with
+ * profiling disabled and so that it is fast.
+ */
+ perfmon_read(0, &event_count);
+
+ count = (u_int)event_count;
+ delta = (int)(count - prev_count);
+ prev_count = count;
+ return (delta);
+ }
+#endif /* I586_PMC_GUPROF */
+#endif /* I586_CPU or I686_CPU */
+
/*
* Read the current value of the 8254 timer counter 0.
*/
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
low = inb(TIMER_CNTR0);
- count = low | (inb(TIMER_CNTR0) << 8);
+ high = inb(TIMER_CNTR0);
+ count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT;
/*
* The timer counts down from TIMER_CNTR0_MAX to 0 and then resets.
@@ -140,10 +215,75 @@ cputime()
delta = prev_count - count;
prev_count = count;
if ((int) delta <= 0)
- return (delta + timer0_max_count);
+ return (delta + (timer0_max_count << CPUTIME_CLOCK_I8254_SHIFT));
return (delta);
}
-#else /* not GUPROF */
+
+/*
+ * The start and stop routines need not be here since we turn off profiling
+ * before calling them. They are here for convenience.
+ */
+
+void
+startguprof(gp)
+ struct gmonparam *gp;
+{
+ if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) {
+ cputime_clock = CPUTIME_CLOCK_I8254;
+#if defined(I586_CPU) || defined(I686_CPU)
+ if (i586_ctr_freq != 0)
+ cputime_clock = CPUTIME_CLOCK_I586_CTR;
+#endif
+ }
+ gp->profrate = timer_freq << CPUTIME_CLOCK_I8254_SHIFT;
+#if defined(I586_CPU) || defined(I686_CPU)
+ if (cputime_clock == CPUTIME_CLOCK_I586_CTR)
+ gp->profrate = i586_ctr_freq;
+#ifdef I586_PMC_GUPROF
+ else if (cputime_clock == CPUTIME_CLOCK_I586_PMC) {
+ if (perfmon_avail() &&
+ perfmon_setup(0, cputime_clock_pmc_conf) == 0) {
+ if (perfmon_start(0) != 0)
+ perfmon_fini(0);
+ else {
+ /* XXX 1 event == 1 us. */
+ gp->profrate = 1000000;
+
+ saved_gmp = *gp;
+
+ /* Zap overheads. They are invalid. */
+ gp->cputime_overhead = 0;
+ gp->mcount_overhead = 0;
+ gp->mcount_post_overhead = 0;
+ gp->mcount_pre_overhead = 0;
+ gp->mexitcount_overhead = 0;
+ gp->mexitcount_post_overhead = 0;
+ gp->mexitcount_pre_overhead = 0;
+
+ cputime_clock_pmc_init = TRUE;
+ }
+ }
+ }
+#endif /* I586_PMC_GUPROF */
+#endif /* I586_CPU or I686_CPU */
+ cputime_bias = 0;
+ cputime();
+}
+
+void
+stopguprof(gp)
+ struct gmonparam *gp;
+{
+#if defined(PERFMON) && defined(I586_PMC_GUPROF)
+ if (cputime_clock_pmc_init) {
+ *gp = saved_gmp;
+ perfmon_fini(0);
+ cputime_clock_pmc_init = FALSE;
+ }
+#endif
+}
+
+#else /* !GUPROF */
#ifdef __GNUC__
asm("
.text
diff --git a/sys/amd64/include/profile.h b/sys/amd64/include/profile.h
index 4b9d51b..08d0dbd 100644
--- a/sys/amd64/include/profile.h
+++ b/sys/amd64/include/profile.h
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)profile.h 8.1 (Berkeley) 6/11/93
- * $Id: profile.h,v 1.6 1996/01/01 17:11:21 bde Exp $
+ * $Id: profile.h,v 1.7 1996/08/28 20:15:25 bde Exp $
*/
#ifndef _MACHINE_PROFILE_H_
@@ -97,18 +97,44 @@ typedef u_int fptrint_t;
*/
typedef int fptrdiff_t;
-__BEGIN_DECLS
#ifdef KERNEL
+
void mcount __P((fptrint_t frompc, fptrint_t selfpc));
+
+#ifdef GUPROF
+struct gmonparam;
+
+void nullfunc_loop_profiled __P((void));
+void nullfunc_profiled __P((void));
+void startguprof __P((struct gmonparam *p));
+void stopguprof __P((struct gmonparam *p));
#else
+#define startguprof(p)
+#define stopguprof(p)
+#endif /* GUPROF */
+
+#else /* !KERNEL */
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
void mcount __P((void)) __asm("mcount");
static void _mcount __P((fptrint_t frompc, fptrint_t selfpc));
-#endif
+__END_DECLS
+
+#endif /* KERNEL */
#ifdef GUPROF
-u_int cputime __P((void));
+/* XXX doesn't quite work outside kernel yet. */
+extern int cputime_bias;
+
+__BEGIN_DECLS
+int cputime __P((void));
+void empty_loop __P((void));
void mexitcount __P((fptrint_t selfpc));
-#endif
+void nullfunc __P((void));
+void nullfunc_loop __P((void));
__END_DECLS
+#endif
#endif /* !_MACHINE_PROFILE_H_ */
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index 67afe44..06d34ff 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -1,7 +1,7 @@
# This file tells config what files go into building a kernel,
# files marked standard are always included.
#
-# $Id: files.i386,v 1.139 1996/08/27 19:45:54 pst Exp $
+# $Id: files.i386,v 1.140 1996/09/11 19:53:30 phk Exp $
#
aic7xxx_asm optional ahc device-driver \
dependency "$S/dev/aic7xxx/aic7xxx_asm.c" \
@@ -51,6 +51,7 @@ i386/i386/machdep.c standard
i386/i386/math_emulate.c optional math_emulate
i386/i386/mem.c standard
i386/i386/microtime.s standard
+i386/i386/perfmon.c optional perfmon profiling-routine
i386/i386/perfmon.c optional perfmon
i386/i386/pmap.c standard
i386/i386/procfs_machdep.c standard
diff --git a/sys/conf/options.i386 b/sys/conf/options.i386
index 3d3a3f700..7c09359 100644
--- a/sys/conf/options.i386
+++ b/sys/conf/options.i386
@@ -1,4 +1,4 @@
-# $Id: options.i386,v 1.22 1996/10/09 18:36:44 bde Exp $
+# $Id: options.i386,v 1.23 1996/10/09 19:47:07 bde Exp $
BOUNCEPAGES opt_bounce.h
USER_LDT
MATH_EMULATE opt_math_emulate.h
@@ -18,6 +18,8 @@ COMCONSOLE opt_comconsole.h
COM_ESP opt_sio.h
COM_MULTIPORT opt_sio.h
DSI_SOFT_MODEM opt_sio.h
+I586_CTR_GUPROF opt_i586_guprof.h
+I586_PMC_GUPROF opt_i586_guprof.h
FAT_CURSOR opt_pcvt.h
PCVT_FREEBSD opt_pcvt.h
PCVT_SCANSET opt_pcvt.h
diff --git a/sys/i386/conf/files.i386 b/sys/i386/conf/files.i386
index 67afe44..06d34ff 100644
--- a/sys/i386/conf/files.i386
+++ b/sys/i386/conf/files.i386
@@ -1,7 +1,7 @@
# This file tells config what files go into building a kernel,
# files marked standard are always included.
#
-# $Id: files.i386,v 1.139 1996/08/27 19:45:54 pst Exp $
+# $Id: files.i386,v 1.140 1996/09/11 19:53:30 phk Exp $
#
aic7xxx_asm optional ahc device-driver \
dependency "$S/dev/aic7xxx/aic7xxx_asm.c" \
@@ -51,6 +51,7 @@ i386/i386/machdep.c standard
i386/i386/math_emulate.c optional math_emulate
i386/i386/mem.c standard
i386/i386/microtime.s standard
+i386/i386/perfmon.c optional perfmon profiling-routine
i386/i386/perfmon.c optional perfmon
i386/i386/pmap.c standard
i386/i386/procfs_machdep.c standard
diff --git a/sys/i386/conf/options.i386 b/sys/i386/conf/options.i386
index 3d3a3f700..7c09359 100644
--- a/sys/i386/conf/options.i386
+++ b/sys/i386/conf/options.i386
@@ -1,4 +1,4 @@
-# $Id: options.i386,v 1.22 1996/10/09 18:36:44 bde Exp $
+# $Id: options.i386,v 1.23 1996/10/09 19:47:07 bde Exp $
BOUNCEPAGES opt_bounce.h
USER_LDT
MATH_EMULATE opt_math_emulate.h
@@ -18,6 +18,8 @@ COMCONSOLE opt_comconsole.h
COM_ESP opt_sio.h
COM_MULTIPORT opt_sio.h
DSI_SOFT_MODEM opt_sio.h
+I586_CTR_GUPROF opt_i586_guprof.h
+I586_PMC_GUPROF opt_i586_guprof.h
FAT_CURSOR opt_pcvt.h
PCVT_FREEBSD opt_pcvt.h
PCVT_SCANSET opt_pcvt.h
diff --git a/sys/i386/include/profile.h b/sys/i386/include/profile.h
index 4b9d51b..08d0dbd 100644
--- a/sys/i386/include/profile.h
+++ b/sys/i386/include/profile.h
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)profile.h 8.1 (Berkeley) 6/11/93
- * $Id: profile.h,v 1.6 1996/01/01 17:11:21 bde Exp $
+ * $Id: profile.h,v 1.7 1996/08/28 20:15:25 bde Exp $
*/
#ifndef _MACHINE_PROFILE_H_
@@ -97,18 +97,44 @@ typedef u_int fptrint_t;
*/
typedef int fptrdiff_t;
-__BEGIN_DECLS
#ifdef KERNEL
+
void mcount __P((fptrint_t frompc, fptrint_t selfpc));
+
+#ifdef GUPROF
+struct gmonparam;
+
+void nullfunc_loop_profiled __P((void));
+void nullfunc_profiled __P((void));
+void startguprof __P((struct gmonparam *p));
+void stopguprof __P((struct gmonparam *p));
#else
+#define startguprof(p)
+#define stopguprof(p)
+#endif /* GUPROF */
+
+#else /* !KERNEL */
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
void mcount __P((void)) __asm("mcount");
static void _mcount __P((fptrint_t frompc, fptrint_t selfpc));
-#endif
+__END_DECLS
+
+#endif /* KERNEL */
#ifdef GUPROF
-u_int cputime __P((void));
+/* XXX doesn't quite work outside kernel yet. */
+extern int cputime_bias;
+
+__BEGIN_DECLS
+int cputime __P((void));
+void empty_loop __P((void));
void mexitcount __P((fptrint_t selfpc));
-#endif
+void nullfunc __P((void));
+void nullfunc_loop __P((void));
__END_DECLS
+#endif
#endif /* !_MACHINE_PROFILE_H_ */
diff --git a/sys/i386/isa/prof_machdep.c b/sys/i386/isa/prof_machdep.c
index f140b84..62c8df5 100644
--- a/sys/i386/isa/prof_machdep.c
+++ b/sys/i386/isa/prof_machdep.c
@@ -1,17 +1,64 @@
-/*
- * NEED A COPYRIGHT NOPTICE HERE
+/*-
+ * Copyright (c) 1996 Bruce D. Evans.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
*
- * $Id$
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: prof_machdep.c,v 1.2 1996/04/08 16:41:06 wollman Exp $
*/
+
+#ifdef GUPROF
+#include "opt_cpu.h"
+#include "opt_i586_guprof.h"
+#include "opt_perfmon.h"
+
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/gmon.h>
+
#include <machine/clock.h>
+#include <machine/perfmon.h>
+#include <machine/profile.h>
+#endif
+
#include <i386/isa/isa.h>
#include <i386/isa/timerreg.h>
#ifdef GUPROF
-extern u_int cputime __P((void));
+#define CPUTIME_CLOCK_UNINITIALIZED 0
+#define CPUTIME_CLOCK_I8254 1
+#define CPUTIME_CLOCK_I586_CTR 2
+#define CPUTIME_CLOCK_I586_PMC 3
+#define CPUTIME_CLOCK_I8254_SHIFT 7
+
+int cputime_bias = 1; /* initialize for locality of reference */
+
+static int cputime_clock = CPUTIME_CLOCK_UNINITIALIZED;
+#ifdef I586_PMC_GUPROF
+static u_int cputime_clock_pmc_conf = I586_PMC_GUPROF;
+static int cputime_clock_pmc_init;
+static struct gmonparam saved_gmp;
#endif
+#endif /* GUPROF */
#ifdef __GNUC__
asm("
@@ -52,13 +99,13 @@ Lgot_frompc:
#
movl (%esp),%eax
- pushf
+ pushfl
pushl %eax
pushl %edx
cli
call _mcount
addl $8,%esp
- popf
+ popfl
Lmcount_exit:
ret
");
@@ -94,12 +141,12 @@ mexitcount:
pushl %edx
pushl %eax
movl 8(%esp),%eax
- pushf
+ pushfl
pushl %eax
cli
call _mexitcount
addl $4,%esp
- popf
+ popfl
popl %eax
popl %edx
Lmexitcount_exit:
@@ -113,20 +160,48 @@ Lmexitcount_exit:
* Return the time elapsed since the last call. The units are machine-
* dependent.
*/
-u_int
+int
cputime()
{
u_int count;
- u_int delta;
- u_char low;
+ int delta;
+#ifdef I586_PMC_GUPROF
+ u_quad_t event_count;
+#endif
+ u_char high, low;
static u_int prev_count;
+#if defined(I586_CPU) || defined(I686_CPU)
+ if (cputime_clock == CPUTIME_CLOCK_I586_CTR) {
+ count = (u_int)rdtsc();
+ delta = (int)(count - prev_count);
+ prev_count = count;
+ return (delta);
+ }
+#ifdef I586_PMC_GUPROF
+ if (cputime_clock == CPUTIME_CLOCK_I586_PMC) {
+ /*
+ * XXX perfmon_read() should be inlined so that the
+ * perfmon module doesn't need to be compiled with
+ * profiling disabled and so that it is fast.
+ */
+ perfmon_read(0, &event_count);
+
+ count = (u_int)event_count;
+ delta = (int)(count - prev_count);
+ prev_count = count;
+ return (delta);
+ }
+#endif /* I586_PMC_GUPROF */
+#endif /* I586_CPU or I686_CPU */
+
/*
* Read the current value of the 8254 timer counter 0.
*/
outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
low = inb(TIMER_CNTR0);
- count = low | (inb(TIMER_CNTR0) << 8);
+ high = inb(TIMER_CNTR0);
+ count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT;
/*
* The timer counts down from TIMER_CNTR0_MAX to 0 and then resets.
@@ -140,10 +215,75 @@ cputime()
delta = prev_count - count;
prev_count = count;
if ((int) delta <= 0)
- return (delta + timer0_max_count);
+ return (delta + (timer0_max_count << CPUTIME_CLOCK_I8254_SHIFT));
return (delta);
}
-#else /* not GUPROF */
+
+/*
+ * The start and stop routines need not be here since we turn off profiling
+ * before calling them. They are here for convenience.
+ */
+
+void
+startguprof(gp)
+ struct gmonparam *gp;
+{
+ if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) {
+ cputime_clock = CPUTIME_CLOCK_I8254;
+#if defined(I586_CPU) || defined(I686_CPU)
+ if (i586_ctr_freq != 0)
+ cputime_clock = CPUTIME_CLOCK_I586_CTR;
+#endif
+ }
+ gp->profrate = timer_freq << CPUTIME_CLOCK_I8254_SHIFT;
+#if defined(I586_CPU) || defined(I686_CPU)
+ if (cputime_clock == CPUTIME_CLOCK_I586_CTR)
+ gp->profrate = i586_ctr_freq;
+#ifdef I586_PMC_GUPROF
+ else if (cputime_clock == CPUTIME_CLOCK_I586_PMC) {
+ if (perfmon_avail() &&
+ perfmon_setup(0, cputime_clock_pmc_conf) == 0) {
+ if (perfmon_start(0) != 0)
+ perfmon_fini(0);
+ else {
+ /* XXX 1 event == 1 us. */
+ gp->profrate = 1000000;
+
+ saved_gmp = *gp;
+
+ /* Zap overheads. They are invalid. */
+ gp->cputime_overhead = 0;
+ gp->mcount_overhead = 0;
+ gp->mcount_post_overhead = 0;
+ gp->mcount_pre_overhead = 0;
+ gp->mexitcount_overhead = 0;
+ gp->mexitcount_post_overhead = 0;
+ gp->mexitcount_pre_overhead = 0;
+
+ cputime_clock_pmc_init = TRUE;
+ }
+ }
+ }
+#endif /* I586_PMC_GUPROF */
+#endif /* I586_CPU or I686_CPU */
+ cputime_bias = 0;
+ cputime();
+}
+
+void
+stopguprof(gp)
+ struct gmonparam *gp;
+{
+#if defined(PERFMON) && defined(I586_PMC_GUPROF)
+ if (cputime_clock_pmc_init) {
+ *gp = saved_gmp;
+ perfmon_fini(0);
+ cputime_clock_pmc_init = FALSE;
+ }
+#endif
+}
+
+#else /* !GUPROF */
#ifdef __GNUC__
asm("
.text
diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c
index 0727f9b..d17a3b0 100644
--- a/sys/kern/subr_prof.c
+++ b/sys/kern/subr_prof.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)subr_prof.c 8.3 (Berkeley) 9/23/93
- * $Id: subr_prof.c,v 1.15 1995/12/26 01:21:39 bde Exp $
+ * $Id: subr_prof.c,v 1.16 1995/12/29 15:29:08 bde Exp $
*/
#include <sys/param.h>
@@ -56,6 +56,22 @@ struct gmonparam _gmonparam = { GMON_PROF_OFF };
extern char btext[];
extern char etext[];
+#ifdef GUPROF
+void
+nullfunc_loop_profiled()
+{
+ int i;
+
+ for (i = 0; i < CALIB_SCALE; i++)
+ nullfunc_profiled();
+}
+
+void
+nullfunc_profiled()
+{
+}
+#endif /* GUPROF */
+
static void
kmstartup(dummy)
void *dummy;
@@ -63,8 +79,14 @@ kmstartup(dummy)
char *cp;
struct gmonparam *p = &_gmonparam;
#ifdef GUPROF
- fptrint_t kmstartup_addr;
+ int cputime_overhead;
+ int empty_loop_time;
int i;
+ fptrint_t kmstartup_addr;
+ int mcount_overhead;
+ int mexitcount_overhead;
+ int nullfunc_loop_overhead;
+ int nullfunc_loop_profiled_time;
#endif
/*
@@ -74,7 +96,7 @@ kmstartup(dummy)
p->lowpc = ROUNDDOWN((u_long)btext, HISTFRACTION * sizeof(HISTCOUNTER));
p->highpc = ROUNDUP((u_long)etext, HISTFRACTION * sizeof(HISTCOUNTER));
p->textsize = p->highpc - p->lowpc;
- printf("Profiling kernel, textsize=%d [%x..%x]\n",
+ printf("Profiling kernel, textsize=%lu [%x..%x]\n",
p->textsize, p->lowpc, p->highpc);
p->kcountsize = p->textsize / HISTFRACTION;
p->hashfraction = HASHFRACTION;
@@ -99,41 +121,56 @@ kmstartup(dummy)
p->froms = (u_short *)cp;
#ifdef GUPROF
- /*
- * Initialize pointers to overhead counters.
- */
+ /* Initialize pointers to overhead counters. */
p->cputime_count = &KCOUNT(p, PC_TO_I(p, cputime));
p->mcount_count = &KCOUNT(p, PC_TO_I(p, mcount));
p->mexitcount_count = &KCOUNT(p, PC_TO_I(p, mexitcount));
/*
- * Determine overheads.
+ * Disable interrupts to avoid interference while we calibrate
+ * things.
*/
disable_intr();
- p->state = GMON_PROF_HIRES;
- p->cputime_overhead = 0;
- (void)cputime();
+ /*
+ * Determine overheads.
+ * XXX this needs to be repeated for each useful timer/counter.
+ */
+ cputime_overhead = 0;
+ startguprof(p);
for (i = 0; i < CALIB_SCALE; i++)
- p->cputime_overhead += cputime();
+ cputime_overhead += cputime();
+
+ empty_loop();
+ startguprof(p);
+ empty_loop();
+ empty_loop_time = cputime();
+
+ nullfunc_loop_profiled();
+
+ /*
+ * Start profiling. There won't be any normal function calls since
+ * interrupts are disabled, but we will call the profiling routines
+ * directly to determine their overheads.
+ */
+ p->state = GMON_PROF_HIRES;
+
+ startguprof(p);
+ nullfunc_loop_profiled();
- (void)cputime();
+ startguprof(p);
for (i = 0; i < CALIB_SCALE; i++)
#if defined(i386) && __GNUC__ >= 2
- /*
- * Underestimate slightly by always calling __mcount, never
- * mcount.
- */
asm("pushl %0; call __mcount; popl %%ecx"
:
- : "i" (kmstartup)
+ : "i" (profil)
: "ax", "bx", "cx", "dx", "memory");
#else
#error
#endif
- p->mcount_overhead = KCOUNT(p, PC_TO_I(p, kmstartup));
+ mcount_overhead = KCOUNT(p, PC_TO_I(p, profil));
- (void)cputime();
+ startguprof(p);
for (i = 0; i < CALIB_SCALE; i++)
#if defined(i386) && __GNUC__ >= 2
asm("call mexitcount; 1:"
@@ -142,25 +179,96 @@ kmstartup(dummy)
#else
#error
#endif
- p->mexitcount_overhead = KCOUNT(p, PC_TO_I(p, kmstartup_addr));
+ mexitcount_overhead = KCOUNT(p, PC_TO_I(p, kmstartup_addr));
p->state = GMON_PROF_OFF;
+ stopguprof(p);
+
enable_intr();
- p->mcount_overhead_sub = p->mcount_overhead - p->cputime_overhead;
- p->mexitcount_overhead_sub = p->mexitcount_overhead
- - p->cputime_overhead;
- printf("Profiling overheads: %u+%u %u+%u\n",
- p->cputime_overhead, p->mcount_overhead_sub,
- p->cputime_overhead, p->mexitcount_overhead_sub);
- p->cputime_overhead_frac = p->cputime_overhead % CALIB_SCALE;
- p->cputime_overhead /= CALIB_SCALE;
- p->mcount_overhead_frac = p->mcount_overhead_sub % CALIB_SCALE;
- p->mcount_overhead_sub /= CALIB_SCALE;
- p->mcount_overhead /= CALIB_SCALE;
- p->mexitcount_overhead_frac = p->mexitcount_overhead_sub % CALIB_SCALE;
- p->mexitcount_overhead_sub /= CALIB_SCALE;
- p->mexitcount_overhead /= CALIB_SCALE;
+ nullfunc_loop_profiled_time = 0;
+ for (i = 0; i < 28; i += sizeof(HISTCOUNTER)) {
+ int x;
+
+ x = KCOUNT(p, PC_TO_I(p,
+ (fptrint_t)nullfunc_loop_profiled + i));
+ nullfunc_loop_profiled_time += x;
+ printf("leaf[%d] = %d sum %d\n",
+ i, x, nullfunc_loop_profiled_time);
+ }
+#define CALIB_DOSCALE(count) (((count) + CALIB_SCALE / 3) / CALIB_SCALE)
+#define c2n(count, freq) ((int)((count) * 1000000000LL / freq))
+ printf("cputime %d, empty_loop %d, nullfunc_loop_profiled %d, mcount %d, mexitcount %d\n",
+ CALIB_DOSCALE(c2n(cputime_overhead, p->profrate)),
+ CALIB_DOSCALE(c2n(empty_loop_time, p->profrate)),
+ CALIB_DOSCALE(c2n(nullfunc_loop_profiled_time, p->profrate)),
+ CALIB_DOSCALE(c2n(mcount_overhead, p->profrate)),
+ CALIB_DOSCALE(c2n(mexitcount_overhead, p->profrate)));
+ cputime_overhead -= empty_loop_time;
+ mcount_overhead -= empty_loop_time;
+ mexitcount_overhead -= empty_loop_time;
+
+ /*-
+ * Profiling overheads are determined by the times between the
+ * following events:
+ * MC1: mcount() is called
+ * MC2: cputime() (called from mcount()) latches the timer
+ * MC3: mcount() completes
+ * ME1: mexitcount() is called
+ * ME2: cputime() (called from mexitcount()) latches the timer
+ * ME3: mexitcount() completes.
+ * The times between the events vary slightly depending on instruction
+ * combination and cache misses, etc. Attempt to determine the
+ * minimum times. These can be subtracted from the profiling times
+ * without much risk of reducing the profiling times below what they
+ * would be when profiling is not configured. Abbreviate:
+ * ab = minimum time between MC1 and MC3
+ * a = minimum time between MC1 and MC2
+ * b = minimum time between MC2 and MC3
+ * cd = minimum time between ME1 and ME3
+ * c = minimum time between ME1 and ME2
+ * d = minimum time between ME2 and ME3.
+ * These satisfy the relations:
+ * ab <= mcount_overhead (just measured)
+ * a + b <= ab
+ * cd <= mexitcount_overhead (just measured)
+ * c + d <= cd
+ * a + d <= nullfunc_loop_profiled_time (just measured)
+ * a >= 0, b >= 0, c >= 0, d >= 0.
+ * Assume that ab and cd are equal to the minimums.
+ */
+ p->cputime_overhead = CALIB_DOSCALE(cputime_overhead);
+ p->mcount_overhead = CALIB_DOSCALE(mcount_overhead - cputime_overhead);
+ p->mexitcount_overhead = CALIB_DOSCALE(mexitcount_overhead
+ - cputime_overhead);
+ nullfunc_loop_overhead = nullfunc_loop_profiled_time - empty_loop_time;
+ p->mexitcount_post_overhead = CALIB_DOSCALE((mcount_overhead
+ - nullfunc_loop_overhead)
+ / 4);
+ p->mexitcount_pre_overhead = p->mexitcount_overhead
+ + p->cputime_overhead
+ - p->mexitcount_post_overhead;
+ p->mcount_pre_overhead = CALIB_DOSCALE(nullfunc_loop_overhead)
+ - p->mexitcount_post_overhead;
+ p->mcount_post_overhead = p->mcount_overhead
+ + p->cputime_overhead
+ - p->mcount_pre_overhead;
+ printf(
+"Profiling overheads: mcount: %d+%d, %d+%d; mexitcount: %d+%d, %d+%d nsec\n",
+ c2n(p->cputime_overhead, p->profrate),
+ c2n(p->mcount_overhead, p->profrate),
+ c2n(p->mcount_pre_overhead, p->profrate),
+ c2n(p->mcount_post_overhead, p->profrate),
+ c2n(p->cputime_overhead, p->profrate),
+ c2n(p->mexitcount_overhead, p->profrate),
+ c2n(p->mexitcount_pre_overhead, p->profrate),
+ c2n(p->mexitcount_post_overhead, p->profrate));
+ printf(
+"Profiling overheads: mcount: %d+%d, %d+%d; mexitcount: %d+%d, %d+%d cycles\n",
+ p->cputime_overhead, p->mcount_overhead,
+ p->mcount_pre_overhead, p->mcount_post_overhead,
+ p->cputime_overhead, p->mexitcount_overhead,
+ p->mexitcount_pre_overhead, p->mexitcount_post_overhead);
#endif /* GUPROF */
}
@@ -189,16 +297,20 @@ sysctl_kern_prof SYSCTL_HANDLER_ARGS
if (!req->newptr)
return (0);
if (state == GMON_PROF_OFF) {
- stopprofclock(&proc0);
gp->state = state;
+ stopprofclock(&proc0);
+ stopguprof(gp);
} else if (state == GMON_PROF_ON) {
+ gp->state = GMON_PROF_OFF;
+ stopguprof(gp);
gp->profrate = profhz;
- gp->state = state;
startprofclock(&proc0);
+ gp->state = state;
#ifdef GUPROF
} else if (state == GMON_PROF_HIRES) {
- gp->profrate = 1193182; /* XXX */
+ gp->state = GMON_PROF_OFF;
stopprofclock(&proc0);
+ startguprof(gp);
gp->state = state;
#endif
} else if (state != gp->state)
diff --git a/sys/libkern/mcount.c b/sys/libkern/mcount.c
index 30cda2d..e7105d0 100644
--- a/sys/libkern/mcount.c
+++ b/sys/libkern/mcount.c
@@ -36,13 +36,12 @@
static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93";
#endif
static const char rcsid[] =
- "$Id: mcount.c,v 1.7 1996/05/02 14:20:33 phk Exp $";
+ "$Id: mcount.c,v 1.8 1996/08/28 20:15:12 bde Exp $";
#endif
#include <sys/param.h>
#include <sys/gmon.h>
#ifdef KERNEL
-#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
@@ -71,7 +70,7 @@ _MCOUNT_DECL(frompc, selfpc) /* _mcount; may be static, inline, etc */
register fptrint_t frompc, selfpc;
{
#ifdef GUPROF
- u_int delta;
+ int delta;
#endif
register fptrdiff_t frompci;
register u_short *frompcindex;
@@ -115,50 +114,33 @@ _MCOUNT_DECL(frompc, selfpc) /* _mcount; may be static, inline, etc */
#endif /* KERNEL */
#ifdef GUPROF
- if (p->state != GMON_PROF_HIRES)
- goto skip_guprof_stuff;
- /*
- * Look at the clock and add the count of clock cycles since the
- * clock was last looked at to a counter for frompc. This
- * solidifies the count for the function containing frompc and
- * effectively starts another clock for the current function.
- * The count for the new clock will be solidified when another
- * function call is made or the function returns.
- *
- * We use the usual sampling counters since they can be located
- * efficiently. 4-byte counters are usually necessary.
- *
- * There are many complications for subtracting the profiling
- * overheads from the counts for normal functions and adding
- * them to the counts for mcount(), mexitcount() and cputime().
- * We attempt to handle fractional cycles, but the overheads
- * are usually underestimated because they are calibrated for
- * a simpler than usual setup.
- */
- delta = cputime() - p->mcount_overhead;
- p->cputime_overhead_resid += p->cputime_overhead_frac;
- p->mcount_overhead_resid += p->mcount_overhead_frac;
- if ((int)delta < 0)
- *p->mcount_count += delta + p->mcount_overhead
- - p->cputime_overhead;
- else if (delta != 0) {
- if (p->cputime_overhead_resid >= CALIB_SCALE) {
- p->cputime_overhead_resid -= CALIB_SCALE;
- ++*p->cputime_count;
- --delta;
- }
- if (delta != 0) {
- if (p->mcount_overhead_resid >= CALIB_SCALE) {
- p->mcount_overhead_resid -= CALIB_SCALE;
- ++*p->mcount_count;
- --delta;
- }
- KCOUNT(p, frompci) += delta;
- }
- *p->mcount_count += p->mcount_overhead_sub;
+ if (p->state == GMON_PROF_HIRES) {
+ /*
+ * Count the time since cputime() was previously called
+ * against `frompc'. Compensate for overheads.
+ *
+ * cputime() sets its prev_count variable to the count when
+ * it is called. This in effect starts a counter for
+ * the next period of execution (normally from now until
+ * the next call to mcount() or mexitcount()). We set
+ * cputime_bias to compensate for our own overhead.
+ *
+ * We use the usual sampling counters since they can be
+ * located efficiently. 4-byte counters are usually
+ * necessary. gprof will add up the scattered counts
+ * just like it does for statistical profiling. All
+ * counts are signed so that underflow in the subtractions
+ * doesn't matter much (negative counts are normally
+ * compensated for by larger counts elsewhere). Underflow
+ * shouldn't occur, but may be caused by slightly wrong
+ * calibrations or from not clearing cputime_bias.
+ */
+ delta = cputime() - cputime_bias - p->mcount_pre_overhead;
+ cputime_bias = p->mcount_post_overhead;
+ KCOUNT(p, frompci) += delta;
+ *p->cputime_count += p->cputime_overhead;
+ *p->mcount_count += p->mcount_overhead;
}
- *p->cputime_count += p->cputime_overhead;
-skip_guprof_stuff:
#endif /* GUPROF */
#ifdef KERNEL
@@ -290,36 +272,40 @@ mexitcount(selfpc)
p = &_gmonparam;
selfpcdiff = selfpc - (fptrint_t)p->lowpc;
if (selfpcdiff < p->textsize) {
- u_int delta;
+ int delta;
/*
- * Solidify the count for the current function.
+ * Count the time since cputime() was previously called
+ * against `selfpc'. Compensate for overheads.
*/
- delta = cputime() - p->mexitcount_overhead;
- p->cputime_overhead_resid += p->cputime_overhead_frac;
- p->mexitcount_overhead_resid += p->mexitcount_overhead_frac;
- if ((int)delta < 0)
- *p->mexitcount_count += delta + p->mexitcount_overhead
- - p->cputime_overhead;
- else if (delta != 0) {
- if (p->cputime_overhead_resid >= CALIB_SCALE) {
- p->cputime_overhead_resid -= CALIB_SCALE;
- ++*p->cputime_count;
- --delta;
- }
- if (delta != 0) {
- if (p->mexitcount_overhead_resid
- >= CALIB_SCALE) {
- p->mexitcount_overhead_resid
- -= CALIB_SCALE;
- ++*p->mexitcount_count;
- --delta;
- }
- KCOUNT(p, selfpcdiff) += delta;
- }
- *p->mexitcount_count += p->mexitcount_overhead_sub;
- }
+ delta = cputime() - cputime_bias - p->mexitcount_pre_overhead;
+ cputime_bias = p->mexitcount_post_overhead;
+ KCOUNT(p, selfpcdiff) += delta;
*p->cputime_count += p->cputime_overhead;
+ *p->mexitcount_count += p->mexitcount_overhead;
}
}
+
+void
+empty_loop()
+{
+ int i;
+
+ for (i = 0; i < CALIB_SCALE; i++)
+ ;
+}
+
+void
+nullfunc()
+{
+}
+
+void
+nullfunc_loop()
+{
+ int i;
+
+ for (i = 0; i < CALIB_SCALE; i++)
+ nullfunc();
+}
#endif /* GUPROF */
diff --git a/sys/sys/gmon.h b/sys/sys/gmon.h
index 619e94c..6d5334c 100644
--- a/sys/sys/gmon.h
+++ b/sys/sys/gmon.h
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)gmon.h 8.2 (Berkeley) 1/4/94
- * $Id: gmon.h,v 1.7 1995/08/29 03:09:14 bde Exp $
+ * $Id: gmon.h,v 1.8 1995/12/29 15:29:26 bde Exp $
*/
#ifndef _SYS_GMON_H_
@@ -57,7 +57,7 @@ struct gmonhdr {
* Type of histogram counters used in the kernel.
*/
#ifdef GPROF4
-#define HISTCOUNTER unsigned
+#define HISTCOUNTER int
#else
#define HISTCOUNTER unsigned short
#endif
@@ -174,22 +174,17 @@ struct gmonparam {
fptrint_t highpc;
u_long textsize;
u_long hashfraction;
- u_long profrate;
+ int profrate; /* XXX wrong type to match gmonhdr */
HISTCOUNTER *cputime_count;
- u_int cputime_overhead;
- u_int cputime_overhead_frac;
- u_int cputime_overhead_resid;
- u_int cputime_overhead_sub;
+ int cputime_overhead;
HISTCOUNTER *mcount_count;
- u_int mcount_overhead;
- u_int mcount_overhead_frac;
- u_int mcount_overhead_resid;
- u_int mcount_overhead_sub;
+ int mcount_overhead;
+ int mcount_post_overhead;
+ int mcount_pre_overhead;
HISTCOUNTER *mexitcount_count;
- u_int mexitcount_overhead;
- u_int mexitcount_overhead_frac;
- u_int mexitcount_overhead_resid;
- u_int mexitcount_overhead_sub;
+ int mexitcount_overhead;
+ int mexitcount_post_overhead;
+ int mexitcount_pre_overhead;
};
extern struct gmonparam _gmonparam;
OpenPOWER on IntegriCloud