summaryrefslogtreecommitdiffstats
path: root/sys/amd64/amd64/prof_machdep.c
blob: 273c8336fca33b21d03a1b54b5216fcd828262b6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
/*-
 * Copyright (c) 1996 Bruce D. Evans.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#ifdef GUPROF
#if 0
#include "opt_i586_guprof.h"
#include "opt_perfmon.h"
#endif

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/eventhandler.h>
#include <sys/gmon.h>
#include <sys/kernel.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <machine/clock.h>
#if 0
#include <machine/perfmon.h>
#endif
#include <machine/timerreg.h>

/*
 * Clock sources that cputime() can read.  The selected source is kept in
 * cputime_clock; while it is CPUTIME_CLOCK_UNINITIALIZED, startguprof()
 * picks either the TSC or the i8254.
 */
#define	CPUTIME_CLOCK_UNINITIALIZED	0
#define	CPUTIME_CLOCK_I8254		1
#define	CPUTIME_CLOCK_TSC		2
#define	CPUTIME_CLOCK_I586_PMC		3
/* Left shift applied to i8254 counter readings and to the i8254 frequency. */
#define	CPUTIME_CLOCK_I8254_SHIFT	7

int	cputime_bias = 1;	/* initialize for locality of reference */

/* Current clock source; written by startguprof() and by the sysctl handler. */
static int	cputime_clock = CPUTIME_CLOCK_UNINITIALIZED;
#if defined(PERFMON) && defined(I586_PMC_GUPROF)
/* Counter configuration that startguprof() hands to perfmon_setup(). */
static u_int	cputime_clock_pmc_conf = I586_PMC_GUPROF;
/* Set once startguprof() has started counter 0; cleared by stopguprof(). */
static int	cputime_clock_pmc_init;
/* Copy of *gp taken by startguprof() and restored by stopguprof(). */
static struct gmonparam saved_gmp;
#endif
/*
 * Set by startguprof() when the TSC is the clock source and cleared by
 * stopguprof(); read by tsc_freq_changed().
 */
static int	cputime_prof_active;
#endif /* GUPROF */

/*
 * Assembly entry points for call-arc profiling: __mcount and .mcount.
 *
 * Both entry points return immediately when the 32-bit value at
 * _gmonparam+GM_STATE equals GMON_PROF_OFF.  Otherwise they save %rax,
 * %rdx, %rcx, %rsi, %rdi, %r8 and %r9, load two arguments and call the C
 * function mcount() with interrupts disabled (the flags are saved with
 * pushfq before cli and restored with popfq afterwards):
 *
 *   %rdi - the caller's return address.  __mcount reads it from the stack
 *          slot just above its own return address; .mcount reads it from
 *          8(%rbp), i.e. it relies on the caller having set up %rbp.
 *   %rsi - the entry point's own return address, i.e. a pc inside the
 *          function being profiled.
 *
 * GM_STATE (0) is presumably the offset of the state member within
 * struct gmonparam, and GMON_PROF_OFF (3) presumably mirrors the C
 * constant of the same name -- verify against <sys/gmon.h>.
 */
#ifdef __GNUCLIKE_ASM
__asm("								\n\
GM_STATE	=	0					\n\
GMON_PROF_OFF	=	3					\n\
								\n\
	.text							\n\
	.p2align 4,0x90						\n\
	.globl	__mcount					\n\
	.type	__mcount,@function				\n\
__mcount:							\n\
	#							\n\
	# Check that we are profiling.  Do it early for speed.	\n\
	#							\n\
	cmpl	$GMON_PROF_OFF,_gmonparam+GM_STATE		\n\
	je	.mcount_exit					\n\
	#							\n\
	# __mcount is the same as [.]mcount except the caller	\n\
	# hasn't changed the stack except to call here, so the	\n\
	# caller's raddr is above our raddr.			\n\
	#							\n\
	pushq	%rax						\n\
	pushq	%rdx						\n\
	pushq	%rcx						\n\
	pushq	%rsi						\n\
	pushq	%rdi						\n\
	pushq	%r8						\n\
	pushq	%r9						\n\
	movq	7*8+8(%rsp),%rdi				\n\
	jmp	.got_frompc					\n\
								\n\
	.p2align 4,0x90						\n\
	.globl	.mcount						\n\
.mcount:							\n\
	cmpl	$GMON_PROF_OFF,_gmonparam+GM_STATE		\n\
	je	.mcount_exit					\n\
	#							\n\
	# The caller's stack frame has already been built, so	\n\
	# %rbp is the caller's frame pointer.  The caller's	\n\
	# raddr is in the caller's frame following the caller's	\n\
	# caller's frame pointer.				\n\
	#							\n\
	pushq	%rax						\n\
	pushq	%rdx						\n\
	pushq	%rcx						\n\
	pushq	%rsi						\n\
	pushq	%rdi						\n\
	pushq	%r8						\n\
	pushq	%r9						\n\
	movq	8(%rbp),%rdi					\n\
.got_frompc:							\n\
	#							\n\
	# Our raddr is the caller's pc.				\n\
	#							\n\
	movq	7*8(%rsp),%rsi					\n\
								\n\
	pushfq							\n\
	cli							\n\
	call	mcount						\n\
	popfq							\n\
	popq	%r9						\n\
	popq	%r8						\n\
	popq	%rdi						\n\
	popq	%rsi						\n\
	popq	%rcx						\n\
	popq	%rdx						\n\
	popq	%rax						\n\
.mcount_exit:							\n\
	ret	$0						\n\
");
#else /* !__GNUCLIKE_ASM */
#error "this file needs to be ported to your compiler"
#endif /* __GNUCLIKE_ASM */

#ifdef GUPROF
/*
 * [.]mexitcount saves the return register(s), loads selfpc and calls
 * mexitcount(selfpc) to do the work.  It lives in this machine dependent
 * file together with cputime(), __mcount and [.]mcount.  cputime()
 * can't just be put in machdep.c because it has to be compiled without -pg.
 *
 * .mexitcount does nothing unless the 32-bit value at _gmonparam+GM_STATE
 * equals GMON_PROF_HIRES.  When it does, %rax, %rdx, %rcx, %rsi, %rdi, %r8
 * and %r9 are saved, the entry point's own return address is loaded into
 * %rdi as the single argument, and mexitcount() is called with interrupts
 * disabled (flags saved with pushfq before cli, restored with popfq).
 *
 * The GM_STATE symbol used here is the one assigned in the __mcount
 * assembly block earlier in this file.
 */
#ifdef __GNUCLIKE_ASM
__asm("								\n\
	.text							\n\
#								\n\
# Dummy label to be seen when gprof -u hides [.]mexitcount.	\n\
#								\n\
	.p2align 4,0x90						\n\
	.globl	__mexitcount					\n\
	.type	__mexitcount,@function				\n\
__mexitcount:							\n\
	nop							\n\
								\n\
GMON_PROF_HIRES	=	4					\n\
								\n\
	.p2align 4,0x90						\n\
	.globl	.mexitcount					\n\
.mexitcount:							\n\
	cmpl	$GMON_PROF_HIRES,_gmonparam+GM_STATE		\n\
	jne	.mexitcount_exit				\n\
	pushq	%rax						\n\
	pushq	%rdx						\n\
	pushq	%rcx						\n\
	pushq	%rsi						\n\
	pushq	%rdi						\n\
	pushq	%r8						\n\
	pushq	%r9						\n\
	movq	7*8(%rsp),%rdi					\n\
	pushfq							\n\
	cli							\n\
	call	mexitcount					\n\
	popfq							\n\
	popq	%r9						\n\
	popq	%r8						\n\
	popq	%rdi						\n\
	popq	%rsi						\n\
	popq	%rcx						\n\
	popq	%rdx						\n\
	popq	%rax						\n\
.mexitcount_exit:						\n\
	ret	$0						\n\
");
#endif /* __GNUCLIKE_ASM */

/*
 * Return the time elapsed since the last call.  The units are machine-
 * dependent and follow the clock source selected in cputime_clock:
 *
 *   CPUTIME_CLOCK_TSC       half of the difference between the low 32 bits
 *                           of successive TSC readings;
 *   CPUTIME_CLOCK_I586_PMC  difference between the low 32 bits of successive
 *                           readings of performance counter 0 (only when
 *                           built with PERFMON and I586_PMC_GUPROF and
 *                           without SMP);
 *   anything else           i8254 counter 0 ticks, scaled up by
 *                           CPUTIME_CLOCK_I8254_SHIFT bits.
 *
 * The previous reading is kept in a function-local static that is shared by
 * all three paths, so the first value returned after the clock source
 * changes mixes readings from two different clocks.
 */
int
cputime(void)
{
	u_int count;
	int delta;
#if defined(PERFMON) && defined(I586_PMC_GUPROF) && !defined(SMP)
	u_quad_t event_count;
#endif
	u_char high, low;
	static u_int prev_count;

	if (cputime_clock == CPUTIME_CLOCK_TSC) {
		/*
		 * Scale the TSC a little to make cputime()'s frequency
		 * fit in an int, assuming that the TSC frequency fits
		 * in a u_int.  Use a fixed scale since dynamic scaling
		 * would be slower and we can't really use the low bit
		 * of precision.
		 */
		count = (u_int)rdtsc() & ~1u;
		delta = (int)(count - prev_count) >> 1;
		prev_count = count;
		return (delta);
	}
#if defined(PERFMON) && defined(I586_PMC_GUPROF) && !defined(SMP)
	if (cputime_clock == CPUTIME_CLOCK_I586_PMC) {
		/*
		 * XXX perfmon_read() should be inlined so that the
		 * perfmon module doesn't need to be compiled with
		 * profiling disabled and so that it is fast.
		 */
		perfmon_read(0, &event_count);

		count = (u_int)event_count;
		delta = (int)(count - prev_count);
		prev_count = count;
		return (delta);
	}
#endif /* PERFMON && I586_PMC_GUPROF && !SMP */

	/*
	 * Read the current value of the 8254 timer counter 0.  The latch
	 * command must be issued first, and the low byte must be read
	 * before the high byte.
	 */
	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
	low = inb(TIMER_CNTR0);
	high = inb(TIMER_CNTR0);
	count = ((high << 8) | low) << CPUTIME_CLOCK_I8254_SHIFT;

	/*
	 * The timer counts down from its maximum count (i8254_max_count)
	 * to 0 and then resets.
	 * While profiling is enabled, this routine is called at least twice
	 * per timer reset (for mcounting and mexitcounting hardclock()),
	 * so at most one reset has occurred since the last call, and one
	 * has occurred iff the current count is larger than the previous
	 * count.  This allows counter underflow to be detected faster
	 * than in microtime().
	 */
	delta = prev_count - count;
	prev_count = count;
	if (delta <= 0)
		return (delta + (i8254_max_count << CPUTIME_CLOCK_I8254_SHIFT));
	return (delta);
}

static int
sysctl_machdep_cputime_clock(SYSCTL_HANDLER_ARGS)
{
	int clock;
	int error;
#if defined(PERFMON) && defined(I586_PMC_GUPROF)
	int event;
	struct pmc pmc;
#endif

	clock = cputime_clock;
#if defined(PERFMON) && defined(I586_PMC_GUPROF)
	if (clock == CPUTIME_CLOCK_I586_PMC) {
		pmc.pmc_val = cputime_clock_pmc_conf;
		clock += pmc.pmc_event;
	}
#endif
	error = sysctl_handle_opaque(oidp, &clock, sizeof clock, req);
	if (error == 0 && req->newptr != NULL) {
#if defined(PERFMON) && defined(I586_PMC_GUPROF)
		if (clock >= CPUTIME_CLOCK_I586_PMC) {
			event = clock - CPUTIME_CLOCK_I586_PMC;
			if (event >= 256)
				return (EINVAL);
			pmc.pmc_num = 0;
			pmc.pmc_event = event;
			pmc.pmc_unit = 0;
			pmc.pmc_flags = PMCF_E | PMCF_OS | PMCF_USR;
			pmc.pmc_mask = 0;
			cputime_clock_pmc_conf = pmc.pmc_val;
			cputime_clock = CPUTIME_CLOCK_I586_PMC;
		} else
#endif
		{
			if (clock < 0 || clock >= CPUTIME_CLOCK_I586_PMC)
				return (EINVAL);
			cputime_clock = clock;
		}
	}
	return (error);
}

SYSCTL_PROC(_machdep, OID_AUTO, cputime_clock, CTLTYPE_INT | CTLFLAG_RW,
	    0, sizeof(u_int), sysctl_machdep_cputime_clock, "I", "");

/*
 * The start and stop routines need not be here since we turn off profiling
 * before calling them.  They are here for convenience.
 */

/*
 * Prepare the clock used by cputime() for a high resolution profiling run.
 *
 * If no clock source has been chosen yet, the TSC is selected when its
 * frequency is known (tsc_freq != 0) and there is exactly one CPU;
 * otherwise the i8254 is selected.  gp->profrate is then set to the rate
 * at which cputime() advances for the selected source: half the TSC
 * frequency, or the i8254 frequency scaled by CPUTIME_CLOCK_I8254_SHIFT.
 *
 * With PERFMON and I586_PMC_GUPROF, selecting the PMC clock additionally
 * sets up and starts performance counter 0.  On success profrate becomes
 * 1000000, the current *gp is saved for stopguprof() and the overhead
 * fields are zeroed; on failure the i8254 profrate set above is kept.
 *
 * Finally cputime_bias is cleared and cputime() is called once so that
 * its notion of the previous reading is current.
 */
void
startguprof(struct gmonparam *gp)
{
	uint64_t freq;

	freq = atomic_load_acq_64(&tsc_freq);
	if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) {
		if (freq != 0 && mp_ncpus == 1)
			cputime_clock = CPUTIME_CLOCK_TSC;
		else
			cputime_clock = CPUTIME_CLOCK_I8254;
	}
	if (cputime_clock == CPUTIME_CLOCK_TSC) {
		/* cputime() halves TSC deltas, so halve the rate as well. */
		gp->profrate = freq >> 1;
		cputime_prof_active = 1;
	} else
		gp->profrate = i8254_freq << CPUTIME_CLOCK_I8254_SHIFT;
#if defined(PERFMON) && defined(I586_PMC_GUPROF)
	if (cputime_clock == CPUTIME_CLOCK_I586_PMC) {
		if (perfmon_avail() &&
		    perfmon_setup(0, cputime_clock_pmc_conf) == 0) {
			if (perfmon_start(0) != 0)
				perfmon_fini(0);
			else {
				/* XXX 1 event == 1 us. */
				gp->profrate = 1000000;

				saved_gmp = *gp;

				/* Zap overheads.  They are invalid. */
				gp->cputime_overhead = 0;
				gp->mcount_overhead = 0;
				gp->mcount_post_overhead = 0;
				gp->mcount_pre_overhead = 0;
				gp->mexitcount_overhead = 0;
				gp->mexitcount_post_overhead = 0;
				gp->mexitcount_pre_overhead = 0;

				cputime_clock_pmc_init = TRUE;
			}
		}
	}
#endif /* PERFMON && I586_PMC_GUPROF */
	cputime_bias = 0;
	/* Discard the first delta; this only primes the previous reading. */
	cputime();
}

/*
 * Undo what startguprof() set up.
 *
 * With PERFMON and I586_PMC_GUPROF, if the performance counter was started
 * by startguprof(), *gp is restored from the copy saved at that time and
 * the counter is released.
 *
 * cputime_prof_active is always cleared.  It used to be cleared only while
 * cputime_clock was still CPUTIME_CLOCK_TSC, so switching the clock source
 * through the sysctl during a run left the flag set and could later make
 * tsc_freq_changed() warn although no profiling was in progress.
 */
void
stopguprof(struct gmonparam *gp)
{
#if defined(PERFMON) && defined(I586_PMC_GUPROF)
	if (cputime_clock_pmc_init) {
		*gp = saved_gmp;
		perfmon_fini(0);
		cputime_clock_pmc_init = FALSE;
	}
#endif
	cputime_prof_active = 0;
}

/*
 * cpufreq_post_change event handler.
 *
 * Prints a warning when the CPU frequency changes while a profiling run
 * that uses the TSC as its clock is active.  Nothing is printed if the
 * transition reported an error (status != 0) or if the TSC is P-state
 * invariant.  The arg and level parameters are not used.
 */
static void
tsc_freq_changed(void *arg, const struct cf_level *level, int status)
{

	if (status == 0 && !tsc_is_invariant &&
	    cputime_prof_active && cputime_clock == CPUTIME_CLOCK_TSC)
		printf("warning: cpu freq changed while profiling active\n");
}

EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL,
    EVENTHANDLER_PRI_ANY);

#endif /* GUPROF */
OpenPOWER on IntegriCloud