summaryrefslogtreecommitdiffstats
path: root/sys/x86/cpufreq/hwpstate.c
blob: 272792658e62b8f159c56222a80257e036a1d9e1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
/*-
 * Copyright (c) 2005 Nate Lawson
 * Copyright (c) 2004 Colin Percival
 * Copyright (c) 2004-2005 Bruno Durcot
 * Copyright (c) 2004 FUKUDA Nobuhiko
 * Copyright (c) 2009 Michael Reifenberger
 * Copyright (c) 2009 Norikatsu Shigemura
 * Copyright (c) 2008-2009 Gen Otsuji
 *
 * This code is depending on kern_cpu.c, est.c, powernow.c, p4tcc.c, smist.c
 * in various parts. The authors of these files are Nate Lawson,
 * Colin Percival, Bruno Durcot, and FUKUDA Nobuhiko.
 * This code contains patches by Michael Reifenberger and Norikatsu Shigemura.
 * Thank you.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted providing that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * For more info:
 * BIOS and Kernel Developer's Guide(BKDG) for AMD Family 10h Processors
 * 31116 Rev 3.20  February 04, 2009
 * BIOS and Kernel Developer's Guide(BKDG) for AMD Family 11h Processors
 * 41256 Rev 3.00 - July 07, 2008
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/cpu.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/pcpu.h>
#include <sys/smp.h>
#include <sys/sched.h>

#include <machine/md_var.h>
#include <machine/cputypes.h>
#include <machine/specialreg.h>

#include <contrib/dev/acpica/include/acpi.h>

#include <dev/acpica/acpivar.h>

#include "acpi_if.h"
#include "cpufreq_if.h"

#define	MSR_AMD_10H_11H_LIMIT	0xc0010061
#define	MSR_AMD_10H_11H_CONTROL	0xc0010062
#define	MSR_AMD_10H_11H_STATUS	0xc0010063
#define	MSR_AMD_10H_11H_CONFIG	0xc0010064

#define	AMD_10H_11H_MAX_STATES	16

/* for MSR_AMD_10H_11H_LIMIT C001_0061 */
#define	AMD_10H_11H_GET_PSTATE_MAX_VAL(msr)	(((msr) >> 4) & 0x7)
#define	AMD_10H_11H_GET_PSTATE_LIMIT(msr)	(((msr)) & 0x7)
/* for MSR_AMD_10H_11H_CONFIG 10h:C001_0064:68 / 11h:C001_0064:6B */
#define	AMD_10H_11H_CUR_VID(msr)		(((msr) >> 9) & 0x7F)
#define	AMD_10H_11H_CUR_DID(msr)		(((msr) >> 6) & 0x07)
#define	AMD_10H_11H_CUR_FID(msr)		((msr) & 0x3F)

#define	HWPSTATE_DEBUG(dev, msg...)			\
	do{						\
		if(hwpstate_verbose)			\
			device_printf(dev, msg);	\
	}while(0)

struct hwpstate_setting {
	int	freq;		/* CPU clock in Mhz or 100ths of a percent. */
	int	volts;		/* Voltage in mV. */
	int	power;		/* Power consumed in mW. */
	int	lat;		/* Transition latency in us. */
	int	pstate_id;	/* P-State id */
};

struct hwpstate_softc {
	device_t		dev;
	struct hwpstate_setting	hwpstate_settings[AMD_10H_11H_MAX_STATES];
	int			cfnum;
};

static void	hwpstate_identify(driver_t *driver, device_t parent);
static int	hwpstate_probe(device_t dev);
static int	hwpstate_attach(device_t dev);
static int	hwpstate_detach(device_t dev);
static int	hwpstate_set(device_t dev, const struct cf_setting *cf);
static int	hwpstate_get(device_t dev, struct cf_setting *cf);
static int	hwpstate_settings(device_t dev, struct cf_setting *sets, int *count);
static int	hwpstate_type(device_t dev, int *type);
static int	hwpstate_shutdown(device_t dev);
static int	hwpstate_features(driver_t *driver, u_int *features);
static int	hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev);
static int	hwpstate_get_info_from_msr(device_t dev);
static int	hwpstate_goto_pstate(device_t dev, int pstate_id);

static int	hwpstate_verbose = 0;
SYSCTL_INT(_debug, OID_AUTO, hwpstate_verbose, CTLFLAG_RW | CTLFLAG_TUN,
       &hwpstate_verbose, 0, "Debug hwpstate");
TUNABLE_INT("debug.hwpstate_verbose", &hwpstate_verbose);

static device_method_t hwpstate_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	hwpstate_identify),
	DEVMETHOD(device_probe,		hwpstate_probe),
	DEVMETHOD(device_attach,	hwpstate_attach),
	DEVMETHOD(device_detach,	hwpstate_detach),
	DEVMETHOD(device_shutdown,	hwpstate_shutdown),

	/* cpufreq interface */
	DEVMETHOD(cpufreq_drv_set,	hwpstate_set),
	DEVMETHOD(cpufreq_drv_get,	hwpstate_get),
	DEVMETHOD(cpufreq_drv_settings,	hwpstate_settings),
	DEVMETHOD(cpufreq_drv_type,	hwpstate_type),

	/* ACPI interface */
	DEVMETHOD(acpi_get_features,	hwpstate_features),

	{0, 0}
};

static devclass_t hwpstate_devclass;
static driver_t hwpstate_driver = {
	"hwpstate",
	hwpstate_methods,
	sizeof(struct hwpstate_softc),
};

DRIVER_MODULE(hwpstate, cpu, hwpstate_driver, hwpstate_devclass, 0, 0);

/*
 * Go to Px-state on all cpus considering the limit.
 */
static int
hwpstate_goto_pstate(device_t dev, int pstate)
{
	int i;
	uint64_t msr;
	int j;
	int limit;
	int id = pstate;
	int error;
	
	/* get the current pstate limit */
	msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
	limit = AMD_10H_11H_GET_PSTATE_LIMIT(msr);
	if(limit > id)
		id = limit;

	/*
	 * We are going to the same Px-state on all cpus.
	 * Probably should take _PSD into account.
	 */
	error = 0;
	CPU_FOREACH(i) {
		/* Bind to each cpu. */
		thread_lock(curthread);
		sched_bind(curthread, i);
		thread_unlock(curthread);
		HWPSTATE_DEBUG(dev, "setting P%d-state on cpu%d\n",
			id, PCPU_GET(cpuid));
		/* Go To Px-state */
		wrmsr(MSR_AMD_10H_11H_CONTROL, id);
		/* wait loop (100*100 usec is enough ?) */
		for(j = 0; j < 100; j++){
			msr = rdmsr(MSR_AMD_10H_11H_STATUS);
			if(msr == id){
				break;
			}
			DELAY(100);
		}
		/* get the result. not assure msr=id */
		msr = rdmsr(MSR_AMD_10H_11H_STATUS);
		HWPSTATE_DEBUG(dev, "result  P%d-state on cpu%d\n",
		    (int)msr, PCPU_GET(cpuid));
		if (msr != id) {
			HWPSTATE_DEBUG(dev, "error: loop is not enough.\n");
			error = ENXIO;
		}
	}
	thread_lock(curthread);
	sched_unbind(curthread);
	thread_unlock(curthread);
	return (error);
}

static int
hwpstate_set(device_t dev, const struct cf_setting *cf)
{
	struct hwpstate_softc *sc;
	struct hwpstate_setting *set;
	int i;

	if (cf == NULL)
		return (EINVAL);
	sc = device_get_softc(dev);
	set = sc->hwpstate_settings;
	for (i = 0; i < sc->cfnum; i++)
		if (CPUFREQ_CMP(cf->freq, set[i].freq))
			break;
	if (i == sc->cfnum)
		return (EINVAL);

	return (hwpstate_goto_pstate(dev, set[i].pstate_id));
}

static int
hwpstate_get(device_t dev, struct cf_setting *cf)
{
	struct hwpstate_softc *sc;
	struct hwpstate_setting set;
	uint64_t msr;

	sc = device_get_softc(dev);
	if (cf == NULL)
		return (EINVAL);
	msr = rdmsr(MSR_AMD_10H_11H_STATUS);
	if(msr >= sc->cfnum)
		return (EINVAL);
	set = sc->hwpstate_settings[msr];

	cf->freq = set.freq;
	cf->volts = set.volts;
	cf->power = set.power;
	cf->lat = set.lat;
	cf->dev = dev;
	return (0);
}

static int
hwpstate_settings(device_t dev, struct cf_setting *sets, int *count)
{
	struct hwpstate_softc *sc;
	struct hwpstate_setting set;
	int i;

	if (sets == NULL || count == NULL)
		return (EINVAL);
	sc = device_get_softc(dev);
	if (*count < sc->cfnum)
		return (E2BIG);
	for (i = 0; i < sc->cfnum; i++, sets++) {
		set = sc->hwpstate_settings[i];
		sets->freq = set.freq;
		sets->volts = set.volts;
		sets->power = set.power;
		sets->lat = set.lat;
		sets->dev = dev;
	}
	*count = sc->cfnum;

	return (0);
}

static int
hwpstate_type(device_t dev, int *type)
{

	if (type == NULL)
		return (EINVAL);

	*type = CPUFREQ_TYPE_ABSOLUTE;
	return (0);
}

static void
hwpstate_identify(driver_t *driver, device_t parent)
{

	if (device_find_child(parent, "hwpstate", -1) != NULL)
		return;

	if (cpu_vendor_id != CPU_VENDOR_AMD || CPUID_TO_FAMILY(cpu_id) < 0x10)
		return;

	/*
	 * Check if hardware pstate enable bit is set.
	 */
	if ((amd_pminfo & AMDPM_HW_PSTATE) == 0) {
		HWPSTATE_DEBUG(parent, "hwpstate enable bit is not set.\n");
		return;
	}

	if (resource_disabled("hwpstate", 0))
		return;

	if (BUS_ADD_CHILD(parent, 10, "hwpstate", -1) == NULL)
		device_printf(parent, "hwpstate: add child failed\n");
}

static int
hwpstate_probe(device_t dev)
{
	struct hwpstate_softc *sc;
	device_t perf_dev;
	uint64_t msr;
	int error, type;

	/*
	 * Only hwpstate0.
	 * It goes well with acpi_throttle.
	 */
	if (device_get_unit(dev) != 0)
		return (ENXIO);

	sc = device_get_softc(dev);
	sc->dev = dev;

	/*
	 * Check if acpi_perf has INFO only flag.
	 */
	perf_dev = device_find_child(device_get_parent(dev), "acpi_perf", -1);
	error = TRUE;
	if (perf_dev && device_is_attached(perf_dev)) {
		error = CPUFREQ_DRV_TYPE(perf_dev, &type);
		if (error == 0) {
			if ((type & CPUFREQ_FLAG_INFO_ONLY) == 0) {
				/*
				 * If acpi_perf doesn't have INFO_ONLY flag,
				 * it will take care of pstate transitions.
				 */
				HWPSTATE_DEBUG(dev, "acpi_perf will take care of pstate transitions.\n");
				return (ENXIO);
			} else {
				/*
				 * If acpi_perf has INFO_ONLY flag, (_PCT has FFixedHW)
				 * we can get _PSS info from acpi_perf
				 * without going into ACPI.
				 */
				HWPSTATE_DEBUG(dev, "going to fetch info from acpi_perf\n");
				error = hwpstate_get_info_from_acpi_perf(dev, perf_dev);
			}
		}
	}

	if (error == 0) {
		/*
		 * Now we get _PSS info from acpi_perf without error.
		 * Let's check it.
		 */
		msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
		if (sc->cfnum != 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr)) {
			HWPSTATE_DEBUG(dev, "msr and acpi _PSS count mismatch.\n");
			error = TRUE;
		}
	}

	/*
	 * If we cannot get info from acpi_perf,
	 * Let's get info from MSRs.
	 */
	if (error)
		error = hwpstate_get_info_from_msr(dev);
	if (error)
		return (error);

	device_set_desc(dev, "Cool`n'Quiet 2.0");
	return (0);
}

static int
hwpstate_attach(device_t dev)
{

	return (cpufreq_register(dev));
}

static int
hwpstate_get_info_from_msr(device_t dev)
{
	struct hwpstate_softc *sc;
	struct hwpstate_setting *hwpstate_set;
	uint64_t msr;
	int family, i, fid, did;

	family = CPUID_TO_FAMILY(cpu_id);
	sc = device_get_softc(dev);
	/* Get pstate count */
	msr = rdmsr(MSR_AMD_10H_11H_LIMIT);
	sc->cfnum = 1 + AMD_10H_11H_GET_PSTATE_MAX_VAL(msr);
	hwpstate_set = sc->hwpstate_settings;
	for (i = 0; i < sc->cfnum; i++) {
		msr = rdmsr(MSR_AMD_10H_11H_CONFIG + i);
		if ((msr & ((uint64_t)1 << 63)) != ((uint64_t)1 << 63)) {
			HWPSTATE_DEBUG(dev, "msr is not valid.\n");
			return (ENXIO);
		}
		did = AMD_10H_11H_CUR_DID(msr);
		fid = AMD_10H_11H_CUR_FID(msr);
		switch(family) {
		case 0x11:
			/* fid/did to frequency */
			hwpstate_set[i].freq = 100 * (fid + 0x08) / (1 << did);
			break;
		case 0x10:
			/* fid/did to frequency */
			hwpstate_set[i].freq = 100 * (fid + 0x10) / (1 << did);
			break;
		default:
			HWPSTATE_DEBUG(dev, "get_info_from_msr: AMD family %d CPU's are not implemented yet. sorry.\n", family);
			return (ENXIO);
			break;
		}
		hwpstate_set[i].pstate_id = i;
		/* There was volts calculation, but deleted it. */
		hwpstate_set[i].volts = CPUFREQ_VAL_UNKNOWN;
		hwpstate_set[i].power = CPUFREQ_VAL_UNKNOWN;
		hwpstate_set[i].lat = CPUFREQ_VAL_UNKNOWN;
	}
	return (0);
}

static int
hwpstate_get_info_from_acpi_perf(device_t dev, device_t perf_dev)
{
	struct hwpstate_softc *sc;
	struct cf_setting *perf_set;
	struct hwpstate_setting *hwpstate_set;
	int count, error, i;

	perf_set = malloc(MAX_SETTINGS * sizeof(*perf_set), M_TEMP, M_NOWAIT);
	if (perf_set == NULL) {
		HWPSTATE_DEBUG(dev, "nomem\n");
		return (ENOMEM);
	}
	/*
	 * Fetch settings from acpi_perf.
	 * Now it is attached, and has info only flag.
	 */
	count = MAX_SETTINGS;
	error = CPUFREQ_DRV_SETTINGS(perf_dev, perf_set, &count);
	if (error) {
		HWPSTATE_DEBUG(dev, "error: CPUFREQ_DRV_SETTINGS.\n");
		goto out;
	}
	sc = device_get_softc(dev);
	sc->cfnum = count;
	hwpstate_set = sc->hwpstate_settings;
	for (i = 0; i < count; i++) {
		if (i == perf_set[i].spec[0]) {
			hwpstate_set[i].pstate_id = i;
			hwpstate_set[i].freq = perf_set[i].freq;
			hwpstate_set[i].volts = perf_set[i].volts;
			hwpstate_set[i].power = perf_set[i].power;
			hwpstate_set[i].lat = perf_set[i].lat;
		} else {
			HWPSTATE_DEBUG(dev, "ACPI _PSS object mismatch.\n");
			error = ENXIO;
			goto out;
		}
	}
out:
	if (perf_set)
		free(perf_set, M_TEMP);
	return (error);
}

static int
hwpstate_detach(device_t dev)
{

	hwpstate_goto_pstate(dev, 0);
	return (cpufreq_unregister(dev));
}

static int
hwpstate_shutdown(device_t dev)
{

	/* hwpstate_goto_pstate(dev, 0); */
	return (0);
}

static int
hwpstate_features(driver_t *driver, u_int *features)
{

	/* Notify the ACPI CPU that we support direct access to MSRs */
	*features = ACPI_CAP_PERF_MSRS;
	return (0);
}
OpenPOWER on IntegriCloud