diff options
author | njl <njl@FreeBSD.org> | 2007-03-26 18:03:29 +0000 |
---|---|---|
committer | njl <njl@FreeBSD.org> | 2007-03-26 18:03:29 +0000 |
commit | 4933ca0aa02fff68e3e30186b454ed25d7d926fb (patch) | |
tree | 38a5baa5f3b7261150b7178ed80307e7f2bd65c9 | |
parent | 3cb53690e0c2c81ed3b336133bf003c7b3173abf (diff) | |
download | FreeBSD-src-4933ca0aa02fff68e3e30186b454ed25d7d926fb.zip FreeBSD-src-4933ca0aa02fff68e3e30186b454ed25d7d926fb.tar.gz |
Add an interface for drivers to be notified of changes to CPU frequency.
cpufreq_pre_change is called before the change, giving each driver a chance
to revoke the change. cpufreq_post_change provides the results of the
change (success or failure). cpufreq_levels_changed gives the unit number
of the cpufreq device whose number of available levels has changed. Hook
in all the drivers I could find that needed it.
* TSC: update the TSC frequency value. When the available levels change, take
the highest possible level and pass that frequency to set_cputicker() for CPU
accounting. This gets rid of the "calcru: runtime went backwards" messages.
* identcpu: update the sysctl hw.clockrate value.
* Profiling: if profiling is active when the clock changes, let the user
know the results may be inaccurate.
Reviewed by: bde, phk
MFC after: 1 month
-rw-r--r-- | sys/amd64/amd64/identcpu.c | 17 | ||||
-rw-r--r-- | sys/amd64/amd64/prof_machdep.c | 40 | ||||
-rw-r--r-- | sys/amd64/amd64/tsc.c | 97 | ||||
-rw-r--r-- | sys/contrib/altq/altq/altq_subr.c | 19 | ||||
-rw-r--r-- | sys/i386/i386/identcpu.c | 17 | ||||
-rw-r--r-- | sys/i386/i386/tsc.c | 97 | ||||
-rw-r--r-- | sys/i386/isa/prof_machdep.c | 56 | ||||
-rw-r--r-- | sys/kern/kern_cpu.c | 66 | ||||
-rw-r--r-- | sys/sys/cpu.h | 34 | ||||
-rw-r--r-- | sys/sys/eventhandler.h | 11 |
10 files changed, 406 insertions, 48 deletions
diff --git a/sys/amd64/amd64/identcpu.c b/sys/amd64/amd64/identcpu.c index d0eaee2..096a86f 100644 --- a/sys/amd64/amd64/identcpu.c +++ b/sys/amd64/amd64/identcpu.c @@ -45,6 +45,8 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/bus.h> +#include <sys/cpu.h> +#include <sys/eventhandler.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/sysctl.h> @@ -404,6 +406,21 @@ panicifcpuunsupported(void) } +/* Update TSC freq with the value indicated by the caller. */ +static void +tsc_freq_changed(void *arg, const struct cf_level *level, int status) +{ + /* If there was an error during the transition, don't do anything. */ + if (status != 0) + return; + + /* Total setting for this level gives the new frequency in MHz. */ + hw_clockrate = level->total_set.freq; +} + +EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, + EVENTHANDLER_PRI_ANY); + /* * Final stage of CPU identification. -- Should I check TI? */ diff --git a/sys/amd64/amd64/prof_machdep.c b/sys/amd64/amd64/prof_machdep.c index 48ea613..844c067 100644 --- a/sys/amd64/amd64/prof_machdep.c +++ b/sys/amd64/amd64/prof_machdep.c @@ -35,6 +35,9 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <sys/bus.h> +#include <sys/cpu.h> +#include <sys/eventhandler.h> #include <sys/gmon.h> #include <sys/kernel.h> #include <sys/smp.h> @@ -60,6 +63,9 @@ static u_int cputime_clock_pmc_conf = I586_PMC_GUPROF; static int cputime_clock_pmc_init; static struct gmonparam saved_gmp; #endif +#if defined(I586_CPU) || defined(I686_CPU) +static int cputime_prof_active; +#endif #endif /* GUPROF */ #ifdef __GNUCLIKE_ASM @@ -205,6 +211,7 @@ cputime() u_char high, low; static u_int prev_count; +#if defined(I586_CPU) || defined(I686_CPU) if (cputime_clock == CPUTIME_CLOCK_TSC) { /* * Scale the TSC a little to make cputime()'s frequency @@ -233,6 +240,7 @@ cputime() return (delta); } #endif /* PERFMON && I586_PMC_GUPROF && !SMP */ +#endif /* I586_CPU || I686_CPU */ /* * Read the 
current value of the 8254 timer counter 0. @@ -314,12 +322,17 @@ startguprof(gp) { if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) { cputime_clock = CPUTIME_CLOCK_I8254; - if (tsc_freq != 0 && !tsc_is_broken && mp_ncpus < 2) +#if defined(I586_CPU) || defined(I686_CPU) + if (tsc_freq != 0 && !tsc_is_broken && mp_ncpus == 1) cputime_clock = CPUTIME_CLOCK_TSC; +#endif } gp->profrate = timer_freq << CPUTIME_CLOCK_I8254_SHIFT; - if (cputime_clock == CPUTIME_CLOCK_TSC) +#if defined(I586_CPU) || defined(I686_CPU) + if (cputime_clock == CPUTIME_CLOCK_TSC) { gp->profrate = tsc_freq >> 1; + cputime_prof_active = 1; + } #if defined(PERFMON) && defined(I586_PMC_GUPROF) else if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { if (perfmon_avail() && @@ -346,6 +359,7 @@ startguprof(gp) } } #endif /* PERFMON && I586_PMC_GUPROF */ +#endif /* I586_CPU || I686_CPU */ cputime_bias = 0; cputime(); } @@ -361,5 +375,27 @@ stopguprof(gp) cputime_clock_pmc_init = FALSE; } #endif +#if defined(I586_CPU) || defined(I686_CPU) + if (cputime_clock == CPUTIME_CLOCK_TSC) + cputime_prof_active = 0; +#endif } + +#if defined(I586_CPU) || defined(I686_CPU) +/* If the cpu frequency changed while profiling, report a warning. */ +static void +tsc_freq_changed(void *arg, const struct cf_level *level, int status) +{ + + /* If there was an error during the transition, don't do anything. 
*/ + if (status != 0) + return; + if (cputime_prof_active && cputime_clock == CPUTIME_CLOCK_TSC) + printf("warning: cpu freq changed while profiling active\n"); +} + +EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, + EVENTHANDLER_PRI_ANY); +#endif /* I586_CPU || I686_CPU */ + #endif /* GUPROF */ diff --git a/sys/amd64/amd64/tsc.c b/sys/amd64/amd64/tsc.c index 993991a..c4fae71 100644 --- a/sys/amd64/amd64/tsc.c +++ b/sys/amd64/amd64/tsc.c @@ -30,6 +30,9 @@ __FBSDID("$FreeBSD$"); #include "opt_clock.h" #include <sys/param.h> +#include <sys/bus.h> +#include <sys/cpu.h> +#include <sys/malloc.h> #include <sys/systm.h> #include <sys/sysctl.h> #include <sys/time.h> @@ -41,8 +44,11 @@ __FBSDID("$FreeBSD$"); #include <machine/md_var.h> #include <machine/specialreg.h> +#include "cpufreq_if.h" + uint64_t tsc_freq; int tsc_is_broken; +static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag; #ifdef SMP static int smp_tsc; @@ -51,14 +57,19 @@ SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0, TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc); #endif +static void tsc_freq_changed(void *arg, const struct cf_level *level, + int status); +static void tsc_freq_changing(void *arg, const struct cf_level *level, + int *status); static unsigned tsc_get_timecount(struct timecounter *tc); +static void tsc_levels_changed(void *arg, int unit); static struct timecounter tsc_timecounter = { tsc_get_timecount, /* get_timecount */ 0, /* no poll_pps */ - ~0u, /* counter_mask */ + ~0u, /* counter_mask */ 0, /* frequency */ - "TSC", /* name */ + "TSC", /* name */ 800, /* quality (adjusted in code) */ }; @@ -77,9 +88,23 @@ init_TSC(void) tsc_freq = tscval[1] - tscval[0]; if (bootverbose) printf("TSC clock: %lu Hz\n", tsc_freq); + + /* + * Inform CPU accounting about our boot-time clock rate. Once the + * system is finished booting, we will get the real max clock rate + * via tsc_freq_max(). 
This also will be updated if someone loads + * a cpufreq driver after boot that discovers a new max frequency. + */ set_cputicker(rdtsc, tsc_freq, 1); -} + /* Register to find out about changes in CPU frequency. */ + tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change, + tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST); + tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change, + tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST); + tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed, + tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY); +} void init_TSC_tc(void) @@ -104,6 +129,72 @@ init_TSC_tc(void) } } +/* + * When cpufreq levels change, find out about the (new) max frequency. We + * use this to update CPU accounting in case it got a lower estimate at boot. + */ +static void +tsc_levels_changed(void *arg, int unit) +{ + device_t cf_dev; + struct cf_level *levels; + int count, error; + uint64_t max_freq; + + /* Only use values from the first CPU, assuming all are equal. */ + if (unit != 0) + return; + + /* Find the appropriate cpufreq device instance. */ + cf_dev = devclass_get_device(devclass_find("cpufreq"), unit); + if (cf_dev == NULL) { + printf("tsc_levels_changed() called but no cpufreq device?\n"); + return; + } + + /* Get settings from the device and find the max frequency. */ + count = 64; + levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT); + if (levels == NULL) + return; + error = CPUFREQ_LEVELS(cf_dev, levels, &count); + if (error == 0 && count != 0) { + max_freq = (uint64_t)levels[0].total_set.freq * 1000000; + set_cputicker(rdtsc, max_freq, 1); + } else + printf("tsc_levels_changed: no max freq found\n"); + free(levels, M_TEMP); +} + +/* + * If the TSC timecounter is in use, veto the pending change. It may be + * possible in the future to handle a dynamically-changing timecounter rate. 
+ */ +static void +tsc_freq_changing(void *arg, const struct cf_level *level, int *status) +{ + + if (*status != 0 || timecounter != &tsc_timecounter) + return; + + printf("timecounter TSC must not be in use when " + "changing frequencies; change denied\n"); + *status = EBUSY; +} + +/* Update TSC freq with the value indicated by the caller. */ +static void +tsc_freq_changed(void *arg, const struct cf_level *level, int status) +{ + /* If there was an error during the transition, don't do anything. */ + if (status != 0) + return; + + /* Total setting for this level gives the new frequency in MHz. */ + tsc_freq = (uint64_t)level->total_set.freq * 1000000; + tsc_timecounter.tc_frequency = tsc_freq; +} + static int sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS) { diff --git a/sys/contrib/altq/altq/altq_subr.c b/sys/contrib/altq/altq/altq_subr.c index 0b4d22d..fb6b91c 100644 --- a/sys/contrib/altq/altq/altq_subr.c +++ b/sys/contrib/altq/altq/altq_subr.c @@ -74,6 +74,9 @@ #if __FreeBSD__ < 3 #include "opt_cpu.h" /* for FreeBSD-2.2.8 to get i586_ctr_freq */ #endif +#include <sys/bus.h> +#include <sys/cpu.h> +#include <sys/eventhandler.h> #include <machine/clock.h> #endif #if defined(__i386__) @@ -898,6 +901,22 @@ extern u_int64_t cycles_per_usec; /* alpha cpu clock frequency */ extern u_int64_t cpu_tsc_freq; #endif /* __alpha__ */ +#if (__FreeBSD_version >= 700035) +/* Update TSC freq with the value indicated by the caller. */ +static void +tsc_freq_changed(void *arg, const struct cf_level *level, int status) +{ + /* If there was an error during the transition, don't do anything. */ + if (status != 0) + return; + + /* Total setting for this level gives the new frequency in MHz. 
*/ + machclk_freq = level->total_set.freq * 1000000; +} +EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, + EVENTHANDLER_PRI_ANY); +#endif /* __FreeBSD_version >= 700035 */ + void init_machclk(void) { diff --git a/sys/i386/i386/identcpu.c b/sys/i386/i386/identcpu.c index 09ff188..c2f533c 100644 --- a/sys/i386/i386/identcpu.c +++ b/sys/i386/i386/identcpu.c @@ -45,6 +45,8 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/bus.h> +#include <sys/cpu.h> +#include <sys/eventhandler.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/sysctl.h> @@ -1077,6 +1079,21 @@ identifycyrix(void) write_eflags(eflags); } +/* Update TSC freq with the value indicated by the caller. */ +static void +tsc_freq_changed(void *arg, const struct cf_level *level, int status) +{ + /* If there was an error during the transition, don't do anything. */ + if (status != 0) + return; + + /* Total setting for this level gives the new frequency in MHz. */ + hw_clockrate = level->total_set.freq; +} + +EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, + EVENTHANDLER_PRI_ANY); + /* * Final stage of CPU identification. -- Should I check TI? 
*/ diff --git a/sys/i386/i386/tsc.c b/sys/i386/i386/tsc.c index 5c0ca72..b2de9fe 100644 --- a/sys/i386/i386/tsc.c +++ b/sys/i386/i386/tsc.c @@ -30,6 +30,9 @@ __FBSDID("$FreeBSD$"); #include "opt_clock.h" #include <sys/param.h> +#include <sys/bus.h> +#include <sys/cpu.h> +#include <sys/malloc.h> #include <sys/systm.h> #include <sys/sysctl.h> #include <sys/time.h> @@ -41,9 +44,12 @@ __FBSDID("$FreeBSD$"); #include <machine/md_var.h> #include <machine/specialreg.h> +#include "cpufreq_if.h" + uint64_t tsc_freq; int tsc_is_broken; u_int tsc_present; +static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag; #ifdef SMP static int smp_tsc; @@ -52,14 +58,19 @@ SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0, TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc); #endif +static void tsc_freq_changed(void *arg, const struct cf_level *level, + int status); +static void tsc_freq_changing(void *arg, const struct cf_level *level, + int *status); static unsigned tsc_get_timecount(struct timecounter *tc); +static void tsc_levels_changed(void *arg, int unit); static struct timecounter tsc_timecounter = { tsc_get_timecount, /* get_timecount */ 0, /* no poll_pps */ - ~0u, /* counter_mask */ + ~0u, /* counter_mask */ 0, /* frequency */ - "TSC", /* name */ + "TSC", /* name */ 800, /* quality (adjusted in code) */ }; @@ -86,9 +97,23 @@ init_TSC(void) tsc_freq = tscval[1] - tscval[0]; if (bootverbose) printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq); + + /* + * Inform CPU accounting about our boot-time clock rate. Once the + * system is finished booting, we will get the real max clock rate + * via tsc_freq_max(). This also will be updated if someone loads + * a cpufreq driver after boot that discovers a new max frequency. + */ set_cputicker(rdtsc, tsc_freq, 1); -} + /* Register to find out about changes in CPU frequency. 
*/ + tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change, + tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST); + tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change, + tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST); + tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed, + tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY); +} void init_TSC_tc(void) @@ -128,6 +153,72 @@ init_TSC_tc(void) } } +/* + * When cpufreq levels change, find out about the (new) max frequency. We + * use this to update CPU accounting in case it got a lower estimate at boot. + */ +static void +tsc_levels_changed(void *arg, int unit) +{ + device_t cf_dev; + struct cf_level *levels; + int count, error; + uint64_t max_freq; + + /* Only use values from the first CPU, assuming all are equal. */ + if (unit != 0) + return; + + /* Find the appropriate cpufreq device instance. */ + cf_dev = devclass_get_device(devclass_find("cpufreq"), unit); + if (cf_dev == NULL) { + printf("tsc_levels_changed() called but no cpufreq device?\n"); + return; + } + + /* Get settings from the device and find the max frequency. */ + count = 64; + levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT); + if (levels == NULL) + return; + error = CPUFREQ_LEVELS(cf_dev, levels, &count); + if (error == 0 && count != 0) { + max_freq = (uint64_t)levels[0].total_set.freq * 1000000; + set_cputicker(rdtsc, max_freq, 1); + } else + printf("tsc_levels_changed: no max freq found\n"); + free(levels, M_TEMP); +} + +/* + * If the TSC timecounter is in use, veto the pending change. It may be + * possible in the future to handle a dynamically-changing timecounter rate. + */ +static void +tsc_freq_changing(void *arg, const struct cf_level *level, int *status) +{ + + if (*status != 0 || timecounter != &tsc_timecounter) + return; + + printf("timecounter TSC must not be in use when " + "changing frequencies; change denied\n"); + *status = EBUSY; +} + +/* Update TSC freq with the value indicated by the caller. 
*/ +static void +tsc_freq_changed(void *arg, const struct cf_level *level, int status) +{ + /* If there was an error during the transition, don't do anything. */ + if (status != 0) + return; + + /* Total setting for this level gives the new frequency in MHz. */ + tsc_freq = (uint64_t)level->total_set.freq * 1000000; + tsc_timecounter.tc_frequency = tsc_freq; +} + static int sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS) { diff --git a/sys/i386/isa/prof_machdep.c b/sys/i386/isa/prof_machdep.c index 2ad00bd..855745f 100644 --- a/sys/i386/isa/prof_machdep.c +++ b/sys/i386/isa/prof_machdep.c @@ -33,6 +33,9 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> +#include <sys/bus.h> +#include <sys/cpu.h> +#include <sys/eventhandler.h> #include <sys/gmon.h> #include <sys/kernel.h> #include <sys/smp.h> @@ -56,6 +59,9 @@ static u_int cputime_clock_pmc_conf = I586_PMC_GUPROF; static int cputime_clock_pmc_init; static struct gmonparam saved_gmp; #endif +#if defined(I586_CPU) || defined(I686_CPU) +static int cputime_prof_active; +#endif #endif /* GUPROF */ #ifdef __GNUCLIKE_ASM @@ -72,19 +78,19 @@ __mcount: \n\ # Check that we are profiling. Do it early for speed. \n\ # \n\ cmpl $GMON_PROF_OFF,_gmonparam+GM_STATE \n\ - je .mcount_exit \n\ - # \n\ - # __mcount is the same as [.]mcount except the caller \n\ - # hasn't changed the stack except to call here, so the \n\ + je .mcount_exit \n\ + # \n\ + # __mcount is the same as [.]mcount except the caller \n\ + # hasn't changed the stack except to call here, so the \n\ # caller's raddr is above our raddr. 
\n\ # \n\ - movl 4(%esp),%edx \n\ - jmp .got_frompc \n\ - \n\ - .p2align 4,0x90 \n\ - .globl .mcount \n\ + movl 4(%esp),%edx \n\ + jmp .got_frompc \n\ + \n\ + .p2align 4,0x90 \n\ + .globl .mcount \n\ .mcount: \n\ - .globl __cyg_profile_func_enter \n\ + .globl __cyg_profile_func_enter \n\ __cyg_profile_func_enter: \n\ cmpl $GMON_PROF_OFF,_gmonparam+GM_STATE \n\ je .mcount_exit \n\ @@ -139,7 +145,7 @@ GMON_PROF_HIRES = 4 \n\ .p2align 4,0x90 \n\ .globl .mexitcount \n\ .mexitcount: \n\ - .globl __cyg_profile_func_exit \n\ + .globl __cyg_profile_func_exit \n\ __cyg_profile_func_exit: \n\ cmpl $GMON_PROF_HIRES,_gmonparam+GM_STATE \n\ jne .mexitcount_exit \n\ @@ -287,14 +293,16 @@ startguprof(gp) if (cputime_clock == CPUTIME_CLOCK_UNINITIALIZED) { cputime_clock = CPUTIME_CLOCK_I8254; #if defined(I586_CPU) || defined(I686_CPU) - if (tsc_freq != 0 && !tsc_is_broken && mp_ncpus < 2) + if (tsc_freq != 0 && !tsc_is_broken && mp_ncpus == 1) cputime_clock = CPUTIME_CLOCK_TSC; #endif } gp->profrate = timer_freq << CPUTIME_CLOCK_I8254_SHIFT; #if defined(I586_CPU) || defined(I686_CPU) - if (cputime_clock == CPUTIME_CLOCK_TSC) + if (cputime_clock == CPUTIME_CLOCK_TSC) { gp->profrate = tsc_freq >> 1; + cputime_prof_active = 1; + } #if defined(PERFMON) && defined(I586_PMC_GUPROF) else if (cputime_clock == CPUTIME_CLOCK_I586_PMC) { if (perfmon_avail() && @@ -337,5 +345,27 @@ stopguprof(gp) cputime_clock_pmc_init = FALSE; } #endif +#if defined(I586_CPU) || defined(I686_CPU) + if (cputime_clock == CPUTIME_CLOCK_TSC) + cputime_prof_active = 0; +#endif +} + +#if defined(I586_CPU) || defined(I686_CPU) +/* If the cpu frequency changed while profiling, report a warning. */ +static void +tsc_freq_changed(void *arg, const struct cf_level *level, int status) +{ + + /* If there was an error during the transition, don't do anything. 
*/ + if (status != 0) + return; + if (cputime_prof_active && cputime_clock == CPUTIME_CLOCK_TSC) + printf("warning: cpu freq changed while profiling active\n"); } + +EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, + EVENTHANDLER_PRI_ANY); +#endif /* I586_CPU || I686_CPU */ + #endif /* GUPROF */ diff --git a/sys/kern/kern_cpu.c b/sys/kern/kern_cpu.c index eb5bbb8..3d2adfd 100644 --- a/sys/kern/kern_cpu.c +++ b/sys/kern/kern_cpu.c @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2004-2005 Nate Lawson (SDG) + * Copyright (c) 2004-2007 Nate Lawson (SDG) * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$"); #include <sys/sbuf.h> #include <sys/sx.h> #include <sys/timetc.h> +#include <sys/taskqueue.h> #include "cpufreq_if.h" @@ -73,6 +74,7 @@ struct cpufreq_softc { int max_mhz; device_t dev; struct sysctl_ctx_list sysctl_ctx; + struct task startup_task; }; struct cf_setting_array { @@ -94,8 +96,8 @@ TAILQ_HEAD(cf_setting_lst, cf_setting_array); } while (0) static int cpufreq_attach(device_t dev); +static void cpufreq_startup_task(void *ctx, int pending); static int cpufreq_detach(device_t dev); -static void cpufreq_evaluate(void *arg); static int cf_set_method(device_t dev, const struct cf_level *level, int priority); static int cf_get_method(device_t dev, struct cf_level *level); @@ -127,8 +129,6 @@ static driver_t cpufreq_driver = { static devclass_t cpufreq_dc; DRIVER_MODULE(cpufreq, cpu, cpufreq_driver, cpufreq_dc, 0, 0); -static eventhandler_tag cf_ev_tag; - static int cf_lowest_freq; static int cf_verbose; TUNABLE_INT("debug.cpufreq.lowest", &cf_lowest_freq); @@ -176,12 +176,25 @@ cpufreq_attach(device_t dev) SYSCTL_CHILDREN(device_get_sysctl_tree(parent)), OID_AUTO, "freq_levels", CTLTYPE_STRING | CTLFLAG_RD, sc, 0, cpufreq_levels_sysctl, "A", "CPU frequency levels"); - cf_ev_tag = EVENTHANDLER_REGISTER(cpufreq_changed, cpufreq_evaluate, - NULL, EVENTHANDLER_PRI_ANY); + + /* + * 
Queue a one-shot broadcast that levels have changed. + * It will run once the system has completed booting. + */ + TASK_INIT(&sc->startup_task, 0, cpufreq_startup_task, dev); + taskqueue_enqueue(taskqueue_thread, &sc->startup_task); return (0); } +/* Handle any work to be done for all drivers that attached during boot. */ +static void +cpufreq_startup_task(void *ctx, int pending) +{ + + cpufreq_settings_changed((device_t)ctx); +} + static int cpufreq_detach(device_t dev) { @@ -202,18 +215,11 @@ cpufreq_detach(device_t dev) numdevs = devclass_get_count(cpufreq_dc); if (numdevs == 1) { CF_DEBUG("final shutdown for %s\n", device_get_nameunit(dev)); - EVENTHANDLER_DEREGISTER(cpufreq_changed, cf_ev_tag); } return (0); } -static void -cpufreq_evaluate(void *arg) -{ - /* TODO: Re-evaluate when notified of changes to drivers. */ -} - static int cf_set_method(device_t dev, const struct cf_level *level, int priority) { @@ -222,25 +228,17 @@ cf_set_method(device_t dev, const struct cf_level *level, int priority) struct cf_saved_freq *saved_freq, *curr_freq; struct pcpu *pc; int cpu_id, error, i; - static int once; sc = device_get_softc(dev); error = 0; set = NULL; saved_freq = NULL; - /* - * Check that the TSC isn't being used as a timecounter. - * If it is, then return EBUSY and refuse to change the - * clock speed. - */ - if (strcmp(timecounter->tc_name, "TSC") == 0) { - if (!once) { - printf("cpufreq: frequency change with timecounter" - " TSC not allowed, see cpufreq(4)\n"); - once = 1; - } - return (EBUSY); + /* We are going to change levels so notify the pre-change handler. */ + EVENTHANDLER_INVOKE(cpufreq_pre_change, level, &error); + if (error != 0) { + EVENTHANDLER_INVOKE(cpufreq_post_change, level, error); + return (error); } CF_MTX_LOCK(&sc->lock); @@ -378,8 +376,15 @@ skip: out: CF_MTX_UNLOCK(&sc->lock); + + /* + * We changed levels (or attempted to) so notify the post-change + * handler of new frequency or error. 
+ */ + EVENTHANDLER_INVOKE(cpufreq_post_change, level, error); if (error && set) device_printf(set->dev, "set freq failed, err %d\n", error); + return (error); } @@ -1021,3 +1026,12 @@ cpufreq_unregister(device_t dev) return (0); } + +int +cpufreq_settings_changed(device_t dev) +{ + + EVENTHANDLER_INVOKE(cpufreq_levels_changed, + device_get_unit(device_get_parent(dev))); + return (0); +} diff --git a/sys/sys/cpu.h b/sys/sys/cpu.h index e2337ab..d282d30 100644 --- a/sys/sys/cpu.h +++ b/sys/sys/cpu.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2005 Nate Lawson (SDG) + * Copyright (c) 2005-2007 Nate Lawson (SDG) * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,6 +29,8 @@ #ifndef _SYS_CPU_H_ #define _SYS_CPU_H_ +#include <sys/eventhandler.h> + /* * CPU device support. */ @@ -118,6 +120,36 @@ TAILQ_HEAD(cf_level_lst, cf_level); int cpufreq_register(device_t dev); int cpufreq_unregister(device_t dev); +/* + * Notify the cpufreq core that the number of or values for settings have + * changed. + */ +int cpufreq_settings_changed(device_t dev); + +/* + * Eventhandlers that are called before and after a change in frequency. + * The new level and the result of the change (0 is success) is passed in. + * If the driver wishes to revoke the change from cpufreq_pre_change, it + * stores a non-zero error code in the result parameter and the change will + * not be made. If the post-change eventhandler gets a non-zero result, + * no change was made and the previous level remains in effect. If a change + * is revoked, the post-change eventhandler is still called with the error + * value supplied by the revoking driver. This gives listeners who cached + * some data in preparation for a level change a chance to clean up. 
+ */ +typedef void (*cpufreq_pre_notify_fn)(void *, const struct cf_level *, int *); +typedef void (*cpufreq_post_notify_fn)(void *, const struct cf_level *, int); +EVENTHANDLER_DECLARE(cpufreq_pre_change, cpufreq_pre_notify_fn); +EVENTHANDLER_DECLARE(cpufreq_post_change, cpufreq_post_notify_fn); + +/* + * Eventhandler called when the available list of levels changed. + * The unit number of the device (i.e. "cpufreq0") whose levels changed + * is provided so the listener can retrieve the new list of levels. + */ +typedef void (*cpufreq_levels_notify_fn)(void *, int); +EVENTHANDLER_DECLARE(cpufreq_levels_changed, cpufreq_levels_notify_fn); + /* Allow values to be +/- a bit since sometimes we have to estimate. */ #define CPUFREQ_CMP(x, y) (abs((x) - (y)) < 25) diff --git a/sys/sys/eventhandler.h b/sys/sys/eventhandler.h index f4f1b8c..d18b635 100644 --- a/sys/sys/eventhandler.h +++ b/sys/sys/eventhandler.h @@ -104,6 +104,17 @@ struct eventhandler_entry_ ## name \ }; \ struct __hack +#define EVENTHANDLER_DEFINE(name, func, arg, priority) \ + static eventhandler_tag name ## _tag; \ + static void name ## _evh_init(void *ctx) \ + { \ + name ## _tag = EVENTHANDLER_REGISTER(name, func, ctx, \ + priority); \ + } \ + SYSINIT(name ## _evh_init, SI_SUB_CONFIGURE, SI_ORDER_ANY, \ + name ## _evh_init, arg) \ + struct __hack + #define EVENTHANDLER_INVOKE(name, ...) \ do { \ struct eventhandler_list *_el; \ |