diff options
-rw-r--r-- | sys/amd64/amd64/machdep.c | 211 | ||||
-rw-r--r-- | sys/amd64/include/pcpu.h | 1 | ||||
-rw-r--r-- | sys/arm/arm/machdep.c | 9 | ||||
-rw-r--r-- | sys/i386/i386/machdep.c | 209 | ||||
-rw-r--r-- | sys/i386/include/pcpu.h | 1 | ||||
-rw-r--r-- | sys/ia64/ia64/machdep.c | 9 | ||||
-rw-r--r-- | sys/kern/sched_4bsd.c | 2 | ||||
-rw-r--r-- | sys/kern/sched_ule.c | 14 | ||||
-rw-r--r-- | sys/mips/mips/machdep.c | 9 | ||||
-rw-r--r-- | sys/pc98/pc98/machdep.c | 9 | ||||
-rw-r--r-- | sys/powerpc/aim/machdep.c | 9 | ||||
-rw-r--r-- | sys/powerpc/booke/machdep.c | 9 | ||||
-rw-r--r-- | sys/sparc64/sparc64/machdep.c | 9 | ||||
-rw-r--r-- | sys/sun4v/sun4v/machdep.c | 9 | ||||
-rw-r--r-- | sys/sys/proc.h | 3 |
15 files changed, 418 insertions, 95 deletions
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 43fbfc3..42fc22c 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$"); #include "opt_maxmem.h" #include "opt_msgbuf.h" #include "opt_perfmon.h" +#include "opt_sched.h" #include <sys/param.h> #include <sys/proc.h> @@ -527,62 +528,192 @@ cpu_halt(void) __asm__ ("hlt"); } -/* - * Hook to idle the CPU when possible. In the SMP case we default to - * off because a halted cpu will not currently pick up a new thread in the - * run queue until the next timer tick. If turned on this will result in - * approximately a 4.2% loss in real time performance in buildworld tests - * (but improves user and sys times oddly enough), and saves approximately - * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3). - * - * XXX we need to have a cpu mask of idle cpus and generate an IPI or - * otherwise generate some sort of interrupt to wake up cpus sitting in HLT. - * Then we can have our cake and eat it too. - * - * XXX I'm turning it on for SMP as well by default for now. It seems to - * help lock contention somewhat, and this is critical for HTT. -Peter - */ -static int cpu_idle_hlt = 1; -TUNABLE_INT("machdep.cpu_idle_hlt", &cpu_idle_hlt); -SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, - &cpu_idle_hlt, 0, "Idle loop HLT enable"); +void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */ static void -cpu_idle_default(void) +cpu_idle_hlt(int busy) { /* - * we must absolutely guarentee that hlt is the - * absolute next instruction after sti or we - * introduce a timing window. + * we must absolutely guarentee that hlt is the next instruction + * after sti or we introduce a timing window. */ - __asm __volatile("sti; hlt"); + disable_intr(); + if (sched_runnable()) + enable_intr(); + else + __asm __volatile("sti; hlt"); } -/* - * Note that we have to be careful here to avoid a race between checking - * sched_runnable() and actually halting. If we don't do this, we may waste - * the time between calling hlt and the next interrupt even though there - * is a runnable process. - */ -void -cpu_idle(void) +static void +cpu_idle_acpi(int busy) { + disable_intr(); + if (sched_runnable()) + enable_intr(); + else if (cpu_idle_hook) + cpu_idle_hook(); + else + __asm __volatile("sti; hlt"); +} +static void +cpu_idle_spin(int busy) +{ + return; +} + +void (*cpu_idle_fn)(int) = cpu_idle_acpi; + +void +cpu_idle(int busy) +{ #ifdef SMP if (mp_grab_cpu_hlt()) return; #endif - if (cpu_idle_hlt) { - disable_intr(); - if (sched_runnable()) - enable_intr(); - else - (*cpu_idle_hook)(); + cpu_idle_fn(busy); +} + +/* + * mwait cpu power states. Lower 4 bits are sub-states. + */ +#define MWAIT_C0 0xf0 +#define MWAIT_C1 0x00 +#define MWAIT_C2 0x10 +#define MWAIT_C3 0x20 +#define MWAIT_C4 0x30 + +#define MWAIT_DISABLED 0x0 +#define MWAIT_WOKEN 0x1 +#define MWAIT_WAITING 0x2 + +static void +cpu_idle_mwait(int busy) +{ + int *mwait; + + mwait = (int *)PCPU_PTR(monitorbuf); + *mwait = MWAIT_WAITING; + if (sched_runnable()) + return; + cpu_monitor(mwait, 0, 0); + if (*mwait == MWAIT_WAITING) + cpu_mwait(0, MWAIT_C1); +} + +static void +cpu_idle_mwait_hlt(int busy) +{ + int *mwait; + + mwait = (int *)PCPU_PTR(monitorbuf); + if (busy == 0) { + *mwait = MWAIT_DISABLED; + cpu_idle_hlt(busy); + return; + } + *mwait = MWAIT_WAITING; + if (sched_runnable()) + return; + cpu_monitor(mwait, 0, 0); + if (*mwait == MWAIT_WAITING) + cpu_mwait(0, MWAIT_C1); +} + +int +cpu_idle_wakeup(int cpu) +{ + struct pcpu *pcpu; + int *mwait; + + if (cpu_idle_fn == cpu_idle_spin) + return (1); + if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt) + return (0); + pcpu = pcpu_find(cpu); + mwait = (int *)pcpu->pc_monitorbuf; + /* + * This doesn't need to be atomic since missing the race will + * simply result in unnecessary IPIs. + */ + if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED) + return (0); + *mwait = MWAIT_WOKEN; + + return (1); +} + +/* + * Ordered by speed/power consumption. + */ +struct { + void *id_fn; + char *id_name; +} idle_tbl[] = { + { cpu_idle_spin, "spin" }, + { cpu_idle_mwait, "mwait" }, + { cpu_idle_mwait_hlt, "mwait_hlt" }, + { cpu_idle_hlt, "hlt" }, + { cpu_idle_acpi, "acpi" }, + { NULL, NULL } +}; + +static int +idle_sysctl_available(SYSCTL_HANDLER_ARGS) +{ + char *avail, *p; + int error; + int i; + + avail = malloc(256, M_TEMP, M_WAITOK); + p = avail; + for (i = 0; idle_tbl[i].id_name != NULL; i++) { + if (strstr(idle_tbl[i].id_name, "mwait") && + (cpu_feature2 & CPUID2_MON) == 0) + continue; + p += sprintf(p, "%s, ", idle_tbl[i].id_name); + } + error = sysctl_handle_string(oidp, avail, 0, req); + free(avail, M_TEMP); + return (error); +} + +static int +idle_sysctl(SYSCTL_HANDLER_ARGS) +{ + char buf[16]; + int error; + char *p; + int i; + + p = "unknown"; + for (i = 0; idle_tbl[i].id_name != NULL; i++) { + if (idle_tbl[i].id_fn == cpu_idle_fn) { + p = idle_tbl[i].id_name; + break; + } } + strncpy(buf, p, sizeof(buf)); + error = sysctl_handle_string(oidp, buf, sizeof(buf), req); + if (error != 0 || req->newptr == NULL) + return (error); + for (i = 0; idle_tbl[i].id_name != NULL; i++) { + if (strstr(idle_tbl[i].id_name, "mwait") && + (cpu_feature2 & CPUID2_MON) == 0) + continue; + if (strcmp(idle_tbl[i].id_name, buf)) + continue; + cpu_idle_fn = idle_tbl[i].id_fn; + return (0); + } + return (EINVAL); } -/* Other subsystems (e.g., ACPI) can hook this later. */ -void (*cpu_idle_hook)(void) = cpu_idle_default; +SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, + 0, 0, idle_sysctl_available, "A", "list of available idle functions"); + +SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, + idle_sysctl, "A", "currently selected idle function"); /* * Clear registers on exec diff --git a/sys/amd64/include/pcpu.h b/sys/amd64/include/pcpu.h index 9245bbe..444109e 100644 --- a/sys/amd64/include/pcpu.h +++ b/sys/amd64/include/pcpu.h @@ -43,6 +43,7 @@ * other processors" */ #define PCPU_MD_FIELDS \ + char pc_monitorbuf[128] __aligned(128); /* cache line */ \ struct pcpu *pc_prvspace; /* Self-reference */ \ struct pmap *pc_curpmap; \ struct amd64tss *pc_tssp; \ diff --git a/sys/arm/arm/machdep.c b/sys/arm/arm/machdep.c index f4779b7..4bd7886 100644 --- a/sys/arm/arm/machdep.c +++ b/sys/arm/arm/machdep.c @@ -326,12 +326,19 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) } void -cpu_idle(void) +cpu_idle(int busy) { cpu_sleep(0); } int +cpu_idle_wakeup(int cpu) +{ + + return (0); +} + +int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tf = td->td_frame; diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index b1a0d74..e38015f 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -1128,63 +1128,192 @@ cpu_halt(void) __asm__ ("hlt"); } -/* - * Hook to idle the CPU when possible. In the SMP case we default to - * off because a halted cpu will not currently pick up a new thread in the - * run queue until the next timer tick. If turned on this will result in - * approximately a 4.2% loss in real time performance in buildworld tests - * (but improves user and sys times oddly enough), and saves approximately - * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3). - * - * XXX we need to have a cpu mask of idle cpus and generate an IPI or - * otherwise generate some sort of interrupt to wake up cpus sitting in HLT. - * Then we can have our cake and eat it too. - * - * XXX I'm turning it on for SMP as well by default for now. It seems to - * help lock contention somewhat, and this is critical for HTT. -Peter - */ -static int cpu_idle_hlt = 1; -TUNABLE_INT("machdep.cpu_idle_hlt", &cpu_idle_hlt); -SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, - &cpu_idle_hlt, 0, "Idle loop HLT enable"); +void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */ static void -cpu_idle_default(void) +cpu_idle_hlt(int busy) { /* - * we must absolutely guarentee that hlt is the - * absolute next instruction after sti or we - * introduce a timing window. + * we must absolutely guarentee that hlt is the next instruction + * after sti or we introduce a timing window. */ - __asm __volatile("sti; hlt"); + disable_intr(); + if (sched_runnable()) + enable_intr(); + else + __asm __volatile("sti; hlt"); } -/* - * Note that we have to be careful here to avoid a race between checking - * sched_runnable() and actually halting. If we don't do this, we may waste - * the time between calling hlt and the next interrupt even though there - * is a runnable process. - */ -void -cpu_idle(void) +static void +cpu_idle_acpi(int busy) { + disable_intr(); + if (sched_runnable()) + enable_intr(); + else if (cpu_idle_hook) + cpu_idle_hook(); + else + __asm __volatile("sti; hlt"); +} +static void +cpu_idle_spin(int busy) +{ + return; +} + +void (*cpu_idle_fn)(int) = cpu_idle_acpi; + +void +cpu_idle(int busy) +{ #ifdef SMP if (mp_grab_cpu_hlt()) return; #endif + cpu_idle_fn(busy); +} + +/* + * mwait cpu power states. Lower 4 bits are sub-states. + */ +#define MWAIT_C0 0xf0 +#define MWAIT_C1 0x00 +#define MWAIT_C2 0x10 +#define MWAIT_C3 0x20 +#define MWAIT_C4 0x30 + +#define MWAIT_DISABLED 0x0 +#define MWAIT_WOKEN 0x1 +#define MWAIT_WAITING 0x2 - if (cpu_idle_hlt) { - disable_intr(); - if (sched_runnable()) - enable_intr(); - else - (*cpu_idle_hook)(); +static void +cpu_idle_mwait(int busy) +{ + int *mwait; + + mwait = (int *)PCPU_PTR(monitorbuf); + *mwait = MWAIT_WAITING; + if (sched_runnable()) + return; + cpu_monitor(mwait, 0, 0); + if (*mwait == MWAIT_WAITING) + cpu_mwait(0, MWAIT_C1); +} + +static void +cpu_idle_mwait_hlt(int busy) +{ + int *mwait; + + mwait = (int *)PCPU_PTR(monitorbuf); + if (busy == 0) { + *mwait = MWAIT_DISABLED; + cpu_idle_hlt(busy); + return; } + *mwait = MWAIT_WAITING; + if (sched_runnable()) + return; + cpu_monitor(mwait, 0, 0); + if (*mwait == MWAIT_WAITING) + cpu_mwait(0, MWAIT_C1); } -/* Other subsystems (e.g., ACPI) can hook this later. */ -void (*cpu_idle_hook)(void) = cpu_idle_default; +int +cpu_idle_wakeup(int cpu) +{ + struct pcpu *pcpu; + int *mwait; + + if (cpu_idle_fn == cpu_idle_spin) + return (1); + if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt) + return (0); + pcpu = pcpu_find(cpu); + mwait = (int *)pcpu->pc_monitorbuf; + /* + * This doesn't need to be atomic since missing the race will + * simply result in unnecessary IPIs. + */ + if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED) + return (0); + *mwait = MWAIT_WOKEN; + + return (1); +} + +/* + * Ordered by speed/power consumption. + */ +struct { + void *id_fn; + char *id_name; +} idle_tbl[] = { + { cpu_idle_spin, "spin" }, + { cpu_idle_mwait, "mwait" }, + { cpu_idle_mwait_hlt, "mwait_hlt" }, + { cpu_idle_hlt, "hlt" }, + { cpu_idle_acpi, "acpi" }, + { NULL, NULL } +}; + +static int +idle_sysctl_available(SYSCTL_HANDLER_ARGS) +{ + char *avail, *p; + int error; + int i; + + avail = malloc(256, M_TEMP, M_WAITOK); + p = avail; + for (i = 0; idle_tbl[i].id_name != NULL; i++) { + if (strstr(idle_tbl[i].id_name, "mwait") && + (cpu_feature2 & CPUID2_MON) == 0) + continue; + p += sprintf(p, "%s, ", idle_tbl[i].id_name); + } + error = sysctl_handle_string(oidp, avail, 0, req); + free(avail, M_TEMP); + return (error); +} + +static int +idle_sysctl(SYSCTL_HANDLER_ARGS) +{ + char buf[16]; + int error; + char *p; + int i; + + p = "unknown"; + for (i = 0; idle_tbl[i].id_name != NULL; i++) { + if (idle_tbl[i].id_fn == cpu_idle_fn) { + p = idle_tbl[i].id_name; + break; + } + } + strncpy(buf, p, sizeof(buf)); + error = sysctl_handle_string(oidp, buf, sizeof(buf), req); + if (error != 0 || req->newptr == NULL) + return (error); + for (i = 0; idle_tbl[i].id_name != NULL; i++) { + if (strstr(idle_tbl[i].id_name, "mwait") && + (cpu_feature2 & CPUID2_MON) == 0) + continue; + if (strcmp(idle_tbl[i].id_name, buf)) + continue; + cpu_idle_fn = idle_tbl[i].id_fn; + return (0); + } + return (EINVAL); +} + +SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, + 0, 0, idle_sysctl_available, "A", "list of available idle functions"); + +SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, + idle_sysctl, "A", "currently selected idle function"); /* * Clear registers on exec diff --git a/sys/i386/include/pcpu.h b/sys/i386/include/pcpu.h index 67cb530..c28ae64 100644 --- a/sys/i386/include/pcpu.h +++ b/sys/i386/include/pcpu.h @@ -46,6 +46,7 @@ * other processors" */ #define PCPU_MD_FIELDS \ + char pc_monitorbuf[128] __aligned(128); /* cache line */ \ struct pcpu *pc_prvspace; /* Self-reference */ \ struct pmap *pc_curpmap; \ struct i386tss pc_common_tss; \ diff --git a/sys/ia64/ia64/machdep.c b/sys/ia64/ia64/machdep.c index 45e57de..d6ed3c3 100644 --- a/sys/ia64/ia64/machdep.c +++ b/sys/ia64/ia64/machdep.c @@ -335,7 +335,7 @@ cpu_halt() } static void -cpu_idle_default(void) +cpu_idle_default(int busy) { struct ia64_pal_result res; @@ -348,6 +348,13 @@ cpu_idle() (*cpu_idle_hook)(); } +int +cpu_idle_wakeup(int cpu) +{ + + return (0); +} + /* Other subsystems (e.g., ACPI) can hook this later. */ void (*cpu_idle_hook)(void) = cpu_idle_default; diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c index ed5cf62..cefe4ac 100644 --- a/sys/kern/sched_4bsd.c +++ b/sys/kern/sched_4bsd.c @@ -1443,7 +1443,7 @@ sched_idletd(void *dummy) mtx_assert(&Giant, MA_NOTOWNED); while (sched_runnable() == 0) - cpu_idle(); + cpu_idle(0); mtx_lock_spin(&sched_lock); mi_switch(SW_VOL | SWT_IDLE, NULL); diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c index 7f5b597..7fe80af 100644 --- a/sys/kern/sched_ule.c +++ b/sys/kern/sched_ule.c @@ -954,6 +954,12 @@ tdq_notify(struct tdq *tdq, struct thread *td) */ if (tdq->tdq_idlestate == TDQ_RUNNING) return; + /* + * If the MD code has an idle wakeup routine try that before + * falling back to IPI. + */ + if (cpu_idle_wakeup(cpu)) + return; } tdq->tdq_ipipending = 1; ipi_selected(1 << cpu, IPI_PREEMPT); @@ -2095,10 +2101,7 @@ sched_clock(struct thread *td) * If there is some activity seed it to reflect that. */ tdq->tdq_oldswitchcnt = tdq->tdq_switchcnt; - if (tdq->tdq_load) - tdq->tdq_switchcnt = 2; - else - tdq->tdq_switchcnt = 0; + tdq->tdq_switchcnt = tdq->tdq_load; /* * Advance the insert index once for each tick to ensure that all * threads get a chance to run. @@ -2507,9 +2510,10 @@ sched_idletd(void *dummy) * tdq_notify(). */ if (tdq->tdq_load == 0) { + switchcnt = tdq->tdq_switchcnt + tdq->tdq_oldswitchcnt; tdq->tdq_idlestate = TDQ_IDLE; if (tdq->tdq_load == 0) - cpu_idle(); + cpu_idle(switchcnt > 1); } if (tdq->tdq_load) { thread_lock(td); diff --git a/sys/mips/mips/machdep.c b/sys/mips/mips/machdep.c index 67cb612..28d1fb3 100644 --- a/sys/mips/mips/machdep.c +++ b/sys/mips/mips/machdep.c @@ -527,7 +527,7 @@ get_cyclecount(void) * call platform specific code to halt (until next interrupt) for the idle loop */ void -cpu_idle(void) +cpu_idle(int busy) { if (mips_cp0_status_read() & SR_INT_ENAB) __asm __volatile ("wait"); @@ -535,6 +535,13 @@ cpu_idle(void) panic("ints disabled in idleproc!"); } +int +cpu_idle_wakeup(int cpu) +{ + + return (0); +} + void dumpsys(struct dumperinfo *di __unused) { diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c index 942b804..b77f67e 100644 --- a/sys/pc98/pc98/machdep.c +++ b/sys/pc98/pc98/machdep.c @@ -1133,7 +1133,7 @@ cpu_idle_default(void) * is a runnable process. */ void -cpu_idle(void) +cpu_idle(int busy) { #ifdef SMP @@ -1150,6 +1150,13 @@ cpu_idle(void) } } +int +cpu_idle_wakeup(int cpu) +{ + + return (0); +} + /* Other subsystems (e.g., ACPI) can hook this later. */ void (*cpu_idle_hook)(void) = cpu_idle_default; diff --git a/sys/powerpc/aim/machdep.c b/sys/powerpc/aim/machdep.c index d407314..15decb7 100644 --- a/sys/powerpc/aim/machdep.c +++ b/sys/powerpc/aim/machdep.c @@ -730,7 +730,7 @@ cpu_halt(void) } void -cpu_idle(void) +cpu_idle(int busy) { uint32_t msr; @@ -750,6 +750,13 @@ cpu_idle(void) } } +int +cpu_idle_wakeup(int cpu) +{ + + return (0); +} + /* * Set set up registers on exec. */ diff --git a/sys/powerpc/booke/machdep.c b/sys/powerpc/booke/machdep.c index 75e5cb6..132ce9c 100644 --- a/sys/powerpc/booke/machdep.c +++ b/sys/powerpc/booke/machdep.c @@ -696,7 +696,7 @@ freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) * Set Wait state enable. */ void -cpu_idle (void) +cpu_idle (int busy) { register_t msr; @@ -723,6 +723,13 @@ cpu_idle (void) #endif } +int +cpu_idle_wakeup(int cpu) +{ + + return (0); +} + void spinlock_enter(void) { diff --git a/sys/sparc64/sparc64/machdep.c b/sys/sparc64/sparc64/machdep.c index 7d1aa02..35e1fed 100644 --- a/sys/sparc64/sparc64/machdep.c +++ b/sys/sparc64/sparc64/machdep.c @@ -750,12 +750,19 @@ sparc64_shutdown_final(void *dummy, int howto) } void -cpu_idle(void) +cpu_idle(int busy) { /* Insert code to halt (until next interrupt) for the idle loop */ } int +cpu_idle_wakeup(int cpu) +{ + + return (0); +} + +int ptrace_set_pc(struct thread *td, u_long addr) { diff --git a/sys/sun4v/sun4v/machdep.c b/sys/sun4v/sun4v/machdep.c index 1d0bf66..6281f00 100644 --- a/sys/sun4v/sun4v/machdep.c +++ b/sys/sun4v/sun4v/machdep.c @@ -819,7 +819,7 @@ sparc64_shutdown_final(void *dummy, int howto) } void -cpu_idle(void) +cpu_idle(int busy) { if (rdpr(pil) != 0) @@ -832,6 +832,13 @@ cpu_idle(void) } int +cpu_idle_wakeup(int cpu) +{ + + return (0); +} + +int ptrace_set_pc(struct thread *td, u_long addr) { diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 86adbb1..e320354 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -808,7 +808,8 @@ int sigonstack(size_t sp); void sleepinit(void); void stopevent(struct proc *, u_int, u_int); void threadinit(void); -void cpu_idle(void); +void cpu_idle(int); +int cpu_idle_wakeup(int); extern void (*cpu_idle_hook)(void); /* Hook to machdep CPU idler. */ void cpu_switch(struct thread *, struct thread *, struct mtx *); void cpu_throw(struct thread *, struct thread *) __dead2; |