author     mav <mav@FreeBSD.org>	2010-09-13 07:25:35 +0000
committer  mav <mav@FreeBSD.org>	2010-09-13 07:25:35 +0000
commit     eb4931dc6c47d705fca4c3e160bd493431978123 (patch)
tree       b0666da99693a46e1663a3a882abfdee5e324830 /sys
parent     18db545520f1688cfad01d6db6299ebdcb6c2c16 (diff)
Refactor the timer management code, giving priority to one-shot operation mode.
The main goal of this change is to generate timer interrupts only when there is some work to do. When the CPU is busy, interrupts are generated at the full rate of hz + stathz to fulfill scheduler and timekeeping requirements. But when the CPU is idle, only the minimum set of interrupts needed to handle scheduled callouts is executed (now down to 8 interrupts per second per CPU). This allows a significant increase in idle CPU sleep time, increasing the effect of static power-saving technologies. It should also reduce host CPU load on virtualized systems while the guest system is idle.

There is a set of tunables, also available as writable sysctls, for controlling the event timer subsystem behavior:

kern.eventtimer.timer - chooses the event timer hardware to use. On x86 there are up to 4 different kinds of timers. Depending on whether the chosen timer is per-CPU, the behavior of the other options differs slightly.

kern.eventtimer.periodic - chooses between periodic and one-shot operation mode. In periodic mode the current timer hardware is taken as the only source of time for time events. This mode is quite similar to the previous kernel behavior. One-shot mode instead uses the currently selected time counter hardware to schedule all needed events one by one and programs the timer to generate an interrupt at exactly the specified time. The default value depends on the chosen timer's capabilities, but one-shot mode is preferred unless another mode is forced by the user or the hardware.

kern.eventtimer.singlemul - in periodic mode, specifies how many times higher the timer frequency should be, so that hardclock() and statclock() events do not strictly alias. The default values are 2 and 4, but they can be reduced to 1 if the extra interrupts are unwanted.

kern.eventtimer.idletick - makes each CPU receive every timer interrupt regardless of whether it is busy or not. By default this option is disabled. If the chosen timer is per-CPU and runs in periodic mode, this option has no effect - all interrupts are generated anyway.

Since this patch modifies cpu_idle() on some platforms, I have also refactored the x86 implementation. It now uses the MONITOR/MWAIT instructions (when supported) under a high sleep/wakeup rate, as a fast alternative to the other methods. This allows the SMP scheduler to wake up sleeping CPUs much faster without using an IPI, significantly increasing performance on some highly task-switching loads.

Tested by:	many (on i386, amd64, sparc64 and powerpc)
H/W donated by:	Gheorghe Ardelean
Sponsored by:	iXsystems, Inc.
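As a rough illustration of the one-shot scheduling described above, here is a small userland model (not kernel code) of how the next interrupt time is chosen. It mirrors the logic that getnextcpuevent() in sys/kern/kern_clocksource.c implements further down in this diff, but works in plain microseconds instead of struct bintime, and the helper name next_event_us() is invented for this sketch.

#include <stdio.h>
#include <stdint.h>

/*
 * Userland model of the one-shot idea: instead of a fixed hz + stathz
 * interrupt stream, the next interrupt is programmed for the earliest
 * event that actually needs service.
 */
static const uint64_t hz = 1000, stathz = 127;

static uint64_t
next_event_us(uint64_t now_us, int idle, int callout_ticks_to_first)
{
	uint64_t hardperiod = 1000000 / hz;	/* hardclock() period */
	uint64_t statperiod = 1000000 / stathz;	/* statclock() period */
	uint64_t next = now_us + hardperiod;

	if (idle) {
		/* Idle CPU: only the first pending callout matters. */
		next = now_us + (uint64_t)callout_ticks_to_first * hardperiod;
	} else {
		/* Busy CPU: honour both hardclock and statclock rates. */
		uint64_t nextstat = now_us + statperiod;
		if (nextstat < next)
			next = nextstat;
	}
	return (next);
}

int
main(void)
{
	printf("busy: next interrupt in %ju us\n",
	    (uintmax_t)next_event_us(0, 0, 0));
	printf("idle, first callout in 125 ticks: next interrupt in %ju us\n",
	    (uintmax_t)next_event_us(0, 1, 125));
	return (0);
}

With hz = 1000, an idle CPU whose earliest callout is 125 ticks away is programmed to wake roughly 8 times per second, which is where the figure quoted above comes from.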
Diffstat (limited to 'sys')
-rw-r--r--  sys/amd64/amd64/machdep.c            |  228
-rw-r--r--  sys/amd64/amd64/mp_machdep.c         |   24
-rw-r--r--  sys/amd64/include/apicvar.h          |    3
-rw-r--r--  sys/dev/acpica/acpi_cpu.c            |   18
-rw-r--r--  sys/dev/acpica/acpi_hpet.c           |    6
-rw-r--r--  sys/i386/i386/machdep.c              |  252
-rw-r--r--  sys/i386/i386/mp_machdep.c           |   24
-rw-r--r--  sys/i386/include/apicvar.h           |    6
-rw-r--r--  sys/kern/kern_clock.c                |  140
-rw-r--r--  sys/kern/kern_clocksource.c          |  863
-rw-r--r--  sys/kern/kern_et.c                   |    2
-rw-r--r--  sys/kern/kern_tc.c                   |   18
-rw-r--r--  sys/kern/kern_timeout.c              |   39
-rw-r--r--  sys/kern/sched_4bsd.c                |    2
-rw-r--r--  sys/kern/sched_ule.c                 |    8
-rw-r--r--  sys/mips/include/smp.h               |    1
-rw-r--r--  sys/mips/mips/mp_machdep.c           |    6
-rw-r--r--  sys/pc98/pc98/machdep.c              |  141
-rw-r--r--  sys/powerpc/aim/machdep.c            |   12
-rw-r--r--  sys/powerpc/booke/machdep.c          |   12
-rw-r--r--  sys/powerpc/include/smp.h            |    1
-rw-r--r--  sys/powerpc/powerpc/mp_machdep.c     |    2
-rw-r--r--  sys/sparc64/include/intr_machdep.h   |    1
-rw-r--r--  sys/sparc64/include/smp.h            |    1
-rw-r--r--  sys/sparc64/sparc64/intr_machdep.c   |    3
-rw-r--r--  sys/sparc64/sparc64/mp_machdep.c     |   21
-rw-r--r--  sys/sun4v/include/intr_machdep.h     |    1
-rw-r--r--  sys/sun4v/include/smp.h              |    2
-rw-r--r--  sys/sun4v/sun4v/intr_machdep.c       |    4
-rw-r--r--  sys/sun4v/sun4v/mp_machdep.c         |   19
-rw-r--r--  sys/sys/callout.h                    |    3
-rw-r--r--  sys/sys/sched.h                      |    2
-rw-r--r--  sys/sys/systm.h                      |   10
-rw-r--r--  sys/sys/timeet.h                     |    4
-rw-r--r--  sys/sys/timetc.h                     |    1
-rw-r--r--  sys/x86/x86/local_apic.c             |    2
36 files changed, 1176 insertions, 706 deletions
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index fe68600..00182db 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -585,59 +585,89 @@ cpu_halt(void)
}
void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */
+static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */
+static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */
+TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
+SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait,
+ 0, "Use MONITOR/MWAIT for short idle");
+
+#define STATE_RUNNING 0x0
+#define STATE_MWAIT 0x1
+#define STATE_SLEEPING 0x2
static void
-cpu_idle_hlt(int busy)
+cpu_idle_acpi(int busy)
{
- /*
- * we must absolutely guarentee that hlt is the next instruction
- * after sti or we introduce a timing window.
- */
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_SLEEPING;
disable_intr();
- if (sched_runnable())
+ if (sched_runnable())
enable_intr();
+ else if (cpu_idle_hook)
+ cpu_idle_hook();
else
__asm __volatile("sti; hlt");
+ *state = STATE_RUNNING;
}
static void
-cpu_idle_acpi(int busy)
+cpu_idle_hlt(int busy)
{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_SLEEPING;
+ /*
+ * We must absolutely guarentee that hlt is the next instruction
+ * after sti or we introduce a timing window.
+ */
disable_intr();
- if (sched_runnable())
+ if (sched_runnable())
enable_intr();
- else if (cpu_idle_hook)
- cpu_idle_hook();
else
__asm __volatile("sti; hlt");
+ *state = STATE_RUNNING;
}
-static int cpu_ident_amdc1e = 0;
+/*
+ * MWAIT cpu power states. Lower 4 bits are sub-states.
+ */
+#define MWAIT_C0 0xf0
+#define MWAIT_C1 0x00
+#define MWAIT_C2 0x10
+#define MWAIT_C3 0x20
+#define MWAIT_C4 0x30
-static int
-cpu_probe_amdc1e(void)
+static void
+cpu_idle_mwait(int busy)
{
- int i;
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_MWAIT;
+ if (!sched_runnable()) {
+ cpu_monitor(state, 0, 0);
+ if (*state == STATE_MWAIT)
+ cpu_mwait(0, MWAIT_C1);
+ }
+ *state = STATE_RUNNING;
+}
- /*
- * Forget it, if we're not using local APIC timer.
- */
- if (resource_disabled("apic", 0) ||
- (resource_int_value("apic", 0, "clock", &i) == 0 && i == 0))
- return (0);
+static void
+cpu_idle_spin(int busy)
+{
+ int *state;
+ int i;
- /*
- * Detect the presence of C1E capability mostly on latest
- * dual-cores (or future) k8 family.
- */
- if (cpu_vendor_id == CPU_VENDOR_AMD &&
- (cpu_id & 0x00000f00) == 0x00000f00 &&
- (cpu_id & 0x0fff0000) >= 0x00040000) {
- cpu_ident_amdc1e = 1;
- return (1);
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_RUNNING;
+ for (i = 0; i < 1000; i++) {
+ if (sched_runnable())
+ return;
+ cpu_spinwait();
}
-
- return (0);
}
/*
@@ -655,110 +685,83 @@ cpu_probe_amdc1e(void)
#define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)
static void
-cpu_idle_amdc1e(int busy)
+cpu_probe_amdc1e(void)
{
- disable_intr();
- if (sched_runnable())
- enable_intr();
- else {
- uint64_t msr;
-
- msr = rdmsr(MSR_AMDK8_IPM);
- if (msr & AMDK8_CMPHALT)
- wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
-
- if (cpu_idle_hook)
- cpu_idle_hook();
- else
- __asm __volatile("sti; hlt");
+ /*
+ * Detect the presence of C1E capability mostly on latest
+ * dual-cores (or future) k8 family.
+ */
+ if (cpu_vendor_id == CPU_VENDOR_AMD &&
+ (cpu_id & 0x00000f00) == 0x00000f00 &&
+ (cpu_id & 0x0fff0000) >= 0x00040000) {
+ cpu_ident_amdc1e = 1;
}
}
-static void
-cpu_idle_spin(int busy)
-{
- return;
-}
-
void (*cpu_idle_fn)(int) = cpu_idle_acpi;
void
cpu_idle(int busy)
{
+ uint64_t msr;
+
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
+ busy, curcpu);
#ifdef SMP
if (mp_grab_cpu_hlt())
return;
#endif
- cpu_idle_fn(busy);
-}
-
-/*
- * mwait cpu power states. Lower 4 bits are sub-states.
- */
-#define MWAIT_C0 0xf0
-#define MWAIT_C1 0x00
-#define MWAIT_C2 0x10
-#define MWAIT_C3 0x20
-#define MWAIT_C4 0x30
-
-#define MWAIT_DISABLED 0x0
-#define MWAIT_WOKEN 0x1
-#define MWAIT_WAITING 0x2
+ /* If we are busy - try to use fast methods. */
+ if (busy) {
+ if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
+ cpu_idle_mwait(busy);
+ goto out;
+ }
+ }
-static void
-cpu_idle_mwait(int busy)
-{
- int *mwait;
+ /* If we have time - switch timers into idle mode. */
+ if (!busy) {
+ critical_enter();
+ cpu_idleclock();
+ }
- mwait = (int *)PCPU_PTR(monitorbuf);
- *mwait = MWAIT_WAITING;
- if (sched_runnable())
- return;
- cpu_monitor(mwait, 0, 0);
- if (*mwait == MWAIT_WAITING)
- cpu_mwait(0, MWAIT_C1);
-}
+ /* Apply AMD APIC timer C1E workaround. */
+ if (cpu_ident_amdc1e && cpu_disable_deep_sleep) {
+ msr = rdmsr(MSR_AMDK8_IPM);
+ if (msr & AMDK8_CMPHALT)
+ wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
+ }
-static void
-cpu_idle_mwait_hlt(int busy)
-{
- int *mwait;
+ /* Call main idle method. */
+ cpu_idle_fn(busy);
- mwait = (int *)PCPU_PTR(monitorbuf);
- if (busy == 0) {
- *mwait = MWAIT_DISABLED;
- cpu_idle_hlt(busy);
- return;
+ /* Switch timers mack into active mode. */
+ if (!busy) {
+ cpu_activeclock();
+ critical_exit();
}
- *mwait = MWAIT_WAITING;
- if (sched_runnable())
- return;
- cpu_monitor(mwait, 0, 0);
- if (*mwait == MWAIT_WAITING)
- cpu_mwait(0, MWAIT_C1);
+out:
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
+ busy, curcpu);
}
int
cpu_idle_wakeup(int cpu)
{
struct pcpu *pcpu;
- int *mwait;
+ int *state;
- if (cpu_idle_fn == cpu_idle_spin)
- return (1);
- if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt)
- return (0);
pcpu = pcpu_find(cpu);
- mwait = (int *)pcpu->pc_monitorbuf;
+ state = (int *)pcpu->pc_monitorbuf;
/*
* This doesn't need to be atomic since missing the race will
* simply result in unnecessary IPIs.
*/
- if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED)
+ if (*state == STATE_SLEEPING)
return (0);
- *mwait = MWAIT_WOKEN;
-
+ if (*state == STATE_MWAIT)
+ *state = STATE_RUNNING;
return (1);
}
@@ -771,8 +774,6 @@ struct {
} idle_tbl[] = {
{ cpu_idle_spin, "spin" },
{ cpu_idle_mwait, "mwait" },
- { cpu_idle_mwait_hlt, "mwait_hlt" },
- { cpu_idle_amdc1e, "amdc1e" },
{ cpu_idle_hlt, "hlt" },
{ cpu_idle_acpi, "acpi" },
{ NULL, NULL }
@@ -791,8 +792,8 @@ idle_sysctl_available(SYSCTL_HANDLER_ARGS)
if (strstr(idle_tbl[i].id_name, "mwait") &&
(cpu_feature2 & CPUID2_MON) == 0)
continue;
- if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
- cpu_ident_amdc1e == 0)
+ if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+ cpu_idle_hook == NULL)
continue;
p += sprintf(p, "%s, ", idle_tbl[i].id_name);
}
@@ -801,6 +802,9 @@ idle_sysctl_available(SYSCTL_HANDLER_ARGS)
return (error);
}
+SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
+ 0, 0, idle_sysctl_available, "A", "list of available idle functions");
+
static int
idle_sysctl(SYSCTL_HANDLER_ARGS)
{
@@ -824,8 +828,8 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
if (strstr(idle_tbl[i].id_name, "mwait") &&
(cpu_feature2 & CPUID2_MON) == 0)
continue;
- if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
- cpu_ident_amdc1e == 0)
+ if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+ cpu_idle_hook == NULL)
continue;
if (strcmp(idle_tbl[i].id_name, buf))
continue;
@@ -835,9 +839,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
return (EINVAL);
}
-SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
- 0, 0, idle_sysctl_available, "A", "list of available idle functions");
-
SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
idle_sysctl, "A", "currently selected idle function");
@@ -1743,8 +1744,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
}
#endif
- if (cpu_probe_amdc1e())
- cpu_idle_fn = cpu_idle_amdc1e;
+ cpu_probe_amdc1e();
/* Location of kernel stack for locore */
return ((u_int64_t)thread0.td_pcb);
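The amd64 hunks above replace the old MWAIT_* flags with a single per-CPU monitorbuf state word shared between the cpu_idle_*() methods and cpu_idle_wakeup(). The following userland model (a simplified sketch under stated assumptions, not the kernel code; model_idle_wakeup() is a made-up name) shows the protocol those hunks implement: a store to the monitored word is enough to wake a CPU parked in MWAIT, so an IPI is only needed when the CPU went into HLT or an ACPI sleep state.

#include <stdio.h>

#define STATE_RUNNING  0x0	/* CPU is running or spinning; no IPI needed. */
#define STATE_MWAIT    0x1	/* CPU sits in MWAIT watching this word.      */
#define STATE_SLEEPING 0x2	/* CPU sits in HLT/ACPI C-state; IPI required. */

/* Returns 1 if writing the state word is enough to wake the CPU. */
static int
model_idle_wakeup(int *state)
{
	if (*state == STATE_SLEEPING)
		return (0);		/* caller must send an IPI */
	if (*state == STATE_MWAIT)
		*state = STATE_RUNNING;	/* the store itself ends MWAIT */
	return (1);
}

int
main(void)
{
	int mwait_cpu = STATE_MWAIT, halted_cpu = STATE_SLEEPING;

	printf("mwait cpu woken without IPI: %d\n",
	    model_idle_wakeup(&mwait_cpu));
	printf("halted cpu needs IPI: %d\n",
	    !model_idle_wakeup(&halted_cpu));
	return (0);
}

In the kernel the wake-up store is done by cpu_idle_wakeup(); only the STATE_SLEEPING case makes the scheduler fall back to sending an IPI.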
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index e2f82ec..49b380b 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -118,7 +118,6 @@ u_long *ipi_invlcache_counts[MAXCPU];
u_long *ipi_rendezvous_counts[MAXCPU];
u_long *ipi_lazypmap_counts[MAXCPU];
static u_long *ipi_hardclock_counts[MAXCPU];
-static u_long *ipi_statclock_counts[MAXCPU];
#endif
extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
@@ -1196,16 +1195,22 @@ smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
void
ipi_bitmap_handler(struct trapframe frame)
{
+ struct trapframe *oldframe;
+ struct thread *td;
int cpu = PCPU_GET(cpuid);
u_int ipi_bitmap;
+ critical_enter();
+ td = curthread;
+ td->td_intr_nesting_level++;
+ oldframe = td->td_intr_frame;
+ td->td_intr_frame = &frame;
ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
-
if (ipi_bitmap & (1 << IPI_PREEMPT)) {
#ifdef COUNT_IPIS
(*ipi_preempt_counts[cpu])++;
#endif
- sched_preempt(curthread);
+ sched_preempt(td);
}
if (ipi_bitmap & (1 << IPI_AST)) {
#ifdef COUNT_IPIS
@@ -1217,14 +1222,11 @@ ipi_bitmap_handler(struct trapframe frame)
#ifdef COUNT_IPIS
(*ipi_hardclock_counts[cpu])++;
#endif
- hardclockintr(&frame);
- }
- if (ipi_bitmap & (1 << IPI_STATCLOCK)) {
-#ifdef COUNT_IPIS
- (*ipi_statclock_counts[cpu])++;
-#endif
- statclockintr(&frame);
+ hardclockintr();
}
+ td->td_intr_frame = oldframe;
+ td->td_intr_nesting_level--;
+ critical_exit();
}
/*
@@ -1579,8 +1581,6 @@ mp_ipi_intrcnt(void *dummy)
intrcnt_add(buf, &ipi_lazypmap_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
intrcnt_add(buf, &ipi_hardclock_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:statclock", i);
- intrcnt_add(buf, &ipi_statclock_counts[i]);
}
}
SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h
index 2ebf7c2..ae2f5b9 100644
--- a/sys/amd64/include/apicvar.h
+++ b/sys/amd64/include/apicvar.h
@@ -123,8 +123,7 @@
#define IPI_AST 0 /* Generate software trap. */
#define IPI_PREEMPT 1
#define IPI_HARDCLOCK 2
-#define IPI_STATCLOCK 3
-#define IPI_BITMAP_LAST IPI_STATCLOCK
+#define IPI_BITMAP_LAST IPI_HARDCLOCK
#define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
#define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */
diff --git a/sys/dev/acpica/acpi_cpu.c b/sys/dev/acpica/acpi_cpu.c
index 5dd6ab9..fabbee9 100644
--- a/sys/dev/acpica/acpi_cpu.c
+++ b/sys/dev/acpica/acpi_cpu.c
@@ -900,7 +900,13 @@ acpi_cpu_idle()
/* Find the lowest state that has small enough latency. */
cx_next_idx = 0;
- for (i = sc->cpu_cx_lowest; i >= 0; i--) {
+#ifndef __ia64__
+ if (cpu_disable_deep_sleep)
+ i = sc->cpu_non_c3;
+ else
+#endif
+ i = sc->cpu_cx_lowest;
+ for (; i >= 0; i--) {
if (sc->cpu_cx_states[i].trans_lat * 3 <= sc->cpu_prev_sleep) {
cx_next_idx = i;
break;
@@ -929,15 +935,17 @@ acpi_cpu_idle()
/*
* Execute HLT (or equivalent) and wait for an interrupt. We can't
* precisely calculate the time spent in C1 since the place we wake up
- * is an ISR. Assume we slept no more then half of quantum.
+ * is an ISR. Assume we slept no more then half of quantum, unless
+ * we are called inside critical section, delaying context switch.
*/
if (cx_next->type == ACPI_STATE_C1) {
AcpiHwRead(&start_time, &AcpiGbl_FADT.XPmTimerBlock);
acpi_cpu_c1();
AcpiHwRead(&end_time, &AcpiGbl_FADT.XPmTimerBlock);
- end_time = acpi_TimerDelta(end_time, start_time);
- sc->cpu_prev_sleep = (sc->cpu_prev_sleep * 3 +
- min(PM_USEC(end_time), 500000 / hz)) / 4;
+ end_time = PM_USEC(acpi_TimerDelta(end_time, start_time));
+ if (curthread->td_critnest == 0)
+ end_time = min(end_time, 500000 / hz);
+ sc->cpu_prev_sleep = (sc->cpu_prev_sleep * 3 + end_time) / 4;
return;
}
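The acpi_cpu.c hunk above changes how the C1 sleep-time estimate is maintained: the measured sleep time is folded into cpu_prev_sleep as a 3/4-old, 1/4-new moving average, and the half-a-tick clamp is now skipped when the CPU was idling inside a critical section. A minimal userland sketch of that arithmetic follows (update_prev_sleep() is a hypothetical name; times are in microseconds).

#include <stdio.h>

static unsigned
update_prev_sleep(unsigned prev, unsigned measured_us, int in_critical, int hz)
{
	/* Clamp to half a tick unless we idled inside a critical section. */
	if (!in_critical && measured_us > 500000 / (unsigned)hz)
		measured_us = 500000 / (unsigned)hz;
	/* 3/4 old estimate, 1/4 new measurement. */
	return ((prev * 3 + measured_us) / 4);
}

int
main(void)
{
	/* hz = 1000: half a tick is 500 us. */
	printf("clamped:     %u\n", update_prev_sleep(200, 10000, 0, 1000));
	printf("not clamped: %u\n", update_prev_sleep(200, 10000, 1, 1000));
	return (0);
}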
diff --git a/sys/dev/acpica/acpi_hpet.c b/sys/dev/acpica/acpi_hpet.c
index 2a8eb30..f5cf11a 100644
--- a/sys/dev/acpica/acpi_hpet.c
+++ b/sys/dev/acpica/acpi_hpet.c
@@ -683,15 +683,15 @@ hpet_detach(device_t dev)
static int
hpet_suspend(device_t dev)
{
- struct hpet_softc *sc;
+// struct hpet_softc *sc;
/*
* Disable the timer during suspend. The timer will not lose
* its state in S1 or S2, but we are required to disable
* it.
*/
- sc = device_get_softc(dev);
- hpet_disable(sc);
+// sc = device_get_softc(dev);
+// hpet_disable(sc);
return (0);
}
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index ef229ca..2bf6dd1 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -1175,9 +1175,6 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate)
return (0);
}
-
-void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */
-
#ifdef XEN
void
@@ -1208,60 +1205,94 @@ cpu_halt(void)
__asm__ ("hlt");
}
+#endif
+
+void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */
+static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */
+static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */
+TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
+SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait,
+ 0, "Use MONITOR/MWAIT for short idle");
+
+#define STATE_RUNNING 0x0
+#define STATE_MWAIT 0x1
+#define STATE_SLEEPING 0x2
+
static void
-cpu_idle_hlt(int busy)
+cpu_idle_acpi(int busy)
{
- /*
- * we must absolutely guarentee that hlt is the next instruction
- * after sti or we introduce a timing window.
- */
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_SLEEPING;
disable_intr();
- if (sched_runnable())
+ if (sched_runnable())
enable_intr();
+ else if (cpu_idle_hook)
+ cpu_idle_hook();
else
__asm __volatile("sti; hlt");
+ *state = STATE_RUNNING;
}
-#endif
+#ifndef XEN
static void
-cpu_idle_acpi(int busy)
+cpu_idle_hlt(int busy)
{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_SLEEPING;
+ /*
+ * We must absolutely guarentee that hlt is the next instruction
+ * after sti or we introduce a timing window.
+ */
disable_intr();
- if (sched_runnable())
+ if (sched_runnable())
enable_intr();
- else if (cpu_idle_hook)
- cpu_idle_hook();
else
__asm __volatile("sti; hlt");
+ *state = STATE_RUNNING;
}
+#endif
+
+/*
+ * MWAIT cpu power states. Lower 4 bits are sub-states.
+ */
+#define MWAIT_C0 0xf0
+#define MWAIT_C1 0x00
+#define MWAIT_C2 0x10
+#define MWAIT_C3 0x20
+#define MWAIT_C4 0x30
-static int cpu_ident_amdc1e = 0;
+static void
+cpu_idle_mwait(int busy)
+{
+ int *state;
-static int
-cpu_probe_amdc1e(void)
-{
-#ifdef DEV_APIC
- int i;
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_MWAIT;
+ if (!sched_runnable()) {
+ cpu_monitor(state, 0, 0);
+ if (*state == STATE_MWAIT)
+ cpu_mwait(0, MWAIT_C1);
+ }
+ *state = STATE_RUNNING;
+}
- /*
- * Forget it, if we're not using local APIC timer.
- */
- if (resource_disabled("apic", 0) ||
- (resource_int_value("apic", 0, "clock", &i) == 0 && i == 0))
- return (0);
+static void
+cpu_idle_spin(int busy)
+{
+ int *state;
+ int i;
- /*
- * Detect the presence of C1E capability mostly on latest
- * dual-cores (or future) k8 family.
- */
- if (cpu_vendor_id == CPU_VENDOR_AMD &&
- (cpu_id & 0x00000f00) == 0x00000f00 &&
- (cpu_id & 0x0fff0000) >= 0x00040000) {
- cpu_ident_amdc1e = 1;
- return (1);
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_RUNNING;
+ for (i = 0; i < 1000; i++) {
+ if (sched_runnable())
+ return;
+ cpu_spinwait();
}
-#endif
- return (0);
}
/*
@@ -1279,32 +1310,20 @@ cpu_probe_amdc1e(void)
#define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)
static void
-cpu_idle_amdc1e(int busy)
+cpu_probe_amdc1e(void)
{
- disable_intr();
- if (sched_runnable())
- enable_intr();
- else {
- uint64_t msr;
-
- msr = rdmsr(MSR_AMDK8_IPM);
- if (msr & AMDK8_CMPHALT)
- wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
-
- if (cpu_idle_hook)
- cpu_idle_hook();
- else
- __asm __volatile("sti; hlt");
+ /*
+ * Detect the presence of C1E capability mostly on latest
+ * dual-cores (or future) k8 family.
+ */
+ if (cpu_vendor_id == CPU_VENDOR_AMD &&
+ (cpu_id & 0x00000f00) == 0x00000f00 &&
+ (cpu_id & 0x0fff0000) >= 0x00040000) {
+ cpu_ident_amdc1e = 1;
}
}
-static void
-cpu_idle_spin(int busy)
-{
- return;
-}
-
#ifdef XEN
void (*cpu_idle_fn)(int) = cpu_idle_hlt;
#else
@@ -1314,79 +1333,72 @@ void (*cpu_idle_fn)(int) = cpu_idle_acpi;
void
cpu_idle(int busy)
{
+ uint64_t msr;
+
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
+ busy, curcpu);
#if defined(SMP) && !defined(XEN)
if (mp_grab_cpu_hlt())
return;
#endif
- cpu_idle_fn(busy);
-}
-
-/*
- * mwait cpu power states. Lower 4 bits are sub-states.
- */
-#define MWAIT_C0 0xf0
-#define MWAIT_C1 0x00
-#define MWAIT_C2 0x10
-#define MWAIT_C3 0x20
-#define MWAIT_C4 0x30
-
-#define MWAIT_DISABLED 0x0
-#define MWAIT_WOKEN 0x1
-#define MWAIT_WAITING 0x2
+ /* If we are busy - try to use fast methods. */
+ if (busy) {
+ if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
+ cpu_idle_mwait(busy);
+ goto out;
+ }
+ }
-static void
-cpu_idle_mwait(int busy)
-{
- int *mwait;
+#ifndef XEN
+ /* If we have time - switch timers into idle mode. */
+ if (!busy) {
+ critical_enter();
+ cpu_idleclock();
+ }
+#endif
- mwait = (int *)PCPU_PTR(monitorbuf);
- *mwait = MWAIT_WAITING;
- if (sched_runnable())
- return;
- cpu_monitor(mwait, 0, 0);
- if (*mwait == MWAIT_WAITING)
- cpu_mwait(0, MWAIT_C1);
-}
+ /* Apply AMD APIC timer C1E workaround. */
+ if (cpu_ident_amdc1e
+#ifndef XEN
+ && cpu_disable_deep_sleep
+#endif
+ ) {
+ msr = rdmsr(MSR_AMDK8_IPM);
+ if (msr & AMDK8_CMPHALT)
+ wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
+ }
-static void
-cpu_idle_mwait_hlt(int busy)
-{
- int *mwait;
+ /* Call main idle method. */
+ cpu_idle_fn(busy);
- mwait = (int *)PCPU_PTR(monitorbuf);
- if (busy == 0) {
- *mwait = MWAIT_DISABLED;
- cpu_idle_hlt(busy);
- return;
+#ifndef XEN
+ /* Switch timers mack into active mode. */
+ if (!busy) {
+ cpu_activeclock();
+ critical_exit();
}
- *mwait = MWAIT_WAITING;
- if (sched_runnable())
- return;
- cpu_monitor(mwait, 0, 0);
- if (*mwait == MWAIT_WAITING)
- cpu_mwait(0, MWAIT_C1);
+#endif
+out:
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
+ busy, curcpu);
}
int
cpu_idle_wakeup(int cpu)
{
struct pcpu *pcpu;
- int *mwait;
+ int *state;
- if (cpu_idle_fn == cpu_idle_spin)
- return (1);
- if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt)
- return (0);
pcpu = pcpu_find(cpu);
- mwait = (int *)pcpu->pc_monitorbuf;
+ state = (int *)pcpu->pc_monitorbuf;
/*
* This doesn't need to be atomic since missing the race will
* simply result in unnecessary IPIs.
*/
- if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED)
+ if (*state == STATE_SLEEPING)
return (0);
- *mwait = MWAIT_WOKEN;
-
+ if (*state == STATE_MWAIT)
+ *state = STATE_RUNNING;
return (1);
}
@@ -1399,8 +1411,6 @@ struct {
} idle_tbl[] = {
{ cpu_idle_spin, "spin" },
{ cpu_idle_mwait, "mwait" },
- { cpu_idle_mwait_hlt, "mwait_hlt" },
- { cpu_idle_amdc1e, "amdc1e" },
{ cpu_idle_hlt, "hlt" },
{ cpu_idle_acpi, "acpi" },
{ NULL, NULL }
@@ -1419,8 +1429,8 @@ idle_sysctl_available(SYSCTL_HANDLER_ARGS)
if (strstr(idle_tbl[i].id_name, "mwait") &&
(cpu_feature2 & CPUID2_MON) == 0)
continue;
- if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
- cpu_ident_amdc1e == 0)
+ if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+ cpu_idle_hook == NULL)
continue;
p += sprintf(p, "%s, ", idle_tbl[i].id_name);
}
@@ -1429,6 +1439,9 @@ idle_sysctl_available(SYSCTL_HANDLER_ARGS)
return (error);
}
+SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
+ 0, 0, idle_sysctl_available, "A", "list of available idle functions");
+
static int
idle_sysctl(SYSCTL_HANDLER_ARGS)
{
@@ -1452,8 +1465,8 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
if (strstr(idle_tbl[i].id_name, "mwait") &&
(cpu_feature2 & CPUID2_MON) == 0)
continue;
- if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
- cpu_ident_amdc1e == 0)
+ if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+ cpu_idle_hook == NULL)
continue;
if (strcmp(idle_tbl[i].id_name, buf))
continue;
@@ -1463,9 +1476,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
return (EINVAL);
}
-SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
- 0, 0, idle_sysctl_available, "A", "list of available idle functions");
-
SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
idle_sysctl, "A", "currently selected idle function");
@@ -2695,8 +2705,7 @@ init386(first)
thread0.td_pcb->pcb_fsd = PCPU_GET(fsgs_gdt)[0];
thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1];
- if (cpu_probe_amdc1e())
- cpu_idle_fn = cpu_idle_amdc1e;
+ cpu_probe_amdc1e();
}
#else
@@ -2970,8 +2979,7 @@ init386(first)
thread0.td_pcb->pcb_ext = 0;
thread0.td_frame = &proc0_tf;
- if (cpu_probe_amdc1e())
- cpu_idle_fn = cpu_idle_amdc1e;
+ cpu_probe_amdc1e();
}
#endif
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index fa50ecf..f660e1c 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -167,7 +167,6 @@ u_long *ipi_invlcache_counts[MAXCPU];
u_long *ipi_rendezvous_counts[MAXCPU];
u_long *ipi_lazypmap_counts[MAXCPU];
static u_long *ipi_hardclock_counts[MAXCPU];
-static u_long *ipi_statclock_counts[MAXCPU];
#endif
/*
@@ -1284,16 +1283,22 @@ smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
void
ipi_bitmap_handler(struct trapframe frame)
{
+ struct trapframe *oldframe;
+ struct thread *td;
int cpu = PCPU_GET(cpuid);
u_int ipi_bitmap;
+ critical_enter();
+ td = curthread;
+ td->td_intr_nesting_level++;
+ oldframe = td->td_intr_frame;
+ td->td_intr_frame = &frame;
ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
-
if (ipi_bitmap & (1 << IPI_PREEMPT)) {
#ifdef COUNT_IPIS
(*ipi_preempt_counts[cpu])++;
#endif
- sched_preempt(curthread);
+ sched_preempt(td);
}
if (ipi_bitmap & (1 << IPI_AST)) {
#ifdef COUNT_IPIS
@@ -1305,14 +1310,11 @@ ipi_bitmap_handler(struct trapframe frame)
#ifdef COUNT_IPIS
(*ipi_hardclock_counts[cpu])++;
#endif
- hardclockintr(&frame);
- }
- if (ipi_bitmap & (1 << IPI_STATCLOCK)) {
-#ifdef COUNT_IPIS
- (*ipi_statclock_counts[cpu])++;
-#endif
- statclockintr(&frame);
+ hardclockintr();
}
+ td->td_intr_frame = oldframe;
+ td->td_intr_nesting_level--;
+ critical_exit();
}
/*
@@ -1627,8 +1629,6 @@ mp_ipi_intrcnt(void *dummy)
intrcnt_add(buf, &ipi_lazypmap_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
intrcnt_add(buf, &ipi_hardclock_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:statclock", i);
- intrcnt_add(buf, &ipi_statclock_counts[i]);
}
}
SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
diff --git a/sys/i386/include/apicvar.h b/sys/i386/include/apicvar.h
index cada017..ff1f657 100644
--- a/sys/i386/include/apicvar.h
+++ b/sys/i386/include/apicvar.h
@@ -124,8 +124,7 @@
#define IPI_AST 0 /* Generate software trap. */
#define IPI_PREEMPT 1
#define IPI_HARDCLOCK 2
-#define IPI_STATCLOCK 3
-#define IPI_BITMAP_LAST IPI_STATCLOCK
+#define IPI_BITMAP_LAST IPI_HARDCLOCK
#define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
#define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */
@@ -152,8 +151,7 @@
#define IPI_AST 0 /* Generate software trap. */
#define IPI_PREEMPT 1
#define IPI_HARDCLOCK 2
-#define IPI_STATCLOCK 3
-#define IPI_BITMAP_LAST IPI_STATCLOCK
+#define IPI_BITMAP_LAST IPI_HARDCLOCK
#define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
#define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index c283b6b..ff5747e 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -373,11 +373,9 @@ int profprocs;
int ticks;
int psratio;
-int timer1hz;
-int timer2hz;
-static DPCPU_DEFINE(u_int, hard_cnt);
-static DPCPU_DEFINE(u_int, stat_cnt);
-static DPCPU_DEFINE(u_int, prof_cnt);
+static DPCPU_DEFINE(int, pcputicks); /* Per-CPU version of ticks. */
+static struct mtx global_hardclock_mtx;
+MTX_SYSINIT(global_hardclock_mtx, &global_hardclock_mtx, "ghc_mtx", MTX_SPIN);
/*
* Initialize clock frequencies and start both clocks running.
@@ -408,52 +406,6 @@ initclocks(dummy)
#endif
}
-void
-timer1clock(int usermode, uintfptr_t pc)
-{
- u_int *cnt;
-
- cnt = DPCPU_PTR(hard_cnt);
- *cnt += hz;
- if (*cnt >= timer1hz) {
- *cnt -= timer1hz;
- if (*cnt >= timer1hz)
- *cnt = 0;
- if (PCPU_GET(cpuid) == 0)
- hardclock(usermode, pc);
- else
- hardclock_cpu(usermode);
- }
- if (timer2hz == 0)
- timer2clock(usermode, pc);
-}
-
-void
-timer2clock(int usermode, uintfptr_t pc)
-{
- u_int *cnt;
- int t2hz = timer2hz ? timer2hz : timer1hz;
-
- cnt = DPCPU_PTR(stat_cnt);
- *cnt += stathz;
- if (*cnt >= t2hz) {
- *cnt -= t2hz;
- if (*cnt >= t2hz)
- *cnt = 0;
- statclock(usermode);
- }
- if (profprocs == 0)
- return;
- cnt = DPCPU_PTR(prof_cnt);
- *cnt += profhz;
- if (*cnt >= t2hz) {
- *cnt -= t2hz;
- if (*cnt >= t2hz)
- *cnt = 0;
- profclock(usermode, pc);
- }
-}
-
/*
* Each time the real-time timer fires, this function is called on all CPUs.
* Note that hardclock() calls hardclock_cpu() for the boot CPU, so only
@@ -486,7 +438,7 @@ hardclock_cpu(int usermode)
PROC_SUNLOCK(p);
}
thread_lock(td);
- sched_tick();
+ sched_tick(1);
td->td_flags |= flags;
thread_unlock(td);
@@ -507,6 +459,7 @@ hardclock(int usermode, uintfptr_t pc)
atomic_add_int((volatile int *)&ticks, 1);
hardclock_cpu(usermode);
tc_ticktock();
+ cpu_tick_calibration();
/*
* If no separate statistics clock is available, run it from here.
*
@@ -525,6 +478,89 @@ hardclock(int usermode, uintfptr_t pc)
#endif /* SW_WATCHDOG */
}
+void
+hardclock_anycpu(int cnt, int usermode)
+{
+ struct pstats *pstats;
+ struct thread *td = curthread;
+ struct proc *p = td->td_proc;
+ int *t = DPCPU_PTR(pcputicks);
+ int flags;
+ int global, newticks;
+
+ /*
+ * Update per-CPU and possibly global ticks values.
+ */
+ *t += cnt;
+ do {
+ global = ticks;
+ newticks = *t - global;
+ if (newticks <= 0) {
+ if (newticks < -1)
+ *t = global - 1;
+ newticks = 0;
+ break;
+ }
+ } while (!atomic_cmpset_int(&ticks, global, *t));
+
+ /*
+ * Run current process's virtual and profile time, as needed.
+ */
+ pstats = p->p_stats;
+ flags = 0;
+ if (usermode &&
+ timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
+ PROC_SLOCK(p);
+ if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL],
+ tick * cnt) == 0)
+ flags |= TDF_ALRMPEND | TDF_ASTPENDING;
+ PROC_SUNLOCK(p);
+ }
+ if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
+ PROC_SLOCK(p);
+ if (itimerdecr(&pstats->p_timer[ITIMER_PROF],
+ tick * cnt) == 0)
+ flags |= TDF_PROFPEND | TDF_ASTPENDING;
+ PROC_SUNLOCK(p);
+ }
+ thread_lock(td);
+ sched_tick(cnt);
+ td->td_flags |= flags;
+ thread_unlock(td);
+
+#ifdef HWPMC_HOOKS
+ if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
+ PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
+#endif
+ callout_tick();
+ /* We are in charge to handle this tick duty. */
+ if (newticks > 0) {
+ mtx_lock_spin(&global_hardclock_mtx);
+ tc_ticktock();
+#ifdef DEVICE_POLLING
+ hardclock_device_poll(); /* This is very short and quick. */
+#endif /* DEVICE_POLLING */
+#ifdef SW_WATCHDOG
+ if (watchdog_enabled > 0) {
+ watchdog_ticks -= newticks;
+ if (watchdog_ticks <= 0)
+ watchdog_fire();
+ }
+#endif /* SW_WATCHDOG */
+ mtx_unlock_spin(&global_hardclock_mtx);
+ }
+ if (curcpu == CPU_FIRST())
+ cpu_tick_calibration();
+}
+
+void
+hardclock_sync(int cpu)
+{
+ int *t = DPCPU_ID_PTR(cpu, pcputicks);
+
+ *t = ticks;
+}
+
/*
* Compute number of ticks in the specified amount of time.
*/
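hardclock_anycpu() above lets every CPU account ticks locally and only advance the global ticks variable when its private counter gets ahead of it. Below is a condensed userland model of that compare-and-swap loop (using C11 atomics in place of the kernel's atomic_cmpset_int(); model_hardclock_anycpu() is an invented name), showing why two CPUs handling the same wall-clock tick do not double-count it.

#include <stdatomic.h>
#include <stdio.h>

static _Atomic int ticks;	/* global tick counter */

static int
model_hardclock_anycpu(int *pcputicks, int cnt)
{
	int global, newticks;

	*pcputicks += cnt;
	do {
		global = atomic_load(&ticks);
		newticks = *pcputicks - global;
		if (newticks <= 0) {
			/* Another CPU already advanced the global count. */
			if (newticks < -1)
				*pcputicks = global - 1;
			return (0);
		}
	} while (!atomic_compare_exchange_weak(&ticks, &global, *pcputicks));
	return (newticks);	/* this CPU owns `newticks` global tick duties */
}

int
main(void)
{
	int cpu0 = 0, cpu1 = 0;

	printf("cpu0 advances global ticks by %d\n",
	    model_hardclock_anycpu(&cpu0, 3));
	printf("cpu1, same wall-clock ticks, advances by %d\n",
	    model_hardclock_anycpu(&cpu1, 3));
	return (0);
}

The CPU whose exchange succeeds gets newticks > 0 and takes over the global duties (tc_ticktock(), watchdog, device polling) for that many ticks, exactly as the `if (newticks > 0)` block above does.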
diff --git a/sys/kern/kern_clocksource.c b/sys/kern/kern_clocksource.c
index 6b005de..29304a4 100644
--- a/sys/kern/kern_clocksource.c
+++ b/sys/kern/kern_clocksource.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/kdb.h>
+#include <sys/ktr.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
@@ -59,28 +60,79 @@ __FBSDID("$FreeBSD$");
cyclic_clock_func_t cyclic_clock_func[MAXCPU];
#endif
-static void cpu_restartclocks(void);
-static void timercheck(void);
-inline static int doconfigtimer(int i);
-static void configtimer(int i);
+int cpu_disable_deep_sleep = 0; /* Timer dies in C3. */
-static struct eventtimer *timer[2] = { NULL, NULL };
-static int timertest = 0;
-static int timerticks[2] = { 0, 0 };
-static int profiling_on = 0;
-static struct bintime timerperiod[2];
+static void setuptimer(void);
+static void loadtimer(struct bintime *now, int first);
+static int doconfigtimer(void);
+static void configtimer(int start);
+static int round_freq(struct eventtimer *et, int freq);
-static char timername[2][32];
-TUNABLE_STR("kern.eventtimer.timer1", timername[0], sizeof(*timername));
-TUNABLE_STR("kern.eventtimer.timer2", timername[1], sizeof(*timername));
+static void getnextcpuevent(struct bintime *event, int idle);
+static void getnextevent(struct bintime *event);
+static int handleevents(struct bintime *now, int fake);
+#ifdef SMP
+static void cpu_new_callout(int cpu, int ticks);
+#endif
+
+static struct mtx et_hw_mtx;
+
+#define ET_HW_LOCK(state) \
+ { \
+ if (timer->et_flags & ET_FLAGS_PERCPU) \
+ mtx_lock_spin(&(state)->et_hw_mtx); \
+ else \
+ mtx_lock_spin(&et_hw_mtx); \
+ }
+
+#define ET_HW_UNLOCK(state) \
+ { \
+ if (timer->et_flags & ET_FLAGS_PERCPU) \
+ mtx_unlock_spin(&(state)->et_hw_mtx); \
+ else \
+ mtx_unlock_spin(&et_hw_mtx); \
+ }
+
+static struct eventtimer *timer = NULL;
+static struct bintime timerperiod; /* Timer period for periodic mode. */
+static struct bintime hardperiod; /* hardclock() events period. */
+static struct bintime statperiod; /* statclock() events period. */
+static struct bintime profperiod; /* profclock() events period. */
+static struct bintime nexttick; /* Next global timer tick time. */
+static u_int busy = 0; /* Reconfiguration is in progress. */
+static int profiling = 0; /* Profiling events enabled. */
+
+static char timername[32]; /* Wanted timer. */
+TUNABLE_STR("kern.eventtimer.timer", timername, sizeof(timername));
-static u_int singlemul = 0;
+static u_int singlemul = 0; /* Multiplier for periodic mode. */
TUNABLE_INT("kern.eventtimer.singlemul", &singlemul);
SYSCTL_INT(_kern_eventtimer, OID_AUTO, singlemul, CTLFLAG_RW, &singlemul,
- 0, "Multiplier, used in single timer mode");
-
-typedef u_int tc[2];
-static DPCPU_DEFINE(tc, configtimer);
+ 0, "Multiplier for periodic mode");
+
+static u_int idletick = 0; /* Idle mode allowed. */
+TUNABLE_INT("kern.eventtimer.idletick", &idletick);
+SYSCTL_INT(_kern_eventtimer, OID_AUTO, idletick, CTLFLAG_RW, &idletick,
+ 0, "Run periodic events when idle");
+
+static int periodic = 0; /* Periodic or one-shot mode. */
+TUNABLE_INT("kern.eventtimer.periodic", &periodic);
+
+struct pcpu_state {
+ struct mtx et_hw_mtx; /* Per-CPU timer mutex. */
+ u_int action; /* Reconfiguration requests. */
+ u_int handle; /* Immediate handle resuests. */
+ struct bintime now; /* Last tick time. */
+ struct bintime nextevent; /* Next scheduled event on this CPU. */
+ struct bintime nexttick; /* Next timer tick time. */
+ struct bintime nexthard; /* Next hardlock() event. */
+ struct bintime nextstat; /* Next statclock() event. */
+ struct bintime nextprof; /* Next profclock() event. */
+ int ipi; /* This CPU needs IPI. */
+ int idle; /* This CPU is in idle mode. */
+};
+
+static DPCPU_DEFINE(struct pcpu_state, timerstate);
#define FREQ2BT(freq, bt) \
{ \
@@ -91,159 +143,325 @@ static DPCPU_DEFINE(tc, configtimer);
(((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) / \
((bt)->frac >> 1))
-/* Per-CPU timer1 handler. */
-static int
-hardclockhandler(struct trapframe *frame)
+/*
+ * Timer broadcast IPI handler.
+ */
+int
+hardclockintr(void)
{
+ struct bintime now;
+ struct pcpu_state *state;
+ int done;
+
+ if (doconfigtimer() || busy)
+ return (FILTER_HANDLED);
+ state = DPCPU_PTR(timerstate);
+ now = state->now;
+ CTR4(KTR_SPARE2, "ipi at %d: now %d.%08x%08x",
+ curcpu, now.sec, (unsigned int)(now.frac >> 32),
+ (unsigned int)(now.frac & 0xffffffff));
+ done = handleevents(&now, 0);
+ return (done ? FILTER_HANDLED : FILTER_STRAY);
+}
+/*
+ * Handle all events for specified time on this CPU
+ */
+static int
+handleevents(struct bintime *now, int fake)
+{
+ struct bintime t;
+ struct trapframe *frame;
+ struct pcpu_state *state;
+ uintfptr_t pc;
+ int usermode;
+ int done, runs;
+
+ CTR4(KTR_SPARE2, "handle at %d: now %d.%08x%08x",
+ curcpu, now->sec, (unsigned int)(now->frac >> 32),
+ (unsigned int)(now->frac & 0xffffffff));
+ done = 0;
+ if (fake) {
+ frame = NULL;
+ usermode = 0;
+ pc = 0;
+ } else {
+ frame = curthread->td_intr_frame;
+ usermode = TRAPF_USERMODE(frame);
+ pc = TRAPF_PC(frame);
+ }
#ifdef KDTRACE_HOOKS
/*
* If the DTrace hooks are configured and a callback function
* has been registered, then call it to process the high speed
* timers.
*/
- int cpu = curcpu;
- if (cyclic_clock_func[cpu] != NULL)
- (*cyclic_clock_func[cpu])(frame);
+ if (!fake && cyclic_clock_func[curcpu] != NULL)
+ (*cyclic_clock_func[curcpu])(frame);
#endif
-
- timer1clock(TRAPF_USERMODE(frame), TRAPF_PC(frame));
- return (FILTER_HANDLED);
-}
-
-/* Per-CPU timer2 handler. */
-static int
-statclockhandler(struct trapframe *frame)
-{
-
- timer2clock(TRAPF_USERMODE(frame), TRAPF_PC(frame));
- return (FILTER_HANDLED);
-}
-
-/* timer1 broadcast IPI handler. */
-int
-hardclockintr(struct trapframe *frame)
-{
-
- if (doconfigtimer(0))
- return (FILTER_HANDLED);
- return (hardclockhandler(frame));
+ runs = 0;
+ state = DPCPU_PTR(timerstate);
+ while (bintime_cmp(now, &state->nexthard, >=)) {
+ bintime_add(&state->nexthard, &hardperiod);
+ runs++;
+ }
+ if (runs) {
+ hardclock_anycpu(runs, usermode);
+ done = 1;
+ }
+ while (bintime_cmp(now, &state->nextstat, >=)) {
+ statclock(usermode);
+ bintime_add(&state->nextstat, &statperiod);
+ done = 1;
+ }
+ if (profiling) {
+ while (bintime_cmp(now, &state->nextprof, >=)) {
+ if (!fake)
+ profclock(usermode, pc);
+ bintime_add(&state->nextprof, &profperiod);
+ done = 1;
+ }
+ } else
+ state->nextprof = state->nextstat;
+ getnextcpuevent(&t, 0);
+ ET_HW_LOCK(state);
+ if (!busy) {
+ state->idle = 0;
+ state->nextevent = t;
+ loadtimer(now, 0);
+ }
+ ET_HW_UNLOCK(state);
+ return (done);
}
-/* timer2 broadcast IPI handler. */
-int
-statclockintr(struct trapframe *frame)
+/*
+ * Schedule binuptime of the next event on current CPU.
+ */
+static void
+getnextcpuevent(struct bintime *event, int idle)
{
-
- if (doconfigtimer(1))
- return (FILTER_HANDLED);
- return (statclockhandler(frame));
+ struct bintime tmp;
+ struct pcpu_state *state;
+ int skip;
+
+ state = DPCPU_PTR(timerstate);
+ *event = state->nexthard;
+ if (idle) { /* If CPU is idle - ask callouts for how long. */
+ skip = callout_tickstofirst() - 1;
+ CTR2(KTR_SPARE2, "skip at %d: %d", curcpu, skip);
+ tmp = hardperiod;
+ bintime_mul(&tmp, skip);
+ bintime_add(event, &tmp);
+ } else { /* If CPU is active - handle all types of events. */
+ if (bintime_cmp(event, &state->nextstat, >))
+ *event = state->nextstat;
+ if (profiling &&
+ bintime_cmp(event, &state->nextprof, >))
+ *event = state->nextprof;
+ }
}
-/* timer1 callback. */
+/*
+ * Schedule binuptime of the next event on all CPUs.
+ */
static void
-timer1cb(struct eventtimer *et, void *arg)
+getnextevent(struct bintime *event)
{
-
+ struct pcpu_state *state;
#ifdef SMP
- /* Broadcast interrupt to other CPUs for non-per-CPU timers */
- if (smp_started && (et->et_flags & ET_FLAGS_PERCPU) == 0)
- ipi_all_but_self(IPI_HARDCLOCK);
+ int cpu;
#endif
- if (timertest) {
- if ((et->et_flags & ET_FLAGS_PERCPU) == 0 || curcpu == 0) {
- timerticks[0]++;
- if (timerticks[0] >= timer1hz) {
- ET_LOCK();
- timercheck();
- ET_UNLOCK();
+ int c;
+
+ state = DPCPU_PTR(timerstate);
+ *event = state->nextevent;
+ c = curcpu;
+#ifdef SMP
+ if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) {
+ CPU_FOREACH(cpu) {
+ if (curcpu == cpu)
+ continue;
+ state = DPCPU_ID_PTR(cpu, timerstate);
+ if (bintime_cmp(event, &state->nextevent, >)) {
+ *event = state->nextevent;
+ c = cpu;
}
}
}
- hardclockhandler(curthread->td_intr_frame);
+#endif
+ CTR5(KTR_SPARE2, "next at %d: next %d.%08x%08x by %d",
+ curcpu, event->sec, (unsigned int)(event->frac >> 32),
+ (unsigned int)(event->frac & 0xffffffff), c);
}
-/* timer2 callback. */
+/* Hardware timer callback function. */
static void
-timer2cb(struct eventtimer *et, void *arg)
+timercb(struct eventtimer *et, void *arg)
{
+ struct bintime now;
+ struct bintime *next;
+ struct pcpu_state *state;
+#ifdef SMP
+ int cpu, bcast;
+#endif
+
+ /* Do not touch anything if somebody reconfiguring timers. */
+ if (busy)
+ return;
+ /* Update present and next tick times. */
+ state = DPCPU_PTR(timerstate);
+ if (et->et_flags & ET_FLAGS_PERCPU) {
+ next = &state->nexttick;
+ } else
+ next = &nexttick;
+ if (periodic) {
+ now = *next; /* Ex-next tick time becomes present time. */
+ bintime_add(next, &timerperiod); /* Next tick in 1 period. */
+ } else {
+ binuptime(&now); /* Get present time from hardware. */
+ next->sec = -1; /* Next tick is not scheduled yet. */
+ }
+ state->now = now;
+ CTR4(KTR_SPARE2, "intr at %d: now %d.%08x%08x",
+ curcpu, now.sec, (unsigned int)(now.frac >> 32),
+ (unsigned int)(now.frac & 0xffffffff));
#ifdef SMP
- /* Broadcast interrupt to other CPUs for non-per-CPU timers */
- if (smp_started && (et->et_flags & ET_FLAGS_PERCPU) == 0)
- ipi_all_but_self(IPI_STATCLOCK);
+ /* Prepare broadcasting to other CPUs for non-per-CPU timers. */
+ bcast = 0;
+ if ((et->et_flags & ET_FLAGS_PERCPU) == 0 && smp_started) {
+ CPU_FOREACH(cpu) {
+ if (curcpu == cpu)
+ continue;
+ state = DPCPU_ID_PTR(cpu, timerstate);
+ ET_HW_LOCK(state);
+ state->now = now;
+ if (bintime_cmp(&now, &state->nextevent, >=)) {
+ state->nextevent.sec++;
+ state->ipi = 1;
+ bcast = 1;
+ }
+ ET_HW_UNLOCK(state);
+ }
+ }
#endif
- if (timertest) {
- if ((et->et_flags & ET_FLAGS_PERCPU) == 0 || curcpu == 0) {
- timerticks[1]++;
- if (timerticks[1] >= timer2hz * 2) {
- ET_LOCK();
- timercheck();
- ET_UNLOCK();
+
+ /* Handle events for this time on this CPU. */
+ handleevents(&now, 0);
+
+#ifdef SMP
+ /* Broadcast interrupt to other CPUs for non-per-CPU timers. */
+ if (bcast) {
+ CPU_FOREACH(cpu) {
+ if (curcpu == cpu)
+ continue;
+ state = DPCPU_ID_PTR(cpu, timerstate);
+ if (state->ipi) {
+ state->ipi = 0;
+ ipi_cpu(cpu, IPI_HARDCLOCK);
}
}
}
- statclockhandler(curthread->td_intr_frame);
+#endif
}
/*
- * Check that both timers are running with at least 1/4 of configured rate.
- * If not - replace the broken one.
+ * Load new value into hardware timer.
*/
static void
-timercheck(void)
+loadtimer(struct bintime *now, int start)
{
-
- if (!timertest)
- return;
- timertest = 0;
- if (timerticks[0] * 4 < timer1hz) {
- printf("Event timer \"%s\" is dead.\n", timer[0]->et_name);
- timer1hz = 0;
- configtimer(0);
- et_ban(timer[0]);
- et_free(timer[0]);
- timer[0] = et_find(NULL, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
- if (timer[0] == NULL) {
- timer2hz = 0;
- configtimer(1);
- et_free(timer[1]);
- timer[1] = NULL;
- timer[0] = timer[1];
+ struct pcpu_state *state;
+ struct bintime new;
+ struct bintime *next;
+ uint64_t tmp;
+ int eq;
+
+ if (periodic) {
+ if (start) {
+ /*
+ * Try to start all periodic timers aligned
+ * to period to make events synchronous.
+ */
+ tmp = ((uint64_t)now->sec << 36) + (now->frac >> 28);
+ tmp = (tmp % (timerperiod.frac >> 28)) << 28;
+ tmp = timerperiod.frac - tmp;
+ new = timerperiod;
+ bintime_addx(&new, tmp);
+ CTR5(KTR_SPARE2, "load p at %d: now %d.%08x first in %d.%08x",
+ curcpu, now->sec, (unsigned int)(now->frac >> 32),
+ new.sec, (unsigned int)(new.frac >> 32));
+ et_start(timer, &new, &timerperiod);
+ }
+ } else {
+ if (timer->et_flags & ET_FLAGS_PERCPU) {
+ state = DPCPU_PTR(timerstate);
+ next = &state->nexttick;
+ } else
+ next = &nexttick;
+ getnextevent(&new);
+ eq = bintime_cmp(&new, next, ==);
+ CTR5(KTR_SPARE2, "load at %d: next %d.%08x%08x eq %d",
+ curcpu, new.sec, (unsigned int)(new.frac >> 32),
+ (unsigned int)(new.frac & 0xffffffff),
+ eq);
+ if (!eq) {
+ *next = new;
+ bintime_sub(&new, now);
+ et_start(timer, &new, NULL);
}
- et_init(timer[0], timer1cb, NULL, NULL);
- cpu_restartclocks();
- return;
- }
- if (timerticks[1] * 4 < timer2hz) {
- printf("Event timer \"%s\" is dead.\n", timer[1]->et_name);
- timer2hz = 0;
- configtimer(1);
- et_ban(timer[1]);
- et_free(timer[1]);
- timer[1] = et_find(NULL, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
- if (timer[1] != NULL)
- et_init(timer[1], timer2cb, NULL, NULL);
- cpu_restartclocks();
- return;
}
}
/*
- * Reconfigure specified per-CPU timer on other CPU. Called from IPI handler.
+ * Prepare event timer parameters after configuration changes.
*/
-inline static int
-doconfigtimer(int i)
+static void
+setuptimer(void)
{
- tc *conf;
+ int freq;
+
+ if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
+ periodic = 0;
+ else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
+ periodic = 1;
+ freq = hz * singlemul;
+ while (freq < (profiling ? profhz : stathz))
+ freq += hz;
+ freq = round_freq(timer, freq);
+ FREQ2BT(freq, &timerperiod);
+}
- conf = DPCPU_PTR(configtimer);
- if (atomic_load_acq_int(*conf + i)) {
- if (i == 0 ? timer1hz : timer2hz)
- et_start(timer[i], NULL, &timerperiod[i]);
- else
- et_stop(timer[i]);
- atomic_store_rel_int(*conf + i, 0);
+/*
+ * Reconfigure specified per-CPU timer on other CPU. Called from IPI handler.
+ */
+static int
+doconfigtimer(void)
+{
+ struct bintime now;
+ struct pcpu_state *state;
+
+ state = DPCPU_PTR(timerstate);
+ switch (atomic_load_acq_int(&state->action)) {
+ case 1:
+ binuptime(&now);
+ ET_HW_LOCK(state);
+ loadtimer(&now, 1);
+ ET_HW_UNLOCK(state);
+ state->handle = 0;
+ atomic_store_rel_int(&state->action, 0);
+ return (1);
+ case 2:
+ ET_HW_LOCK(state);
+ et_stop(timer);
+ ET_HW_UNLOCK(state);
+ state->handle = 0;
+ atomic_store_rel_int(&state->action, 0);
+ return (1);
+ }
+ if (atomic_readandclear_int(&state->handle) && !busy) {
+ binuptime(&now);
+ handleevents(&now, 0);
return (1);
}
return (0);
@@ -254,45 +472,79 @@ doconfigtimer(int i)
* For per-CPU timers use IPI to make other CPUs to reconfigure.
*/
static void
-configtimer(int i)
+configtimer(int start)
{
-#ifdef SMP
- tc *conf;
+ struct bintime now, next;
+ struct pcpu_state *state;
int cpu;
+ if (start) {
+ setuptimer();
+ binuptime(&now);
+ }
critical_enter();
-#endif
- /* Start/stop global timer or per-CPU timer of this CPU. */
- if (i == 0 ? timer1hz : timer2hz)
- et_start(timer[i], NULL, &timerperiod[i]);
- else
- et_stop(timer[i]);
+ ET_HW_LOCK(DPCPU_PTR(timerstate));
+ if (start) {
+ /* Initialize time machine parameters. */
+ next = now;
+ bintime_add(&next, &timerperiod);
+ if (periodic)
+ nexttick = next;
+ else
+ nexttick.sec = -1;
+ CPU_FOREACH(cpu) {
+ state = DPCPU_ID_PTR(cpu, timerstate);
+ state->now = now;
+ state->nextevent = next;
+ if (periodic)
+ state->nexttick = next;
+ else
+ state->nexttick.sec = -1;
+ state->nexthard = next;
+ state->nextstat = next;
+ state->nextprof = next;
+ hardclock_sync(cpu);
+ }
+ busy = 0;
+ /* Start global timer or per-CPU timer of this CPU. */
+ loadtimer(&now, 1);
+ } else {
+ busy = 1;
+ /* Stop global timer or per-CPU timer of this CPU. */
+ et_stop(timer);
+ }
+ ET_HW_UNLOCK(DPCPU_PTR(timerstate));
#ifdef SMP
- if ((timer[i]->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) {
+ /* If timer is global or there is no other CPUs yet - we are done. */
+ if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) {
critical_exit();
return;
}
/* Set reconfigure flags for other CPUs. */
CPU_FOREACH(cpu) {
- conf = DPCPU_ID_PTR(cpu, configtimer);
- atomic_store_rel_int(*conf + i, (cpu == curcpu) ? 0 : 1);
+ state = DPCPU_ID_PTR(cpu, timerstate);
+ atomic_store_rel_int(&state->action,
+ (cpu == curcpu) ? 0 : ( start ? 1 : 2));
}
- /* Send reconfigure IPI. */
- ipi_all_but_self(i == 0 ? IPI_HARDCLOCK : IPI_STATCLOCK);
+ /* Broadcast reconfigure IPI. */
+ ipi_all_but_self(IPI_HARDCLOCK);
/* Wait for reconfiguration completed. */
restart:
cpu_spinwait();
CPU_FOREACH(cpu) {
if (cpu == curcpu)
continue;
- conf = DPCPU_ID_PTR(cpu, configtimer);
- if (atomic_load_acq_int(*conf + i))
+ state = DPCPU_ID_PTR(cpu, timerstate);
+ if (atomic_load_acq_int(&state->action))
goto restart;
}
- critical_exit();
#endif
+ critical_exit();
}
+/*
+ * Calculate nearest frequency supported by hardware timer.
+ */
static int
round_freq(struct eventtimer *et, int freq)
{
@@ -314,23 +566,49 @@ round_freq(struct eventtimer *et, int freq)
}
/*
- * Configure and start event timers.
+ * Configure and start event timers (BSP part).
*/
void
cpu_initclocks_bsp(void)
{
- int base, div;
+ struct pcpu_state *state;
+ int base, div, cpu;
- timer[0] = et_find(timername[0], ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
- if (timer[0] == NULL)
- timer[0] = et_find(NULL, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
- if (timer[0] == NULL)
+ mtx_init(&et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
+ CPU_FOREACH(cpu) {
+ state = DPCPU_ID_PTR(cpu, timerstate);
+ mtx_init(&state->et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
+ }
+#ifdef SMP
+ callout_new_inserted = cpu_new_callout;
+#endif
+ /* Grab requested timer or the best of present. */
+ if (timername[0])
+ timer = et_find(timername, 0, 0);
+ if (timer == NULL && periodic) {
+ timer = et_find(NULL,
+ ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
+ }
+ if (timer == NULL) {
+ timer = et_find(NULL,
+ ET_FLAGS_ONESHOT, ET_FLAGS_ONESHOT);
+ }
+ if (timer == NULL && !periodic) {
+ timer = et_find(NULL,
+ ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
+ }
+ if (timer == NULL)
panic("No usable event timer found!");
- et_init(timer[0], timer1cb, NULL, NULL);
- timer[1] = et_find(timername[1][0] ? timername[1] : NULL,
- ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
- if (timer[1])
- et_init(timer[1], timer2cb, NULL, NULL);
+ et_init(timer, timercb, NULL, NULL);
+
+ /* Adapt to timer capabilities. */
+ if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
+ periodic = 0;
+ else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
+ periodic = 1;
+ if (timer->et_flags & ET_FLAGS_C3STOP)
+ cpu_disable_deep_sleep++;
+
/*
* We honor the requested 'hz' value.
* We want to run stathz in the neighborhood of 128hz.
@@ -344,8 +622,8 @@ cpu_initclocks_bsp(void)
else
singlemul = 4;
}
- if (timer[1] == NULL) {
- base = round_freq(timer[0], hz * singlemul);
+ if (periodic) {
+ base = round_freq(timer, hz * singlemul);
singlemul = max((base + hz / 2) / hz, 1);
hz = (base + singlemul / 2) / singlemul;
if (base <= 128)
@@ -359,175 +637,236 @@ cpu_initclocks_bsp(void)
profhz = stathz;
while ((profhz + stathz) <= 128 * 64)
profhz += stathz;
- profhz = round_freq(timer[0], profhz);
+ profhz = round_freq(timer, profhz);
} else {
- hz = round_freq(timer[0], hz);
- stathz = round_freq(timer[1], 127);
- profhz = round_freq(timer[1], stathz * 64);
+ hz = round_freq(timer, hz);
+ stathz = round_freq(timer, 127);
+ profhz = round_freq(timer, stathz * 64);
}
tick = 1000000 / hz;
+ FREQ2BT(hz, &hardperiod);
+ FREQ2BT(stathz, &statperiod);
+ FREQ2BT(profhz, &profperiod);
ET_LOCK();
- cpu_restartclocks();
+ configtimer(1);
ET_UNLOCK();
}
-/* Start per-CPU event timers on APs. */
+/*
+ * Start per-CPU event timers on APs.
+ */
void
cpu_initclocks_ap(void)
{
+ struct bintime now;
+ struct pcpu_state *state;
+
+ if (timer->et_flags & ET_FLAGS_PERCPU) {
+ state = DPCPU_PTR(timerstate);
+ binuptime(&now);
+ ET_HW_LOCK(state);
+ loadtimer(&now, 1);
+ ET_HW_UNLOCK(state);
+ }
+}
+
+/*
+ * Switch to profiling clock rates.
+ */
+void
+cpu_startprofclock(void)
+{
ET_LOCK();
- if (timer[0]->et_flags & ET_FLAGS_PERCPU)
- et_start(timer[0], NULL, &timerperiod[0]);
- if (timer[1] && timer[1]->et_flags & ET_FLAGS_PERCPU)
- et_start(timer[1], NULL, &timerperiod[1]);
+ if (periodic) {
+ configtimer(0);
+ profiling = 1;
+ configtimer(1);
+ } else
+ profiling = 1;
ET_UNLOCK();
}
-/* Reconfigure and restart event timers after configuration changes. */
-static void
-cpu_restartclocks(void)
+/*
+ * Switch to regular clock rates.
+ */
+void
+cpu_stopprofclock(void)
{
- /* Stop all event timers. */
- timertest = 0;
- if (timer1hz) {
- timer1hz = 0;
+ ET_LOCK();
+ if (periodic) {
configtimer(0);
- }
- if (timer[1] && timer2hz) {
- timer2hz = 0;
+ profiling = 0;
configtimer(1);
- }
- /* Calculate new event timers parameters. */
- if (timer[1] == NULL) {
- timer1hz = hz * singlemul;
- while (timer1hz < (profiling_on ? profhz : stathz))
- timer1hz += hz;
- timer2hz = 0;
- } else {
- timer1hz = hz;
- timer2hz = profiling_on ? profhz : stathz;
- timer2hz = round_freq(timer[1], timer2hz);
- }
- timer1hz = round_freq(timer[0], timer1hz);
- printf("Starting kernel event timers: %s @ %dHz, %s @ %dHz\n",
- timer[0]->et_name, timer1hz,
- timer[1] ? timer[1]->et_name : "NONE", timer2hz);
- /* Restart event timers. */
- FREQ2BT(timer1hz, &timerperiod[0]);
- configtimer(0);
- if (timer[1]) {
- timerticks[0] = 0;
- timerticks[1] = 0;
- FREQ2BT(timer2hz, &timerperiod[1]);
- configtimer(1);
- timertest = 1;
- }
+ } else
+ profiling = 0;
+ ET_UNLOCK();
}
-/* Switch to profiling clock rates. */
+/*
+ * Switch to idle mode (all ticks handled).
+ */
void
-cpu_startprofclock(void)
+cpu_idleclock(void)
{
+ struct bintime now, t;
+ struct pcpu_state *state;
- ET_LOCK();
- profiling_on = 1;
- cpu_restartclocks();
- ET_UNLOCK();
+ if (idletick || busy ||
+ (periodic && (timer->et_flags & ET_FLAGS_PERCPU)))
+ return;
+ state = DPCPU_PTR(timerstate);
+ if (periodic)
+ now = state->now;
+ else
+ binuptime(&now);
+ CTR4(KTR_SPARE2, "idle at %d: now %d.%08x%08x",
+ curcpu, now.sec, (unsigned int)(now.frac >> 32),
+ (unsigned int)(now.frac & 0xffffffff));
+ getnextcpuevent(&t, 1);
+ ET_HW_LOCK(state);
+ state->idle = 1;
+ state->nextevent = t;
+ if (!periodic)
+ loadtimer(&now, 0);
+ ET_HW_UNLOCK(state);
}
-/* Switch to regular clock rates. */
+/*
+ * Switch to active mode (skip empty ticks).
+ */
void
-cpu_stopprofclock(void)
+cpu_activeclock(void)
{
+ struct bintime now;
+ struct pcpu_state *state;
+ struct thread *td;
- ET_LOCK();
- profiling_on = 0;
- cpu_restartclocks();
- ET_UNLOCK();
+ state = DPCPU_PTR(timerstate);
+ if (state->idle == 0 || busy)
+ return;
+ if (periodic)
+ now = state->now;
+ else
+ binuptime(&now);
+ CTR4(KTR_SPARE2, "active at %d: now %d.%08x%08x",
+ curcpu, now.sec, (unsigned int)(now.frac >> 32),
+ (unsigned int)(now.frac & 0xffffffff));
+ spinlock_enter();
+ td = curthread;
+ td->td_intr_nesting_level++;
+ handleevents(&now, 1);
+ td->td_intr_nesting_level--;
+ spinlock_exit();
}
-/* Report or change the active event timers hardware. */
+#ifdef SMP
+static void
+cpu_new_callout(int cpu, int ticks)
+{
+ struct bintime tmp;
+ struct pcpu_state *state;
+
+ CTR3(KTR_SPARE2, "new co at %d: on %d in %d",
+ curcpu, cpu, ticks);
+ state = DPCPU_ID_PTR(cpu, timerstate);
+ ET_HW_LOCK(state);
+ if (state->idle == 0 || busy) {
+ ET_HW_UNLOCK(state);
+ return;
+ }
+ /*
+ * If timer is periodic - just update next event time for target CPU.
+ */
+ if (periodic) {
+ state->nextevent = state->nexthard;
+ tmp = hardperiod;
+ bintime_mul(&tmp, ticks - 1);
+ bintime_add(&state->nextevent, &tmp);
+ ET_HW_UNLOCK(state);
+ return;
+ }
+ /*
+ * Otherwise we have to wake that CPU up, as we can't get present
+ * bintime to reprogram global timer from here. If timer is per-CPU,
+ * we by definition can't do it from here.
+ */
+ ET_HW_UNLOCK(state);
+ if (timer->et_flags & ET_FLAGS_PERCPU) {
+ state->handle = 1;
+ ipi_cpu(cpu, IPI_HARDCLOCK);
+ } else {
+ if (!cpu_idle_wakeup(cpu))
+ ipi_cpu(cpu, IPI_AST);
+ }
+}
+#endif
+
+/*
+ * Report or change the active event timers hardware.
+ */
static int
-sysctl_kern_eventtimer_timer1(SYSCTL_HANDLER_ARGS)
+sysctl_kern_eventtimer_timer(SYSCTL_HANDLER_ARGS)
{
char buf[32];
struct eventtimer *et;
int error;
ET_LOCK();
- et = timer[0];
+ et = timer;
snprintf(buf, sizeof(buf), "%s", et->et_name);
ET_UNLOCK();
error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
ET_LOCK();
- et = timer[0];
+ et = timer;
if (error != 0 || req->newptr == NULL ||
- strcmp(buf, et->et_name) == 0) {
+ strcasecmp(buf, et->et_name) == 0) {
ET_UNLOCK();
return (error);
}
- et = et_find(buf, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
+ et = et_find(buf, 0, 0);
if (et == NULL) {
ET_UNLOCK();
return (ENOENT);
}
- timer1hz = 0;
configtimer(0);
- et_free(timer[0]);
- timer[0] = et;
- et_init(timer[0], timer1cb, NULL, NULL);
- cpu_restartclocks();
+ et_free(timer);
+ if (et->et_flags & ET_FLAGS_C3STOP)
+ cpu_disable_deep_sleep++;
+ if (timer->et_flags & ET_FLAGS_C3STOP)
+ cpu_disable_deep_sleep--;
+ timer = et;
+ et_init(timer, timercb, NULL, NULL);
+ configtimer(1);
ET_UNLOCK();
return (error);
}
-SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer1,
+SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer,
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
- 0, 0, sysctl_kern_eventtimer_timer1, "A", "Primary event timer");
+ 0, 0, sysctl_kern_eventtimer_timer, "A", "Kernel event timer");
+/*
+ * Report or change the active event timer periodicity.
+ */
static int
-sysctl_kern_eventtimer_timer2(SYSCTL_HANDLER_ARGS)
+sysctl_kern_eventtimer_periodic(SYSCTL_HANDLER_ARGS)
{
- char buf[32];
- struct eventtimer *et;
- int error;
+ int error, val;
- ET_LOCK();
- et = timer[1];
- if (et == NULL)
- snprintf(buf, sizeof(buf), "NONE");
- else
- snprintf(buf, sizeof(buf), "%s", et->et_name);
- ET_UNLOCK();
- error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
- ET_LOCK();
- et = timer[1];
- if (error != 0 || req->newptr == NULL ||
- strcmp(buf, et ? et->et_name : "NONE") == 0) {
- ET_UNLOCK();
+ val = periodic;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error != 0 || req->newptr == NULL)
return (error);
- }
- et = et_find(buf, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
- if (et == NULL && strcasecmp(buf, "NONE") != 0) {
- ET_UNLOCK();
- return (ENOENT);
- }
- if (timer[1] != NULL) {
- timer2hz = 0;
- configtimer(1);
- et_free(timer[1]);
- }
- timer[1] = et;
- if (timer[1] != NULL)
- et_init(timer[1], timer2cb, NULL, NULL);
- cpu_restartclocks();
+ ET_LOCK();
+ configtimer(0);
+ periodic = val;
+ configtimer(1);
ET_UNLOCK();
return (error);
}
-SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer2,
- CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
- 0, 0, sysctl_kern_eventtimer_timer2, "A", "Secondary event timer");
+SYSCTL_PROC(_kern_eventtimer, OID_AUTO, periodic,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ 0, 0, sysctl_kern_eventtimer_periodic, "I", "Kernel event timer periodic");
#endif
-
diff --git a/sys/kern/kern_et.c b/sys/kern/kern_et.c
index 17b9c67..8c37556 100644
--- a/sys/kern/kern_et.c
+++ b/sys/kern/kern_et.c
@@ -38,7 +38,7 @@ SLIST_HEAD(et_eventtimers_list, eventtimer);
static struct et_eventtimers_list eventtimers = SLIST_HEAD_INITIALIZER(et_eventtimers);
struct mtx et_eventtimers_mtx;
-MTX_SYSINIT(et_eventtimers_init, &et_eventtimers_mtx, "et_mtx", MTX_SPIN);
+MTX_SYSINIT(et_eventtimers_init, &et_eventtimers_mtx, "et_mtx", MTX_DEF);
SYSCTL_NODE(_kern, OID_AUTO, eventtimer, CTLFLAG_RW, 0, "Event timers");
SYSCTL_NODE(_kern_eventtimer, OID_AUTO, et, CTLFLAG_RW, 0, "");
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
index d973348..811b24f 100644
--- a/sys/kern/kern_tc.c
+++ b/sys/kern/kern_tc.c
@@ -770,16 +770,11 @@ void
tc_ticktock(void)
{
static int count;
- static time_t last_calib;
if (++count < tc_tick)
return;
count = 0;
tc_windup();
- if (time_uptime != last_calib && !(time_uptime & 0xf)) {
- cpu_tick_calibrate(0);
- last_calib = time_uptime;
- }
}
static void
@@ -830,9 +825,20 @@ tc_cpu_ticks(void)
return (u + base);
}
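+/*
+ * Invoke cpu_tick_calibrate() at most once every 16 seconds of uptime.
+ */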
+void
+cpu_tick_calibration(void)
+{
+ static time_t last_calib;
+
+ if (time_uptime != last_calib && !(time_uptime & 0xf)) {
+ cpu_tick_calibrate(0);
+ last_calib = time_uptime;
+ }
+}
+
/*
* This function gets called every 16 seconds on only one designated
- * CPU in the system from hardclock() via tc_ticktock().
+ * CPU in the system from hardclock() via cpu_tick_calibration().
*
* Whenever the real time clock is stepped we get called with reset=1
* to make sure we handle suspend/resume and similar events correctly.
diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c
index 32d5691..5697792 100644
--- a/sys/kern/kern_timeout.c
+++ b/sys/kern/kern_timeout.c
@@ -111,6 +111,7 @@ struct callout_cpu {
int cc_softticks;
int cc_cancel;
int cc_waiting;
+ int cc_firsttick;
};
#ifdef SMP
@@ -126,6 +127,7 @@ struct callout_cpu cc_cpu;
#define CC_UNLOCK(cc) mtx_unlock_spin(&(cc)->cc_lock)
static int timeout_cpu;
+void (*callout_new_inserted)(int cpu, int ticks) = NULL;
MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
@@ -260,7 +262,7 @@ callout_tick(void)
need_softclock = 0;
cc = CC_SELF();
mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
- cc->cc_ticks++;
+ cc->cc_firsttick = cc->cc_ticks = ticks;
for (; (cc->cc_softticks - cc->cc_ticks) <= 0; cc->cc_softticks++) {
bucket = cc->cc_softticks & callwheelmask;
if (!TAILQ_EMPTY(&cc->cc_callwheel[bucket])) {
@@ -277,6 +279,34 @@ callout_tick(void)
swi_sched(cc->cc_cookie, 0);
}
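+/*
+ * Scan the callout wheel for the first pending callout on this CPU and
+ * return how many ticks may be skipped before it is due, capped at
+ * ncallout and hz/8.
+ */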
+int
+callout_tickstofirst(void)
+{
+ struct callout_cpu *cc;
+ struct callout *c;
+ struct callout_tailq *sc;
+ int curticks;
+ int skip = 1;
+
+ cc = CC_SELF();
+ mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
+ curticks = cc->cc_ticks;
+ while (skip < ncallout && skip < hz / 8) {
+ sc = &cc->cc_callwheel[(curticks + skip) & callwheelmask];
+ /* Search this bucket for a pending callout. */
+ TAILQ_FOREACH(c, sc, c_links.tqe) {
+ if (c->c_time <= curticks + ncallout &&
+ c->c_time > 0)
+ goto out;
+ }
+ skip++;
+ }
+out:
+ cc->cc_firsttick = curticks + skip;
+ mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
+ return (skip);
+}
+
static struct callout_cpu *
callout_lock(struct callout *c)
{
@@ -639,9 +669,14 @@ retry:
c->c_arg = arg;
c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
c->c_func = ftn;
- c->c_time = cc->cc_ticks + to_ticks;
+ c->c_time = ticks + to_ticks;
TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
c, c_links.tqe);
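+ /*
+ * If the new callout is due earlier than anything currently scheduled,
+ * notify the event timer code so it can reprogram the hardware.
+ */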
+ if (callout_new_inserted != NULL &&
+ (c->c_time - cc->cc_firsttick) < 0) {
+ cc->cc_firsttick = c->c_time;
+ (*callout_new_inserted)(cpu,
+ to_ticks + (ticks - cc->cc_ticks));
+ }
CTR5(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d",
cancelled ? "re" : "", c, c->c_func, c->c_arg, to_ticks);
CC_UNLOCK(cc);
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index 780dc6d..9face64 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -1547,7 +1547,7 @@ sched_pctcpu(struct thread *td)
}
void
-sched_tick(void)
+sched_tick(int cnt)
{
}
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index bb2d34a..e1cc172 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -196,7 +196,7 @@ static int preempt_thresh = 0;
#endif
static int static_boost = PRI_MIN_TIMESHARE;
static int sched_idlespins = 10000;
-static int sched_idlespinthresh = 64;
+static int sched_idlespinthresh = 16;
/*
* tdq - per processor runqs and statistics. All fields are protected by the
@@ -2163,7 +2163,7 @@ sched_clock(struct thread *td)
* is easier than trying to scale based on stathz.
*/
void
-sched_tick(void)
+sched_tick(int cnt)
{
struct td_sched *ts;
@@ -2175,7 +2175,7 @@ sched_tick(void)
if (ts->ts_incrtick == ticks)
return;
/* Adjust ticks for pctcpu */
- ts->ts_ticks += 1 << SCHED_TICK_SHIFT;
+ ts->ts_ticks += cnt << SCHED_TICK_SHIFT;
ts->ts_ltick = ticks;
ts->ts_incrtick = ticks;
/*
@@ -2549,7 +2549,7 @@ sched_idletd(void *dummy)
if (tdq->tdq_load == 0) {
tdq->tdq_cpu_idle = 1;
if (tdq->tdq_load == 0) {
- cpu_idle(switchcnt > sched_idlespinthresh);
+ cpu_idle(switchcnt > sched_idlespinthresh * 4);
tdq->tdq_switchcnt++;
}
tdq->tdq_cpu_idle = 0;
diff --git a/sys/mips/include/smp.h b/sys/mips/include/smp.h
index 28efd4c..58aaf03 100644
--- a/sys/mips/include/smp.h
+++ b/sys/mips/include/smp.h
@@ -28,7 +28,6 @@
#define IPI_STOP_HARD 0x0008
#define IPI_PREEMPT 0x0010
#define IPI_HARDCLOCK 0x0020
-#define IPI_STATCLOCK 0x0040
#ifndef LOCORE
diff --git a/sys/mips/mips/mp_machdep.c b/sys/mips/mips/mp_machdep.c
index c7ff3d8..ef2f24c 100644
--- a/sys/mips/mips/mp_machdep.c
+++ b/sys/mips/mips/mp_machdep.c
@@ -164,11 +164,7 @@ mips_ipi_handler(void *arg)
break;
case IPI_HARDCLOCK:
CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
- hardclockintr(arg);;
- break;
- case IPI_STATCLOCK:
- CTR1(KTR_SMP, "%s: IPI_STATCLOCK", __func__);
- statclockintr(arg);;
+ hardclockintr();
break;
default:
panic("Unknown IPI 0x%0x on cpu %d", ipi, curcpu);
diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c
index 671ce5b..22dc8f0 100644
--- a/sys/pc98/pc98/machdep.c
+++ b/sys/pc98/pc98/machdep.c
@@ -1120,40 +1120,36 @@ cpu_halt(void)
__asm__ ("hlt");
}
+static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */
+TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
+SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait,
+ 0, "Use MONITOR/MWAIT for short idle");
+
+#define STATE_RUNNING 0x0
+#define STATE_MWAIT 0x1
+#define STATE_SLEEPING 0x2
+
static void
cpu_idle_hlt(int busy)
{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
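+ /* Mark this CPU as sleeping; waking it up will require an IPI. */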
+ *state = STATE_SLEEPING;
/*
- * we must absolutely guarentee that hlt is the next instruction
+ * We must absolutely guarantee that hlt is the next instruction
* after sti or we introduce a timing window.
*/
disable_intr();
- if (sched_runnable())
+ if (sched_runnable())
enable_intr();
else
__asm __volatile("sti; hlt");
-}
-
-static void
-cpu_idle_spin(int busy)
-{
- return;
-}
-
-void (*cpu_idle_fn)(int) = cpu_idle_hlt;
-
-void
-cpu_idle(int busy)
-{
-#if defined(SMP)
- if (mp_grab_cpu_hlt())
- return;
-#endif
- cpu_idle_fn(busy);
+ *state = STATE_RUNNING;
}
/*
- * mwait cpu power states. Lower 4 bits are sub-states.
+ * MWAIT cpu power states. Lower 4 bits are sub-states.
*/
#define MWAIT_C0 0xf0
#define MWAIT_C1 0x00
@@ -1161,63 +1157,91 @@ cpu_idle(int busy)
#define MWAIT_C3 0x20
#define MWAIT_C4 0x30
-#define MWAIT_DISABLED 0x0
-#define MWAIT_WOKEN 0x1
-#define MWAIT_WAITING 0x2
-
static void
cpu_idle_mwait(int busy)
{
- int *mwait;
-
- mwait = (int *)PCPU_PTR(monitorbuf);
- *mwait = MWAIT_WAITING;
- if (sched_runnable())
- return;
- cpu_monitor(mwait, 0, 0);
- if (*mwait == MWAIT_WAITING)
- cpu_mwait(0, MWAIT_C1);
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_MWAIT;
+ if (!sched_runnable()) {
+ cpu_monitor(state, 0, 0);
+ if (*state == STATE_MWAIT)
+ cpu_mwait(0, MWAIT_C1);
+ }
+ *state = STATE_RUNNING;
}
static void
-cpu_idle_mwait_hlt(int busy)
+cpu_idle_spin(int busy)
{
- int *mwait;
+ int *state;
+ int i;
- mwait = (int *)PCPU_PTR(monitorbuf);
- if (busy == 0) {
- *mwait = MWAIT_DISABLED;
- cpu_idle_hlt(busy);
- return;
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_RUNNING;
+ for (i = 0; i < 1000; i++) {
+ if (sched_runnable())
+ return;
+ cpu_spinwait();
}
- *mwait = MWAIT_WAITING;
- if (sched_runnable())
+}
+
+void (*cpu_idle_fn)(int) = cpu_idle_hlt;
+
+void
+cpu_idle(int busy)
+{
+
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
+ busy, curcpu);
+#ifdef SMP
+ if (mp_grab_cpu_hlt())
return;
- cpu_monitor(mwait, 0, 0);
- if (*mwait == MWAIT_WAITING)
- cpu_mwait(0, MWAIT_C1);
+#endif
+ /* If we are busy, try to use fast methods. */
+ if (busy) {
+ if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
+ cpu_idle_mwait(busy);
+ goto out;
+ }
+ }
+
+ /* If we have time, switch the timers into idle mode. */
+ if (!busy) {
+ critical_enter();
+ cpu_idleclock();
+ }
+
+ /* Call main idle method. */
+ cpu_idle_fn(busy);
+
+ /* Switch the timers back into active mode. */
+ if (!busy) {
+ cpu_activeclock();
+ critical_exit();
+ }
+out:
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
+ busy, curcpu);
}
int
cpu_idle_wakeup(int cpu)
{
struct pcpu *pcpu;
- int *mwait;
+ int *state;
- if (cpu_idle_fn == cpu_idle_spin)
- return (1);
- if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt)
- return (0);
pcpu = pcpu_find(cpu);
- mwait = (int *)pcpu->pc_monitorbuf;
+ state = (int *)pcpu->pc_monitorbuf;
/*
* This doesn't need to be atomic since missing the race will
* simply result in unnecessary IPIs.
*/
- if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED)
+ if (*state == STATE_SLEEPING)
return (0);
- *mwait = MWAIT_WOKEN;
-
+ if (*state == STATE_MWAIT)
+ *state = STATE_RUNNING;
return (1);
}
@@ -1230,7 +1254,6 @@ struct {
} idle_tbl[] = {
{ cpu_idle_spin, "spin" },
{ cpu_idle_mwait, "mwait" },
- { cpu_idle_mwait_hlt, "mwait_hlt" },
{ cpu_idle_hlt, "hlt" },
{ NULL, NULL }
};
@@ -1255,6 +1278,9 @@ idle_sysctl_available(SYSCTL_HANDLER_ARGS)
return (error);
}
+SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
+ 0, 0, idle_sysctl_available, "A", "list of available idle functions");
+
static int
idle_sysctl(SYSCTL_HANDLER_ARGS)
{
@@ -1286,9 +1312,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
return (EINVAL);
}
-SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
- 0, 0, idle_sysctl_available, "A", "list of available idle functions");
-
SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
idle_sysctl, "A", "currently selected idle function");
diff --git a/sys/powerpc/aim/machdep.c b/sys/powerpc/aim/machdep.c
index 3290fa6..ba06531 100644
--- a/sys/powerpc/aim/machdep.c
+++ b/sys/powerpc/aim/machdep.c
@@ -638,7 +638,13 @@ cpu_idle(int busy)
panic("ints disabled in idleproc!");
}
#endif
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
+ busy, curcpu);
if (powerpc_pow_enabled) {
+ if (!busy) {
+ critical_enter();
+ cpu_idleclock();
+ }
switch (vers) {
case IBM970:
case IBM970FX:
@@ -658,7 +664,13 @@ cpu_idle(int busy)
isync();
break;
}
+ if (!busy) {
+ cpu_activeclock();
+ critical_exit();
+ }
}
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
+ busy, curcpu);
}
int
diff --git a/sys/powerpc/booke/machdep.c b/sys/powerpc/booke/machdep.c
index c725dd8..c4b80cc 100644
--- a/sys/powerpc/booke/machdep.c
+++ b/sys/powerpc/booke/machdep.c
@@ -488,9 +488,21 @@ cpu_idle (int busy)
}
#endif
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
+ busy, curcpu);
+ if (!busy) {
+ critical_enter();
+ cpu_idleclock();
+ }
/* Freescale E500 core RM section 6.4.1. */
msr = msr | PSL_WE;
__asm __volatile("msync; mtmsr %0; isync" :: "r" (msr));
+ if (!busy) {
+ cpu_activeclock();
+ critical_exit();
+ }
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
+ busy, curcpu);
}
int
diff --git a/sys/powerpc/include/smp.h b/sys/powerpc/include/smp.h
index c78af74..cf95278 100644
--- a/sys/powerpc/include/smp.h
+++ b/sys/powerpc/include/smp.h
@@ -37,7 +37,6 @@
#define IPI_STOP 3
#define IPI_STOP_HARD 3
#define IPI_HARDCLOCK 4
-#define IPI_STATCLOCK 5
#ifndef LOCORE
diff --git a/sys/powerpc/powerpc/mp_machdep.c b/sys/powerpc/powerpc/mp_machdep.c
index a833638..6915c4b 100644
--- a/sys/powerpc/powerpc/mp_machdep.c
+++ b/sys/powerpc/powerpc/mp_machdep.c
@@ -315,7 +315,7 @@ powerpc_ipi_handler(void *arg)
break;
case IPI_HARDCLOCK:
CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
- hardclockintr(curthread->td_intr_frame);
+ hardclockintr();
break;
}
}
diff --git a/sys/sparc64/include/intr_machdep.h b/sys/sparc64/include/intr_machdep.h
index 254ac78..158b5b6 100644
--- a/sys/sparc64/include/intr_machdep.h
+++ b/sys/sparc64/include/intr_machdep.h
@@ -47,7 +47,6 @@
#define PIL_STOP 5 /* stop cpu ipi */
#define PIL_PREEMPT 6 /* preempt idle thread cpu ipi */
#define PIL_HARDCLOCK 7 /* hardclock broadcast */
-#define PIL_STATCLOCK 8 /* statclock broadcast */
#define PIL_FILTER 12 /* filter interrupts */
#define PIL_FAST 13 /* fast interrupts */
#define PIL_TICK 14 /* tick interrupts */
diff --git a/sys/sparc64/include/smp.h b/sys/sparc64/include/smp.h
index 3812431..3ca8e03 100644
--- a/sys/sparc64/include/smp.h
+++ b/sys/sparc64/include/smp.h
@@ -59,7 +59,6 @@
#define IPI_RENDEZVOUS PIL_RENDEZVOUS
#define IPI_PREEMPT PIL_PREEMPT
#define IPI_HARDCLOCK PIL_HARDCLOCK
-#define IPI_STATCLOCK PIL_STATCLOCK
#define IPI_STOP PIL_STOP
#define IPI_STOP_HARD PIL_STOP
diff --git a/sys/sparc64/sparc64/intr_machdep.c b/sys/sparc64/sparc64/intr_machdep.c
index 8e610f6..8571286 100644
--- a/sys/sparc64/sparc64/intr_machdep.c
+++ b/sys/sparc64/sparc64/intr_machdep.c
@@ -97,8 +97,7 @@ static const char *const pil_names[] = {
"stop", /* PIL_STOP */
"preempt", /* PIL_PREEMPT */
"hardclock", /* PIL_HARDCLOCK */
- "statclock", /* PIL_STATCLOCK */
- "stray", "stray", "stray",
+ "stray", "stray", "stray", "stray",
"filter", /* PIL_FILTER */
"fast", /* PIL_FAST */
"tick", /* PIL_TICK */
diff --git a/sys/sparc64/sparc64/mp_machdep.c b/sys/sparc64/sparc64/mp_machdep.c
index 57a2d6f..e5a9fb3 100644
--- a/sys/sparc64/sparc64/mp_machdep.c
+++ b/sys/sparc64/sparc64/mp_machdep.c
@@ -98,7 +98,6 @@ __FBSDID("$FreeBSD$");
static ih_func_t cpu_ipi_ast;
static ih_func_t cpu_ipi_hardclock;
static ih_func_t cpu_ipi_preempt;
-static ih_func_t cpu_ipi_statclock;
static ih_func_t cpu_ipi_stop;
/*
@@ -292,7 +291,6 @@ cpu_mp_start(void)
intr_setup(PIL_STOP, cpu_ipi_stop, -1, NULL, NULL);
intr_setup(PIL_PREEMPT, cpu_ipi_preempt, -1, NULL, NULL);
intr_setup(PIL_HARDCLOCK, cpu_ipi_hardclock, -1, NULL, NULL);
- intr_setup(PIL_STATCLOCK, cpu_ipi_statclock, -1, NULL, NULL);
cpuid_to_mid[curcpu] = PCPU_GET(mid);
@@ -524,15 +522,18 @@ cpu_ipi_preempt(struct trapframe *tf)
static void
cpu_ipi_hardclock(struct trapframe *tf)
{
+ struct trapframe *oldframe;
+ struct thread *td;
- hardclockintr(tf);
-}
-
-static void
-cpu_ipi_statclock(struct trapframe *tf)
-{
-
- statclockintr(tf);
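+ /*
+ * Make the trap frame visible through td_intr_frame while running the
+ * shared hardclock handler.
+ */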
+ critical_enter();
+ td = curthread;
+ td->td_intr_nesting_level++;
+ oldframe = td->td_intr_frame;
+ td->td_intr_frame = tf;
+ hardclockintr();
+ td->td_intr_frame = oldframe;
+ td->td_intr_nesting_level--;
+ critical_exit();
}
static void
diff --git a/sys/sun4v/include/intr_machdep.h b/sys/sun4v/include/intr_machdep.h
index 370a5c0..f686e66 100644
--- a/sys/sun4v/include/intr_machdep.h
+++ b/sys/sun4v/include/intr_machdep.h
@@ -47,7 +47,6 @@
#define PIL_STOP 5 /* stop cpu ipi */
#define PIL_PREEMPT 6 /* preempt idle thread cpu ipi */
#define PIL_HARDCLOCK 7 /* hardclock broadcast */
-#define PIL_STATCLOCK 8 /* statclock broadcast */
#define PIL_FAST 13 /* fast interrupts */
#define PIL_TICK 14
diff --git a/sys/sun4v/include/smp.h b/sys/sun4v/include/smp.h
index 56c50eb..3202089 100644
--- a/sys/sun4v/include/smp.h
+++ b/sys/sun4v/include/smp.h
@@ -47,7 +47,6 @@
#define IPI_STOP_HARD PIL_STOP
#define IPI_PREEMPT PIL_PREEMPT
#define IPI_HARDCLOCK PIL_HARDCLOCK
-#define IPI_STATCLOCK PIL_STATCLOCK
#define IPI_RETRIES 5000
@@ -83,7 +82,6 @@ void cpu_ipi_ast(struct trapframe *tf);
void cpu_ipi_stop(struct trapframe *tf);
void cpu_ipi_preempt(struct trapframe *tf);
void cpu_ipi_hardclock(struct trapframe *tf);
-void cpu_ipi_statclock(struct trapframe *tf);
void ipi_all_but_self(u_int ipi);
void ipi_cpu(int cpu, u_int ipi);
diff --git a/sys/sun4v/sun4v/intr_machdep.c b/sys/sun4v/sun4v/intr_machdep.c
index 123493e..3587402 100644
--- a/sys/sun4v/sun4v/intr_machdep.c
+++ b/sys/sun4v/sun4v/intr_machdep.c
@@ -110,8 +110,7 @@ static char *pil_names[] = {
"stop", /* PIL_STOP */
"preempt", /* PIL_PREEMPT */
"hardclock", /* PIL_HARDCLOCK */
- "statclock", /* PIL_STATCLOCK */
- "stray", "stray", "stray", "stray",
+ "stray", "stray", "stray", "stray", "stray",
"fast", /* PIL_FAST */
"tick", /* PIL_TICK */
};
@@ -265,7 +264,6 @@ intr_init(void)
intr_handlers[PIL_STOP]= cpu_ipi_stop;
intr_handlers[PIL_PREEMPT]= cpu_ipi_preempt;
intr_handlers[PIL_HARDCLOCK]= cpu_ipi_hardclock;
- intr_handlers[PIL_STATCLOCK]= cpu_ipi_statclock;
#endif
mtx_init(&intr_table_lock, "intr table", NULL, MTX_SPIN);
cpu_intrq_alloc();
diff --git a/sys/sun4v/sun4v/mp_machdep.c b/sys/sun4v/sun4v/mp_machdep.c
index 2e9a378..a9535e3 100644
--- a/sys/sun4v/sun4v/mp_machdep.c
+++ b/sys/sun4v/sun4v/mp_machdep.c
@@ -472,15 +472,18 @@ cpu_ipi_preempt(struct trapframe *tf)
void
cpu_ipi_hardclock(struct trapframe *tf)
{
+ struct trapframe *oldframe;
+ struct thread *td;
- hardclockintr(tf);
-}
-
-void
-cpu_ipi_statclock(struct trapframe *tf)
-{
-
- statclockintr(tf);
+ critical_enter();
+ td = curthread;
+ td->td_intr_nesting_level++;
+ oldframe = td->td_intr_frame;
+ td->td_intr_frame = tf;
+ hardclockintr();
+ td->td_intr_frame = oldframe;
+ td->td_intr_nesting_level--;
+ critical_exit();
}
void
diff --git a/sys/sys/callout.h b/sys/sys/callout.h
index 2d43d14..8fcd06e 100644
--- a/sys/sys/callout.h
+++ b/sys/sys/callout.h
@@ -96,7 +96,8 @@ int callout_schedule_on(struct callout *, int, int);
#define callout_stop(c) _callout_stop_safe(c, 0)
int _callout_stop_safe(struct callout *, int);
void callout_tick(void);
-
+int callout_tickstofirst(void);
+extern void (*callout_new_inserted)(int cpu, int ticks);
#endif
diff --git a/sys/sys/sched.h b/sys/sys/sched.h
index d0ebffd..92dd4c4 100644
--- a/sys/sys/sched.h
+++ b/sys/sys/sched.h
@@ -111,7 +111,7 @@ void sched_preempt(struct thread *td);
void sched_add(struct thread *td, int flags);
void sched_clock(struct thread *td);
void sched_rem(struct thread *td);
-void sched_tick(void);
+void sched_tick(int cnt);
void sched_relinquish(struct thread *td);
struct thread *sched_choose(void);
void sched_idletd(void *);
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index f913887..8e98ef4 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -237,20 +237,22 @@ void realitexpire(void *);
int sysbeep(int hertz, int period);
void hardclock(int usermode, uintfptr_t pc);
+void hardclock_anycpu(int cnt, int usermode);
void hardclock_cpu(int usermode);
+void hardclock_sync(int cpu);
void softclock(void *);
void statclock(int usermode);
void profclock(int usermode, uintfptr_t pc);
-void timer1clock(int usermode, uintfptr_t pc);
-void timer2clock(int usermode, uintfptr_t pc);
-int hardclockintr(struct trapframe *frame);
-int statclockintr(struct trapframe *frame);
+int hardclockintr(void);
void startprofclock(struct proc *);
void stopprofclock(struct proc *);
void cpu_startprofclock(void);
void cpu_stopprofclock(void);
+void cpu_idleclock(void);
+void cpu_activeclock(void);
+extern int cpu_disable_deep_sleep;
int cr_cansee(struct ucred *u1, struct ucred *u2);
int cr_canseesocket(struct ucred *cred, struct socket *so);
diff --git a/sys/sys/timeet.h b/sys/sys/timeet.h
index bc713d6..87392a2 100644
--- a/sys/sys/timeet.h
+++ b/sys/sys/timeet.h
@@ -83,8 +83,8 @@ struct eventtimer {
};
extern struct mtx et_eventtimers_mtx;
-#define ET_LOCK() mtx_lock_spin(&et_eventtimers_mtx)
-#define ET_UNLOCK() mtx_unlock_spin(&et_eventtimers_mtx)
+#define ET_LOCK() mtx_lock(&et_eventtimers_mtx)
+#define ET_UNLOCK() mtx_unlock(&et_eventtimers_mtx)
/* Driver API */
int et_register(struct eventtimer *et);
diff --git a/sys/sys/timetc.h b/sys/sys/timetc.h
index d5a818b..3249788 100644
--- a/sys/sys/timetc.h
+++ b/sys/sys/timetc.h
@@ -70,6 +70,7 @@ u_int64_t tc_getfrequency(void);
void tc_init(struct timecounter *tc);
void tc_setclock(struct timespec *ts);
void tc_ticktock(void);
+void cpu_tick_calibration(void);
#ifdef SYSCTL_DECL
SYSCTL_DECL(_kern_timecounter);
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
index f479bbe..6d7a53b 100644
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -261,7 +261,7 @@ lapic_init(vm_paddr_t addr)
lapic_et.et_quality = 600;
if (!arat) {
lapic_et.et_flags |= ET_FLAGS_C3STOP;
- lapic_et.et_quality -= 100;
+ lapic_et.et_quality -= 200;
}
lapic_et.et_frequency = 0;
/* We don't know frequency yet, so trying to guess. */