-rw-r--r--  sys/amd64/amd64/machdep.c        228
-rw-r--r--  sys/amd64/amd64/mp_machdep.c      24
-rw-r--r--  sys/amd64/include/apicvar.h        3
-rw-r--r--  sys/dev/acpica/acpi_cpu.c         18
-rw-r--r--  sys/dev/acpica/acpi_hpet.c         6
-rw-r--r--  sys/i386/i386/machdep.c          252
-rw-r--r--  sys/i386/i386/mp_machdep.c        24
-rw-r--r--  sys/i386/include/apicvar.h         6
-rw-r--r--  sys/kern/kern_clock.c            140
-rw-r--r--  sys/kern/kern_clocksource.c      863
-rw-r--r--  sys/kern/kern_et.c                 2
-rw-r--r--  sys/kern/kern_tc.c                18
-rw-r--r--  sys/kern/kern_timeout.c           39
-rw-r--r--  sys/kern/sched_4bsd.c              2
-rw-r--r--  sys/kern/sched_ule.c               8
-rw-r--r--  sys/mips/include/smp.h             1
-rw-r--r--  sys/mips/mips/mp_machdep.c         6
-rw-r--r--  sys/pc98/pc98/machdep.c          141
-rw-r--r--  sys/powerpc/aim/machdep.c         12
-rw-r--r--  sys/powerpc/booke/machdep.c       12
-rw-r--r--  sys/powerpc/include/smp.h          1
-rw-r--r--  sys/powerpc/powerpc/mp_machdep.c   2
-rw-r--r--  sys/sparc64/include/intr_machdep.h 1
-rw-r--r--  sys/sparc64/include/smp.h          1
-rw-r--r--  sys/sparc64/sparc64/intr_machdep.c 3
-rw-r--r--  sys/sparc64/sparc64/mp_machdep.c  21
-rw-r--r--  sys/sun4v/include/intr_machdep.h   1
-rw-r--r--  sys/sun4v/include/smp.h            2
-rw-r--r--  sys/sun4v/sun4v/intr_machdep.c     4
-rw-r--r--  sys/sun4v/sun4v/mp_machdep.c      19
-rw-r--r--  sys/sys/callout.h                  3
-rw-r--r--  sys/sys/sched.h                    2
-rw-r--r--  sys/sys/systm.h                   10
-rw-r--r--  sys/sys/timeet.h                   4
-rw-r--r--  sys/sys/timetc.h                   1
-rw-r--r--  sys/x86/x86/local_apic.c           2
36 files changed, 1176 insertions, 706 deletions
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index fe68600..00182db 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -585,59 +585,89 @@ cpu_halt(void)
}
void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */
+static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */
+static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */
+TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
+SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait,
+ 0, "Use MONITOR/MWAIT for short idle");
+
+#define STATE_RUNNING 0x0
+#define STATE_MWAIT 0x1
+#define STATE_SLEEPING 0x2
static void
-cpu_idle_hlt(int busy)
+cpu_idle_acpi(int busy)
{
- /*
- * we must absolutely guarentee that hlt is the next instruction
- * after sti or we introduce a timing window.
- */
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_SLEEPING;
disable_intr();
- if (sched_runnable())
+ if (sched_runnable())
enable_intr();
+ else if (cpu_idle_hook)
+ cpu_idle_hook();
else
__asm __volatile("sti; hlt");
+ *state = STATE_RUNNING;
}
static void
-cpu_idle_acpi(int busy)
+cpu_idle_hlt(int busy)
{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_SLEEPING;
+ /*
+ * We must absolutely guarantee that hlt is the next instruction
+ * after sti or we introduce a timing window.
+ */
disable_intr();
- if (sched_runnable())
+ if (sched_runnable())
enable_intr();
- else if (cpu_idle_hook)
- cpu_idle_hook();
else
__asm __volatile("sti; hlt");
+ *state = STATE_RUNNING;
}
-static int cpu_ident_amdc1e = 0;
+/*
+ * MWAIT cpu power states. Lower 4 bits are sub-states.
+ */
+#define MWAIT_C0 0xf0
+#define MWAIT_C1 0x00
+#define MWAIT_C2 0x10
+#define MWAIT_C3 0x20
+#define MWAIT_C4 0x30
-static int
-cpu_probe_amdc1e(void)
+static void
+cpu_idle_mwait(int busy)
{
- int i;
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_MWAIT;
+ if (!sched_runnable()) {
+ cpu_monitor(state, 0, 0);
+ if (*state == STATE_MWAIT)
+ cpu_mwait(0, MWAIT_C1);
+ }
+ *state = STATE_RUNNING;
+}
- /*
- * Forget it, if we're not using local APIC timer.
- */
- if (resource_disabled("apic", 0) ||
- (resource_int_value("apic", 0, "clock", &i) == 0 && i == 0))
- return (0);
+static void
+cpu_idle_spin(int busy)
+{
+ int *state;
+ int i;
- /*
- * Detect the presence of C1E capability mostly on latest
- * dual-cores (or future) k8 family.
- */
- if (cpu_vendor_id == CPU_VENDOR_AMD &&
- (cpu_id & 0x00000f00) == 0x00000f00 &&
- (cpu_id & 0x0fff0000) >= 0x00040000) {
- cpu_ident_amdc1e = 1;
- return (1);
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_RUNNING;
+ for (i = 0; i < 1000; i++) {
+ if (sched_runnable())
+ return;
+ cpu_spinwait();
}
-
- return (0);
}
/*
@@ -655,110 +685,83 @@ cpu_probe_amdc1e(void)
#define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)
static void
-cpu_idle_amdc1e(int busy)
+cpu_probe_amdc1e(void)
{
- disable_intr();
- if (sched_runnable())
- enable_intr();
- else {
- uint64_t msr;
-
- msr = rdmsr(MSR_AMDK8_IPM);
- if (msr & AMDK8_CMPHALT)
- wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
-
- if (cpu_idle_hook)
- cpu_idle_hook();
- else
- __asm __volatile("sti; hlt");
+ /*
+ * Detect the presence of C1E capability mostly on latest
+ * dual-cores (or future) k8 family.
+ */
+ if (cpu_vendor_id == CPU_VENDOR_AMD &&
+ (cpu_id & 0x00000f00) == 0x00000f00 &&
+ (cpu_id & 0x0fff0000) >= 0x00040000) {
+ cpu_ident_amdc1e = 1;
}
}
-static void
-cpu_idle_spin(int busy)
-{
- return;
-}
-
void (*cpu_idle_fn)(int) = cpu_idle_acpi;
void
cpu_idle(int busy)
{
+ uint64_t msr;
+
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
+ busy, curcpu);
#ifdef SMP
if (mp_grab_cpu_hlt())
return;
#endif
- cpu_idle_fn(busy);
-}
-
-/*
- * mwait cpu power states. Lower 4 bits are sub-states.
- */
-#define MWAIT_C0 0xf0
-#define MWAIT_C1 0x00
-#define MWAIT_C2 0x10
-#define MWAIT_C3 0x20
-#define MWAIT_C4 0x30
-
-#define MWAIT_DISABLED 0x0
-#define MWAIT_WOKEN 0x1
-#define MWAIT_WAITING 0x2
+ /* If we are busy - try to use fast methods. */
+ if (busy) {
+ if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
+ cpu_idle_mwait(busy);
+ goto out;
+ }
+ }
-static void
-cpu_idle_mwait(int busy)
-{
- int *mwait;
+ /* If we have time - switch timers into idle mode. */
+ if (!busy) {
+ critical_enter();
+ cpu_idleclock();
+ }
- mwait = (int *)PCPU_PTR(monitorbuf);
- *mwait = MWAIT_WAITING;
- if (sched_runnable())
- return;
- cpu_monitor(mwait, 0, 0);
- if (*mwait == MWAIT_WAITING)
- cpu_mwait(0, MWAIT_C1);
-}
+ /* Apply AMD APIC timer C1E workaround. */
+ if (cpu_ident_amdc1e && cpu_disable_deep_sleep) {
+ msr = rdmsr(MSR_AMDK8_IPM);
+ if (msr & AMDK8_CMPHALT)
+ wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
+ }
-static void
-cpu_idle_mwait_hlt(int busy)
-{
- int *mwait;
+ /* Call main idle method. */
+ cpu_idle_fn(busy);
- mwait = (int *)PCPU_PTR(monitorbuf);
- if (busy == 0) {
- *mwait = MWAIT_DISABLED;
- cpu_idle_hlt(busy);
- return;
+ /* Switch timers back into active mode. */
+ if (!busy) {
+ cpu_activeclock();
+ critical_exit();
}
- *mwait = MWAIT_WAITING;
- if (sched_runnable())
- return;
- cpu_monitor(mwait, 0, 0);
- if (*mwait == MWAIT_WAITING)
- cpu_mwait(0, MWAIT_C1);
+out:
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
+ busy, curcpu);
}
int
cpu_idle_wakeup(int cpu)
{
struct pcpu *pcpu;
- int *mwait;
+ int *state;
- if (cpu_idle_fn == cpu_idle_spin)
- return (1);
- if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt)
- return (0);
pcpu = pcpu_find(cpu);
- mwait = (int *)pcpu->pc_monitorbuf;
+ state = (int *)pcpu->pc_monitorbuf;
/*
* This doesn't need to be atomic since missing the race will
* simply result in unnecessary IPIs.
*/
- if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED)
+ if (*state == STATE_SLEEPING)
return (0);
- *mwait = MWAIT_WOKEN;
-
+ if (*state == STATE_MWAIT)
+ *state = STATE_RUNNING;
return (1);
}
@@ -771,8 +774,6 @@ struct {
} idle_tbl[] = {
{ cpu_idle_spin, "spin" },
{ cpu_idle_mwait, "mwait" },
- { cpu_idle_mwait_hlt, "mwait_hlt" },
- { cpu_idle_amdc1e, "amdc1e" },
{ cpu_idle_hlt, "hlt" },
{ cpu_idle_acpi, "acpi" },
{ NULL, NULL }
@@ -791,8 +792,8 @@ idle_sysctl_available(SYSCTL_HANDLER_ARGS)
if (strstr(idle_tbl[i].id_name, "mwait") &&
(cpu_feature2 & CPUID2_MON) == 0)
continue;
- if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
- cpu_ident_amdc1e == 0)
+ if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+ cpu_idle_hook == NULL)
continue;
p += sprintf(p, "%s, ", idle_tbl[i].id_name);
}
@@ -801,6 +802,9 @@ idle_sysctl_available(SYSCTL_HANDLER_ARGS)
return (error);
}
+SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
+ 0, 0, idle_sysctl_available, "A", "list of available idle functions");
+
static int
idle_sysctl(SYSCTL_HANDLER_ARGS)
{
@@ -824,8 +828,8 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
if (strstr(idle_tbl[i].id_name, "mwait") &&
(cpu_feature2 & CPUID2_MON) == 0)
continue;
- if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
- cpu_ident_amdc1e == 0)
+ if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+ cpu_idle_hook == NULL)
continue;
if (strcmp(idle_tbl[i].id_name, buf))
continue;
@@ -835,9 +839,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
return (EINVAL);
}
-SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
- 0, 0, idle_sysctl_available, "A", "list of available idle functions");
-
SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
idle_sysctl, "A", "currently selected idle function");
@@ -1743,8 +1744,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
}
#endif
- if (cpu_probe_amdc1e())
- cpu_idle_fn = cpu_idle_amdc1e;
+ cpu_probe_amdc1e();
/* Location of kernel stack for locore */
return ((u_int64_t)thread0.td_pcb);
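
The idle rework above collapses the old mwait, mwait_hlt and amdc1e handlers into one per-CPU state word (STATE_RUNNING / STATE_MWAIT / STATE_SLEEPING) that cpu_idle_wakeup() can inspect from another CPU: a sleeper in MWAIT is woken by a plain store to the monitored word, while a sleeper in HLT/ACPI still needs an IPI. A minimal stand-alone sketch of that handshake, using C11 atomics in place of the real MONITOR/MWAIT instructions and per-CPU monitorbuf (names here are illustrative, not the kernel API):

#include <stdatomic.h>
#include <stdio.h>

#define STATE_RUNNING   0x0
#define STATE_MWAIT     0x1
#define STATE_SLEEPING  0x2

static _Atomic int monitorbuf = STATE_RUNNING;  /* stand-in for pc_monitorbuf */

/*
 * Wakeup side, mirroring cpu_idle_wakeup(): a CPU sleeping in hlt/ACPI
 * still needs an IPI (return 0); a CPU parked in mwait is woken by a
 * plain store to the word it monitors (return 1).
 */
static int
idle_wakeup(void)
{
    int state = atomic_load(&monitorbuf);

    if (state == STATE_SLEEPING)
        return (0);
    if (state == STATE_MWAIT)
        atomic_store(&monitorbuf, STATE_RUNNING);
    return (1);
}

int
main(void)
{
    /* Target CPU armed its monitor and entered mwait. */
    atomic_store(&monitorbuf, STATE_MWAIT);
    printf("mwait sleeper, woken without IPI: %d\n", idle_wakeup());

    /* Target CPU went to sleep via hlt/ACPI instead. */
    atomic_store(&monitorbuf, STATE_SLEEPING);
    printf("hlt sleeper, woken without IPI:   %d\n", idle_wakeup());
    return (0);
}
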
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index e2f82ec..49b380b 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -118,7 +118,6 @@ u_long *ipi_invlcache_counts[MAXCPU];
u_long *ipi_rendezvous_counts[MAXCPU];
u_long *ipi_lazypmap_counts[MAXCPU];
static u_long *ipi_hardclock_counts[MAXCPU];
-static u_long *ipi_statclock_counts[MAXCPU];
#endif
extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
@@ -1196,16 +1195,22 @@ smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
void
ipi_bitmap_handler(struct trapframe frame)
{
+ struct trapframe *oldframe;
+ struct thread *td;
int cpu = PCPU_GET(cpuid);
u_int ipi_bitmap;
+ critical_enter();
+ td = curthread;
+ td->td_intr_nesting_level++;
+ oldframe = td->td_intr_frame;
+ td->td_intr_frame = &frame;
ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
-
if (ipi_bitmap & (1 << IPI_PREEMPT)) {
#ifdef COUNT_IPIS
(*ipi_preempt_counts[cpu])++;
#endif
- sched_preempt(curthread);
+ sched_preempt(td);
}
if (ipi_bitmap & (1 << IPI_AST)) {
#ifdef COUNT_IPIS
@@ -1217,14 +1222,11 @@ ipi_bitmap_handler(struct trapframe frame)
#ifdef COUNT_IPIS
(*ipi_hardclock_counts[cpu])++;
#endif
- hardclockintr(&frame);
- }
- if (ipi_bitmap & (1 << IPI_STATCLOCK)) {
-#ifdef COUNT_IPIS
- (*ipi_statclock_counts[cpu])++;
-#endif
- statclockintr(&frame);
+ hardclockintr();
}
+ td->td_intr_frame = oldframe;
+ td->td_intr_nesting_level--;
+ critical_exit();
}
/*
@@ -1579,8 +1581,6 @@ mp_ipi_intrcnt(void *dummy)
intrcnt_add(buf, &ipi_lazypmap_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
intrcnt_add(buf, &ipi_hardclock_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:statclock", i);
- intrcnt_add(buf, &ipi_statclock_counts[i]);
}
}
SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
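
ipi_bitmap_handler() now dispatches only IPI_PREEMPT, IPI_AST and IPI_HARDCLOCK, inside a critical section and with the trapframe published through td_intr_frame. The underlying pattern, atomically swapping the per-CPU pending bitmap to zero and then acting on every bit that was set, can be sketched as below; this is an illustrative model with made-up names, not the kernel's actual code:

#include <stdatomic.h>
#include <stdio.h>

#define IPI_AST        0
#define IPI_PREEMPT    1
#define IPI_HARDCLOCK  2

static _Atomic unsigned int ipi_pending;   /* one such word per CPU */

/* Sender side: mark the request; the kernel also raises the hardware IPI. */
static void
ipi_send(int ipi)
{
    atomic_fetch_or(&ipi_pending, 1u << ipi);
}

/* Handler side: grab and clear the whole bitmap once, then dispatch. */
static void
bitmap_handler(void)
{
    unsigned int bitmap = atomic_exchange(&ipi_pending, 0);

    if (bitmap & (1u << IPI_PREEMPT))
        printf("preempt the current thread\n");
    if (bitmap & (1u << IPI_AST))
        printf("post an AST\n");
    if (bitmap & (1u << IPI_HARDCLOCK))
        printf("run hardclockintr()\n");
}

int
main(void)
{
    ipi_send(IPI_HARDCLOCK);
    ipi_send(IPI_PREEMPT);
    bitmap_handler();
    return (0);
}
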
diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h
index 2ebf7c2..ae2f5b9 100644
--- a/sys/amd64/include/apicvar.h
+++ b/sys/amd64/include/apicvar.h
@@ -123,8 +123,7 @@
#define IPI_AST 0 /* Generate software trap. */
#define IPI_PREEMPT 1
#define IPI_HARDCLOCK 2
-#define IPI_STATCLOCK 3
-#define IPI_BITMAP_LAST IPI_STATCLOCK
+#define IPI_BITMAP_LAST IPI_HARDCLOCK
#define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
#define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */
diff --git a/sys/dev/acpica/acpi_cpu.c b/sys/dev/acpica/acpi_cpu.c
index 5dd6ab9..fabbee9 100644
--- a/sys/dev/acpica/acpi_cpu.c
+++ b/sys/dev/acpica/acpi_cpu.c
@@ -900,7 +900,13 @@ acpi_cpu_idle()
/* Find the lowest state that has small enough latency. */
cx_next_idx = 0;
- for (i = sc->cpu_cx_lowest; i >= 0; i--) {
+#ifndef __ia64__
+ if (cpu_disable_deep_sleep)
+ i = sc->cpu_non_c3;
+ else
+#endif
+ i = sc->cpu_cx_lowest;
+ for (; i >= 0; i--) {
if (sc->cpu_cx_states[i].trans_lat * 3 <= sc->cpu_prev_sleep) {
cx_next_idx = i;
break;
@@ -929,15 +935,17 @@ acpi_cpu_idle()
/*
* Execute HLT (or equivalent) and wait for an interrupt. We can't
* precisely calculate the time spent in C1 since the place we wake up
- * is an ISR. Assume we slept no more then half of quantum.
+ * is an ISR. Assume we slept no more than half of a quantum, unless
+ * we are called inside a critical section, delaying the context switch.
*/
if (cx_next->type == ACPI_STATE_C1) {
AcpiHwRead(&start_time, &AcpiGbl_FADT.XPmTimerBlock);
acpi_cpu_c1();
AcpiHwRead(&end_time, &AcpiGbl_FADT.XPmTimerBlock);
- end_time = acpi_TimerDelta(end_time, start_time);
- sc->cpu_prev_sleep = (sc->cpu_prev_sleep * 3 +
- min(PM_USEC(end_time), 500000 / hz)) / 4;
+ end_time = PM_USEC(acpi_TimerDelta(end_time, start_time));
+ if (curthread->td_critnest == 0)
+ end_time = min(end_time, 500000 / hz);
+ sc->cpu_prev_sleep = (sc->cpu_prev_sleep * 3 + end_time) / 4;
return;
}
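
The C1 accounting change keeps cpu_prev_sleep as a 3/4-old, 1/4-new exponential moving average of the measured sleep time, and only clamps the sample to half a tick when the CPU was not idling inside a critical section. A small sketch of that arithmetic (microsecond values and hz = 1000 are assumed for the example):

#include <stdio.h>

/* 3/4 old + 1/4 new, as in the cpu_prev_sleep update above. */
static unsigned int
ema_update(unsigned int prev, unsigned int sample)
{
    return ((prev * 3 + sample) / 4);
}

int
main(void)
{
    unsigned int hz = 1000;        /* assumed tick rate */
    unsigned int prev = 200;       /* previous estimate, in us */
    unsigned int measured = 5000;  /* PM-timer delta for this C1 sleep, us */

    /* Outside a critical section the sample is clamped to half a tick. */
    unsigned int sample = measured;
    if (sample > 500000 / hz)
        sample = 500000 / hz;

    printf("next cpu_prev_sleep estimate: %u us\n", ema_update(prev, sample));
    return (0);
}
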
diff --git a/sys/dev/acpica/acpi_hpet.c b/sys/dev/acpica/acpi_hpet.c
index 2a8eb30..f5cf11a 100644
--- a/sys/dev/acpica/acpi_hpet.c
+++ b/sys/dev/acpica/acpi_hpet.c
@@ -683,15 +683,15 @@ hpet_detach(device_t dev)
static int
hpet_suspend(device_t dev)
{
- struct hpet_softc *sc;
+// struct hpet_softc *sc;
/*
* Disable the timer during suspend. The timer will not lose
* its state in S1 or S2, but we are required to disable
* it.
*/
- sc = device_get_softc(dev);
- hpet_disable(sc);
+// sc = device_get_softc(dev);
+// hpet_disable(sc);
return (0);
}
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index ef229ca..2bf6dd1 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -1175,9 +1175,6 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate)
return (0);
}
-
-void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */
-
#ifdef XEN
void
@@ -1208,60 +1205,94 @@ cpu_halt(void)
__asm__ ("hlt");
}
+#endif
+
+void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */
+static int cpu_ident_amdc1e = 0; /* AMD C1E supported. */
+static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */
+TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
+SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait,
+ 0, "Use MONITOR/MWAIT for short idle");
+
+#define STATE_RUNNING 0x0
+#define STATE_MWAIT 0x1
+#define STATE_SLEEPING 0x2
+
static void
-cpu_idle_hlt(int busy)
+cpu_idle_acpi(int busy)
{
- /*
- * we must absolutely guarentee that hlt is the next instruction
- * after sti or we introduce a timing window.
- */
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_SLEEPING;
disable_intr();
- if (sched_runnable())
+ if (sched_runnable())
enable_intr();
+ else if (cpu_idle_hook)
+ cpu_idle_hook();
else
__asm __volatile("sti; hlt");
+ *state = STATE_RUNNING;
}
-#endif
+#ifndef XEN
static void
-cpu_idle_acpi(int busy)
+cpu_idle_hlt(int busy)
{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_SLEEPING;
+ /*
+ * We must absolutely guarantee that hlt is the next instruction
+ * after sti or we introduce a timing window.
+ */
disable_intr();
- if (sched_runnable())
+ if (sched_runnable())
enable_intr();
- else if (cpu_idle_hook)
- cpu_idle_hook();
else
__asm __volatile("sti; hlt");
+ *state = STATE_RUNNING;
}
+#endif
+
+/*
+ * MWAIT cpu power states. Lower 4 bits are sub-states.
+ */
+#define MWAIT_C0 0xf0
+#define MWAIT_C1 0x00
+#define MWAIT_C2 0x10
+#define MWAIT_C3 0x20
+#define MWAIT_C4 0x30
-static int cpu_ident_amdc1e = 0;
+static void
+cpu_idle_mwait(int busy)
+{
+ int *state;
-static int
-cpu_probe_amdc1e(void)
-{
-#ifdef DEV_APIC
- int i;
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_MWAIT;
+ if (!sched_runnable()) {
+ cpu_monitor(state, 0, 0);
+ if (*state == STATE_MWAIT)
+ cpu_mwait(0, MWAIT_C1);
+ }
+ *state = STATE_RUNNING;
+}
- /*
- * Forget it, if we're not using local APIC timer.
- */
- if (resource_disabled("apic", 0) ||
- (resource_int_value("apic", 0, "clock", &i) == 0 && i == 0))
- return (0);
+static void
+cpu_idle_spin(int busy)
+{
+ int *state;
+ int i;
- /*
- * Detect the presence of C1E capability mostly on latest
- * dual-cores (or future) k8 family.
- */
- if (cpu_vendor_id == CPU_VENDOR_AMD &&
- (cpu_id & 0x00000f00) == 0x00000f00 &&
- (cpu_id & 0x0fff0000) >= 0x00040000) {
- cpu_ident_amdc1e = 1;
- return (1);
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_RUNNING;
+ for (i = 0; i < 1000; i++) {
+ if (sched_runnable())
+ return;
+ cpu_spinwait();
}
-#endif
- return (0);
}
/*
@@ -1279,32 +1310,20 @@ cpu_probe_amdc1e(void)
#define AMDK8_CMPHALT (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)
static void
-cpu_idle_amdc1e(int busy)
+cpu_probe_amdc1e(void)
{
- disable_intr();
- if (sched_runnable())
- enable_intr();
- else {
- uint64_t msr;
-
- msr = rdmsr(MSR_AMDK8_IPM);
- if (msr & AMDK8_CMPHALT)
- wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
-
- if (cpu_idle_hook)
- cpu_idle_hook();
- else
- __asm __volatile("sti; hlt");
+ /*
+ * Detect the presence of C1E capability mostly on latest
+ * dual-cores (or future) k8 family.
+ */
+ if (cpu_vendor_id == CPU_VENDOR_AMD &&
+ (cpu_id & 0x00000f00) == 0x00000f00 &&
+ (cpu_id & 0x0fff0000) >= 0x00040000) {
+ cpu_ident_amdc1e = 1;
}
}
-static void
-cpu_idle_spin(int busy)
-{
- return;
-}
-
#ifdef XEN
void (*cpu_idle_fn)(int) = cpu_idle_hlt;
#else
@@ -1314,79 +1333,72 @@ void (*cpu_idle_fn)(int) = cpu_idle_acpi;
void
cpu_idle(int busy)
{
+ uint64_t msr;
+
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
+ busy, curcpu);
#if defined(SMP) && !defined(XEN)
if (mp_grab_cpu_hlt())
return;
#endif
- cpu_idle_fn(busy);
-}
-
-/*
- * mwait cpu power states. Lower 4 bits are sub-states.
- */
-#define MWAIT_C0 0xf0
-#define MWAIT_C1 0x00
-#define MWAIT_C2 0x10
-#define MWAIT_C3 0x20
-#define MWAIT_C4 0x30
-
-#define MWAIT_DISABLED 0x0
-#define MWAIT_WOKEN 0x1
-#define MWAIT_WAITING 0x2
+ /* If we are busy - try to use fast methods. */
+ if (busy) {
+ if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
+ cpu_idle_mwait(busy);
+ goto out;
+ }
+ }
-static void
-cpu_idle_mwait(int busy)
-{
- int *mwait;
+#ifndef XEN
+ /* If we have time - switch timers into idle mode. */
+ if (!busy) {
+ critical_enter();
+ cpu_idleclock();
+ }
+#endif
- mwait = (int *)PCPU_PTR(monitorbuf);
- *mwait = MWAIT_WAITING;
- if (sched_runnable())
- return;
- cpu_monitor(mwait, 0, 0);
- if (*mwait == MWAIT_WAITING)
- cpu_mwait(0, MWAIT_C1);
-}
+ /* Apply AMD APIC timer C1E workaround. */
+ if (cpu_ident_amdc1e
+#ifndef XEN
+ && cpu_disable_deep_sleep
+#endif
+ ) {
+ msr = rdmsr(MSR_AMDK8_IPM);
+ if (msr & AMDK8_CMPHALT)
+ wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
+ }
-static void
-cpu_idle_mwait_hlt(int busy)
-{
- int *mwait;
+ /* Call main idle method. */
+ cpu_idle_fn(busy);
- mwait = (int *)PCPU_PTR(monitorbuf);
- if (busy == 0) {
- *mwait = MWAIT_DISABLED;
- cpu_idle_hlt(busy);
- return;
+#ifndef XEN
+ /* Switch timers back into active mode. */
+ if (!busy) {
+ cpu_activeclock();
+ critical_exit();
}
- *mwait = MWAIT_WAITING;
- if (sched_runnable())
- return;
- cpu_monitor(mwait, 0, 0);
- if (*mwait == MWAIT_WAITING)
- cpu_mwait(0, MWAIT_C1);
+#endif
+out:
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
+ busy, curcpu);
}
int
cpu_idle_wakeup(int cpu)
{
struct pcpu *pcpu;
- int *mwait;
+ int *state;
- if (cpu_idle_fn == cpu_idle_spin)
- return (1);
- if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt)
- return (0);
pcpu = pcpu_find(cpu);
- mwait = (int *)pcpu->pc_monitorbuf;
+ state = (int *)pcpu->pc_monitorbuf;
/*
* This doesn't need to be atomic since missing the race will
* simply result in unnecessary IPIs.
*/
- if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED)
+ if (*state == STATE_SLEEPING)
return (0);
- *mwait = MWAIT_WOKEN;
-
+ if (*state == STATE_MWAIT)
+ *state = STATE_RUNNING;
return (1);
}
@@ -1399,8 +1411,6 @@ struct {
} idle_tbl[] = {
{ cpu_idle_spin, "spin" },
{ cpu_idle_mwait, "mwait" },
- { cpu_idle_mwait_hlt, "mwait_hlt" },
- { cpu_idle_amdc1e, "amdc1e" },
{ cpu_idle_hlt, "hlt" },
{ cpu_idle_acpi, "acpi" },
{ NULL, NULL }
@@ -1419,8 +1429,8 @@ idle_sysctl_available(SYSCTL_HANDLER_ARGS)
if (strstr(idle_tbl[i].id_name, "mwait") &&
(cpu_feature2 & CPUID2_MON) == 0)
continue;
- if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
- cpu_ident_amdc1e == 0)
+ if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+ cpu_idle_hook == NULL)
continue;
p += sprintf(p, "%s, ", idle_tbl[i].id_name);
}
@@ -1429,6 +1439,9 @@ idle_sysctl_available(SYSCTL_HANDLER_ARGS)
return (error);
}
+SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
+ 0, 0, idle_sysctl_available, "A", "list of available idle functions");
+
static int
idle_sysctl(SYSCTL_HANDLER_ARGS)
{
@@ -1452,8 +1465,8 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
if (strstr(idle_tbl[i].id_name, "mwait") &&
(cpu_feature2 & CPUID2_MON) == 0)
continue;
- if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
- cpu_ident_amdc1e == 0)
+ if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+ cpu_idle_hook == NULL)
continue;
if (strcmp(idle_tbl[i].id_name, buf))
continue;
@@ -1463,9 +1476,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
return (EINVAL);
}
-SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
- 0, 0, idle_sysctl_available, "A", "list of available idle functions");
-
SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
idle_sysctl, "A", "currently selected idle function");
@@ -2695,8 +2705,7 @@ init386(first)
thread0.td_pcb->pcb_fsd = PCPU_GET(fsgs_gdt)[0];
thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1];
- if (cpu_probe_amdc1e())
- cpu_idle_fn = cpu_idle_amdc1e;
+ cpu_probe_amdc1e();
}
#else
@@ -2970,8 +2979,7 @@ init386(first)
thread0.td_pcb->pcb_ext = 0;
thread0.td_frame = &proc0_tf;
- if (cpu_probe_amdc1e())
- cpu_idle_fn = cpu_idle_amdc1e;
+ cpu_probe_amdc1e();
}
#endif
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index fa50ecf..f660e1c 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -167,7 +167,6 @@ u_long *ipi_invlcache_counts[MAXCPU];
u_long *ipi_rendezvous_counts[MAXCPU];
u_long *ipi_lazypmap_counts[MAXCPU];
static u_long *ipi_hardclock_counts[MAXCPU];
-static u_long *ipi_statclock_counts[MAXCPU];
#endif
/*
@@ -1284,16 +1283,22 @@ smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
void
ipi_bitmap_handler(struct trapframe frame)
{
+ struct trapframe *oldframe;
+ struct thread *td;
int cpu = PCPU_GET(cpuid);
u_int ipi_bitmap;
+ critical_enter();
+ td = curthread;
+ td->td_intr_nesting_level++;
+ oldframe = td->td_intr_frame;
+ td->td_intr_frame = &frame;
ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
-
if (ipi_bitmap & (1 << IPI_PREEMPT)) {
#ifdef COUNT_IPIS
(*ipi_preempt_counts[cpu])++;
#endif
- sched_preempt(curthread);
+ sched_preempt(td);
}
if (ipi_bitmap & (1 << IPI_AST)) {
#ifdef COUNT_IPIS
@@ -1305,14 +1310,11 @@ ipi_bitmap_handler(struct trapframe frame)
#ifdef COUNT_IPIS
(*ipi_hardclock_counts[cpu])++;
#endif
- hardclockintr(&frame);
- }
- if (ipi_bitmap & (1 << IPI_STATCLOCK)) {
-#ifdef COUNT_IPIS
- (*ipi_statclock_counts[cpu])++;
-#endif
- statclockintr(&frame);
+ hardclockintr();
}
+ td->td_intr_frame = oldframe;
+ td->td_intr_nesting_level--;
+ critical_exit();
}
/*
@@ -1627,8 +1629,6 @@ mp_ipi_intrcnt(void *dummy)
intrcnt_add(buf, &ipi_lazypmap_counts[i]);
snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
intrcnt_add(buf, &ipi_hardclock_counts[i]);
- snprintf(buf, sizeof(buf), "cpu%d:statclock", i);
- intrcnt_add(buf, &ipi_statclock_counts[i]);
}
}
SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
diff --git a/sys/i386/include/apicvar.h b/sys/i386/include/apicvar.h
index cada017..ff1f657 100644
--- a/sys/i386/include/apicvar.h
+++ b/sys/i386/include/apicvar.h
@@ -124,8 +124,7 @@
#define IPI_AST 0 /* Generate software trap. */
#define IPI_PREEMPT 1
#define IPI_HARDCLOCK 2
-#define IPI_STATCLOCK 3
-#define IPI_BITMAP_LAST IPI_STATCLOCK
+#define IPI_BITMAP_LAST IPI_HARDCLOCK
#define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
#define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */
@@ -152,8 +151,7 @@
#define IPI_AST 0 /* Generate software trap. */
#define IPI_PREEMPT 1
#define IPI_HARDCLOCK 2
-#define IPI_STATCLOCK 3
-#define IPI_BITMAP_LAST IPI_STATCLOCK
+#define IPI_BITMAP_LAST IPI_HARDCLOCK
#define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
#define IPI_STOP (APIC_IPI_INTS + 7) /* Stop CPU until restarted. */
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index c283b6b..ff5747e 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -373,11 +373,9 @@ int profprocs;
int ticks;
int psratio;
-int timer1hz;
-int timer2hz;
-static DPCPU_DEFINE(u_int, hard_cnt);
-static DPCPU_DEFINE(u_int, stat_cnt);
-static DPCPU_DEFINE(u_int, prof_cnt);
+static DPCPU_DEFINE(int, pcputicks); /* Per-CPU version of ticks. */
+static struct mtx global_hardclock_mtx;
+MTX_SYSINIT(global_hardclock_mtx, &global_hardclock_mtx, "ghc_mtx", MTX_SPIN);
/*
* Initialize clock frequencies and start both clocks running.
@@ -408,52 +406,6 @@ initclocks(dummy)
#endif
}
-void
-timer1clock(int usermode, uintfptr_t pc)
-{
- u_int *cnt;
-
- cnt = DPCPU_PTR(hard_cnt);
- *cnt += hz;
- if (*cnt >= timer1hz) {
- *cnt -= timer1hz;
- if (*cnt >= timer1hz)
- *cnt = 0;
- if (PCPU_GET(cpuid) == 0)
- hardclock(usermode, pc);
- else
- hardclock_cpu(usermode);
- }
- if (timer2hz == 0)
- timer2clock(usermode, pc);
-}
-
-void
-timer2clock(int usermode, uintfptr_t pc)
-{
- u_int *cnt;
- int t2hz = timer2hz ? timer2hz : timer1hz;
-
- cnt = DPCPU_PTR(stat_cnt);
- *cnt += stathz;
- if (*cnt >= t2hz) {
- *cnt -= t2hz;
- if (*cnt >= t2hz)
- *cnt = 0;
- statclock(usermode);
- }
- if (profprocs == 0)
- return;
- cnt = DPCPU_PTR(prof_cnt);
- *cnt += profhz;
- if (*cnt >= t2hz) {
- *cnt -= t2hz;
- if (*cnt >= t2hz)
- *cnt = 0;
- profclock(usermode, pc);
- }
-}
-
/*
* Each time the real-time timer fires, this function is called on all CPUs.
* Note that hardclock() calls hardclock_cpu() for the boot CPU, so only
@@ -486,7 +438,7 @@ hardclock_cpu(int usermode)
PROC_SUNLOCK(p);
}
thread_lock(td);
- sched_tick();
+ sched_tick(1);
td->td_flags |= flags;
thread_unlock(td);
@@ -507,6 +459,7 @@ hardclock(int usermode, uintfptr_t pc)
atomic_add_int((volatile int *)&ticks, 1);
hardclock_cpu(usermode);
tc_ticktock();
+ cpu_tick_calibration();
/*
* If no separate statistics clock is available, run it from here.
*
@@ -525,6 +478,89 @@ hardclock(int usermode, uintfptr_t pc)
#endif /* SW_WATCHDOG */
}
+void
+hardclock_anycpu(int cnt, int usermode)
+{
+ struct pstats *pstats;
+ struct thread *td = curthread;
+ struct proc *p = td->td_proc;
+ int *t = DPCPU_PTR(pcputicks);
+ int flags;
+ int global, newticks;
+
+ /*
+ * Update per-CPU and possibly global ticks values.
+ */
+ *t += cnt;
+ do {
+ global = ticks;
+ newticks = *t - global;
+ if (newticks <= 0) {
+ if (newticks < -1)
+ *t = global - 1;
+ newticks = 0;
+ break;
+ }
+ } while (!atomic_cmpset_int(&ticks, global, *t));
+
+ /*
+ * Run current process's virtual and profile time, as needed.
+ */
+ pstats = p->p_stats;
+ flags = 0;
+ if (usermode &&
+ timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
+ PROC_SLOCK(p);
+ if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL],
+ tick * cnt) == 0)
+ flags |= TDF_ALRMPEND | TDF_ASTPENDING;
+ PROC_SUNLOCK(p);
+ }
+ if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
+ PROC_SLOCK(p);
+ if (itimerdecr(&pstats->p_timer[ITIMER_PROF],
+ tick * cnt) == 0)
+ flags |= TDF_PROFPEND | TDF_ASTPENDING;
+ PROC_SUNLOCK(p);
+ }
+ thread_lock(td);
+ sched_tick(cnt);
+ td->td_flags |= flags;
+ thread_unlock(td);
+
+#ifdef HWPMC_HOOKS
+ if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
+ PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
+#endif
+ callout_tick();
+ /* We are in charge to handle this tick duty. */
+ if (newticks > 0) {
+ mtx_lock_spin(&global_hardclock_mtx);
+ tc_ticktock();
+#ifdef DEVICE_POLLING
+ hardclock_device_poll(); /* This is very short and quick. */
+#endif /* DEVICE_POLLING */
+#ifdef SW_WATCHDOG
+ if (watchdog_enabled > 0) {
+ watchdog_ticks -= newticks;
+ if (watchdog_ticks <= 0)
+ watchdog_fire();
+ }
+#endif /* SW_WATCHDOG */
+ mtx_unlock_spin(&global_hardclock_mtx);
+ }
+ if (curcpu == CPU_FIRST())
+ cpu_tick_calibration();
+}
+
+void
+hardclock_sync(int cpu)
+{
+ int *t = DPCPU_ID_PTR(cpu, pcputicks);
+
+ *t = ticks;
+}
+
/*
* Compute number of ticks in the specified amount of time.
*/
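
hardclock_anycpu() lets every CPU account several ticks at once: it advances a per-CPU tick counter and then tries to push the global ticks variable forward with a compare-and-swap, so exactly one CPU becomes responsible for each global tick and the losers simply resynchronize. A stand-alone sketch of that catch-up step, assuming C11 atomics rather than the kernel's atomic_cmpset_int():

#include <stdatomic.h>
#include <stdio.h>

static _Atomic int global_ticks;   /* models the global `ticks` variable */

/*
 * Per-CPU catch-up in the spirit of hardclock_anycpu(): advance the
 * private counter by cnt, then try to push the global counter forward.
 * The return value is how many global ticks this caller must handle
 * (tc_ticktock(), watchdog, ...); 0 means another CPU already did it.
 */
static int
tick_catchup(int *pcputicks, int cnt)
{
    int global, newticks;

    *pcputicks += cnt;
    do {
        global = atomic_load(&global_ticks);
        newticks = *pcputicks - global;
        if (newticks <= 0) {
            if (newticks < -1)     /* fell far behind: resynchronize */
                *pcputicks = global - 1;
            return (0);
        }
    } while (!atomic_compare_exchange_strong(&global_ticks, &global,
        *pcputicks));
    return (newticks);
}

int
main(void)
{
    int cpu0 = 0, cpu1 = 0;

    printf("cpu0 handles %d global tick(s)\n", tick_catchup(&cpu0, 3));
    printf("cpu1 handles %d global tick(s)\n", tick_catchup(&cpu1, 2));
    return (0);
}
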
diff --git a/sys/kern/kern_clocksource.c b/sys/kern/kern_clocksource.c
index 6b005de..29304a4 100644
--- a/sys/kern/kern_clocksource.c
+++ b/sys/kern/kern_clocksource.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/kdb.h>
+#include <sys/ktr.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/kernel.h>
@@ -59,28 +60,79 @@ __FBSDID("$FreeBSD$");
cyclic_clock_func_t cyclic_clock_func[MAXCPU];
#endif
-static void cpu_restartclocks(void);
-static void timercheck(void);
-inline static int doconfigtimer(int i);
-static void configtimer(int i);
+int cpu_disable_deep_sleep = 0; /* Timer dies in C3. */
-static struct eventtimer *timer[2] = { NULL, NULL };
-static int timertest = 0;
-static int timerticks[2] = { 0, 0 };
-static int profiling_on = 0;
-static struct bintime timerperiod[2];
+static void setuptimer(void);
+static void loadtimer(struct bintime *now, int first);
+static int doconfigtimer(void);
+static void configtimer(int start);
+static int round_freq(struct eventtimer *et, int freq);
-static char timername[2][32];
-TUNABLE_STR("kern.eventtimer.timer1", timername[0], sizeof(*timername));
-TUNABLE_STR("kern.eventtimer.timer2", timername[1], sizeof(*timername));
+static void getnextcpuevent(struct bintime *event, int idle);
+static void getnextevent(struct bintime *event);
+static int handleevents(struct bintime *now, int fake);
+#ifdef SMP
+static void cpu_new_callout(int cpu, int ticks);
+#endif
+
+static struct mtx et_hw_mtx;
+
+#define ET_HW_LOCK(state) \
+ { \
+ if (timer->et_flags & ET_FLAGS_PERCPU) \
+ mtx_lock_spin(&(state)->et_hw_mtx); \
+ else \
+ mtx_lock_spin(&et_hw_mtx); \
+ }
+
+#define ET_HW_UNLOCK(state) \
+ { \
+ if (timer->et_flags & ET_FLAGS_PERCPU) \
+ mtx_unlock_spin(&(state)->et_hw_mtx); \
+ else \
+ mtx_unlock_spin(&et_hw_mtx); \
+ }
+
+static struct eventtimer *timer = NULL;
+static struct bintime timerperiod; /* Timer period for periodic mode. */
+static struct bintime hardperiod; /* hardclock() events period. */
+static struct bintime statperiod; /* statclock() events period. */
+static struct bintime profperiod; /* profclock() events period. */
+static struct bintime nexttick; /* Next global timer tick time. */
+static u_int busy = 0; /* Reconfiguration is in progress. */
+static int profiling = 0; /* Profiling events enabled. */
+
+static char timername[32]; /* Wanted timer. */
+TUNABLE_STR("kern.eventtimer.timer", timername, sizeof(timername));
-static u_int singlemul = 0;
+static u_int singlemul = 0; /* Multiplier for periodic mode. */
TUNABLE_INT("kern.eventtimer.singlemul", &singlemul);
SYSCTL_INT(_kern_eventtimer, OID_AUTO, singlemul, CTLFLAG_RW, &singlemul,
- 0, "Multiplier, used in single timer mode");
-
-typedef u_int tc[2];
-static DPCPU_DEFINE(tc, configtimer);
+ 0, "Multiplier for periodic mode");
+
+static u_int idletick = 0; /* Idle mode allowed. */
+TUNABLE_INT("kern.eventtimer.idletick", &idletick);
+SYSCTL_INT(_kern_eventtimer, OID_AUTO, idletick, CTLFLAG_RW, &idletick,
+ 0, "Run periodic events when idle");
+
+static int periodic = 0; /* Periodic or one-shot mode. */
+TUNABLE_INT("kern.eventtimer.periodic", &periodic);
+
+struct pcpu_state {
+ struct mtx et_hw_mtx; /* Per-CPU timer mutex. */
+ u_int action; /* Reconfiguration requests. */
+ u_int handle; /* Immediate handle requests. */
+ struct bintime now; /* Last tick time. */
+ struct bintime nextevent; /* Next scheduled event on this CPU. */
+ struct bintime nexttick; /* Next timer tick time. */
+ struct bintime nexthard; /* Next hardclock() event. */
+ struct bintime nextstat; /* Next statclock() event. */
+ struct bintime nextprof; /* Next profclock() event. */
+ int ipi; /* This CPU needs IPI. */
+ int idle; /* This CPU is in idle mode. */
+};
+
+static DPCPU_DEFINE(struct pcpu_state, timerstate);
#define FREQ2BT(freq, bt) \
{ \
@@ -91,159 +143,325 @@ static DPCPU_DEFINE(tc, configtimer);
(((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) / \
((bt)->frac >> 1))
-/* Per-CPU timer1 handler. */
-static int
-hardclockhandler(struct trapframe *frame)
+/*
+ * Timer broadcast IPI handler.
+ */
+int
+hardclockintr(void)
{
+ struct bintime now;
+ struct pcpu_state *state;
+ int done;
+
+ if (doconfigtimer() || busy)
+ return (FILTER_HANDLED);
+ state = DPCPU_PTR(timerstate);
+ now = state->now;
+ CTR4(KTR_SPARE2, "ipi at %d: now %d.%08x%08x",
+ curcpu, now.sec, (unsigned int)(now.frac >> 32),
+ (unsigned int)(now.frac & 0xffffffff));
+ done = handleevents(&now, 0);
+ return (done ? FILTER_HANDLED : FILTER_STRAY);
+}
+/*
+ * Handle all events for specified time on this CPU
+ */
+static int
+handleevents(struct bintime *now, int fake)
+{
+ struct bintime t;
+ struct trapframe *frame;
+ struct pcpu_state *state;
+ uintfptr_t pc;
+ int usermode;
+ int done, runs;
+
+ CTR4(KTR_SPARE2, "handle at %d: now %d.%08x%08x",
+ curcpu, now->sec, (unsigned int)(now->frac >> 32),
+ (unsigned int)(now->frac & 0xffffffff));
+ done = 0;
+ if (fake) {
+ frame = NULL;
+ usermode = 0;
+ pc = 0;
+ } else {
+ frame = curthread->td_intr_frame;
+ usermode = TRAPF_USERMODE(frame);
+ pc = TRAPF_PC(frame);
+ }
#ifdef KDTRACE_HOOKS
/*
* If the DTrace hooks are configured and a callback function
* has been registered, then call it to process the high speed
* timers.
*/
- int cpu = curcpu;
- if (cyclic_clock_func[cpu] != NULL)
- (*cyclic_clock_func[cpu])(frame);
+ if (!fake && cyclic_clock_func[curcpu] != NULL)
+ (*cyclic_clock_func[curcpu])(frame);
#endif
-
- timer1clock(TRAPF_USERMODE(frame), TRAPF_PC(frame));
- return (FILTER_HANDLED);
-}
-
-/* Per-CPU timer2 handler. */
-static int
-statclockhandler(struct trapframe *frame)
-{
-
- timer2clock(TRAPF_USERMODE(frame), TRAPF_PC(frame));
- return (FILTER_HANDLED);
-}
-
-/* timer1 broadcast IPI handler. */
-int
-hardclockintr(struct trapframe *frame)
-{
-
- if (doconfigtimer(0))
- return (FILTER_HANDLED);
- return (hardclockhandler(frame));
+ runs = 0;
+ state = DPCPU_PTR(timerstate);
+ while (bintime_cmp(now, &state->nexthard, >=)) {
+ bintime_add(&state->nexthard, &hardperiod);
+ runs++;
+ }
+ if (runs) {
+ hardclock_anycpu(runs, usermode);
+ done = 1;
+ }
+ while (bintime_cmp(now, &state->nextstat, >=)) {
+ statclock(usermode);
+ bintime_add(&state->nextstat, &statperiod);
+ done = 1;
+ }
+ if (profiling) {
+ while (bintime_cmp(now, &state->nextprof, >=)) {
+ if (!fake)
+ profclock(usermode, pc);
+ bintime_add(&state->nextprof, &profperiod);
+ done = 1;
+ }
+ } else
+ state->nextprof = state->nextstat;
+ getnextcpuevent(&t, 0);
+ ET_HW_LOCK(state);
+ if (!busy) {
+ state->idle = 0;
+ state->nextevent = t;
+ loadtimer(now, 0);
+ }
+ ET_HW_UNLOCK(state);
+ return (done);
}
-/* timer2 broadcast IPI handler. */
-int
-statclockintr(struct trapframe *frame)
+/*
+ * Schedule binuptime of the next event on current CPU.
+ */
+static void
+getnextcpuevent(struct bintime *event, int idle)
{
-
- if (doconfigtimer(1))
- return (FILTER_HANDLED);
- return (statclockhandler(frame));
+ struct bintime tmp;
+ struct pcpu_state *state;
+ int skip;
+
+ state = DPCPU_PTR(timerstate);
+ *event = state->nexthard;
+ if (idle) { /* If CPU is idle - ask callouts for how long. */
+ skip = callout_tickstofirst() - 1;
+ CTR2(KTR_SPARE2, "skip at %d: %d", curcpu, skip);
+ tmp = hardperiod;
+ bintime_mul(&tmp, skip);
+ bintime_add(event, &tmp);
+ } else { /* If CPU is active - handle all types of events. */
+ if (bintime_cmp(event, &state->nextstat, >))
+ *event = state->nextstat;
+ if (profiling &&
+ bintime_cmp(event, &state->nextprof, >))
+ *event = state->nextprof;
+ }
}
-/* timer1 callback. */
+/*
+ * Schedule binuptime of the next event on all CPUs.
+ */
static void
-timer1cb(struct eventtimer *et, void *arg)
+getnextevent(struct bintime *event)
{
-
+ struct pcpu_state *state;
#ifdef SMP
- /* Broadcast interrupt to other CPUs for non-per-CPU timers */
- if (smp_started && (et->et_flags & ET_FLAGS_PERCPU) == 0)
- ipi_all_but_self(IPI_HARDCLOCK);
+ int cpu;
#endif
- if (timertest) {
- if ((et->et_flags & ET_FLAGS_PERCPU) == 0 || curcpu == 0) {
- timerticks[0]++;
- if (timerticks[0] >= timer1hz) {
- ET_LOCK();
- timercheck();
- ET_UNLOCK();
+ int c;
+
+ state = DPCPU_PTR(timerstate);
+ *event = state->nextevent;
+ c = curcpu;
+#ifdef SMP
+ if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) {
+ CPU_FOREACH(cpu) {
+ if (curcpu == cpu)
+ continue;
+ state = DPCPU_ID_PTR(cpu, timerstate);
+ if (bintime_cmp(event, &state->nextevent, >)) {
+ *event = state->nextevent;
+ c = cpu;
}
}
}
- hardclockhandler(curthread->td_intr_frame);
+#endif
+ CTR5(KTR_SPARE2, "next at %d: next %d.%08x%08x by %d",
+ curcpu, event->sec, (unsigned int)(event->frac >> 32),
+ (unsigned int)(event->frac & 0xffffffff), c);
}
-/* timer2 callback. */
+/* Hardware timer callback function. */
static void
-timer2cb(struct eventtimer *et, void *arg)
+timercb(struct eventtimer *et, void *arg)
{
+ struct bintime now;
+ struct bintime *next;
+ struct pcpu_state *state;
+#ifdef SMP
+ int cpu, bcast;
+#endif
+
+ /* Do not touch anything if somebody reconfiguring timers. */
+ if (busy)
+ return;
+ /* Update present and next tick times. */
+ state = DPCPU_PTR(timerstate);
+ if (et->et_flags & ET_FLAGS_PERCPU) {
+ next = &state->nexttick;
+ } else
+ next = &nexttick;
+ if (periodic) {
+ now = *next; /* Ex-next tick time becomes present time. */
+ bintime_add(next, &timerperiod); /* Next tick in 1 period. */
+ } else {
+ binuptime(&now); /* Get present time from hardware. */
+ next->sec = -1; /* Next tick is not scheduled yet. */
+ }
+ state->now = now;
+ CTR4(KTR_SPARE2, "intr at %d: now %d.%08x%08x",
+ curcpu, now.sec, (unsigned int)(now.frac >> 32),
+ (unsigned int)(now.frac & 0xffffffff));
#ifdef SMP
- /* Broadcast interrupt to other CPUs for non-per-CPU timers */
- if (smp_started && (et->et_flags & ET_FLAGS_PERCPU) == 0)
- ipi_all_but_self(IPI_STATCLOCK);
+ /* Prepare broadcasting to other CPUs for non-per-CPU timers. */
+ bcast = 0;
+ if ((et->et_flags & ET_FLAGS_PERCPU) == 0 && smp_started) {
+ CPU_FOREACH(cpu) {
+ if (curcpu == cpu)
+ continue;
+ state = DPCPU_ID_PTR(cpu, timerstate);
+ ET_HW_LOCK(state);
+ state->now = now;
+ if (bintime_cmp(&now, &state->nextevent, >=)) {
+ state->nextevent.sec++;
+ state->ipi = 1;
+ bcast = 1;
+ }
+ ET_HW_UNLOCK(state);
+ }
+ }
#endif
- if (timertest) {
- if ((et->et_flags & ET_FLAGS_PERCPU) == 0 || curcpu == 0) {
- timerticks[1]++;
- if (timerticks[1] >= timer2hz * 2) {
- ET_LOCK();
- timercheck();
- ET_UNLOCK();
+
+ /* Handle events for this time on this CPU. */
+ handleevents(&now, 0);
+
+#ifdef SMP
+ /* Broadcast interrupt to other CPUs for non-per-CPU timers. */
+ if (bcast) {
+ CPU_FOREACH(cpu) {
+ if (curcpu == cpu)
+ continue;
+ state = DPCPU_ID_PTR(cpu, timerstate);
+ if (state->ipi) {
+ state->ipi = 0;
+ ipi_cpu(cpu, IPI_HARDCLOCK);
}
}
}
- statclockhandler(curthread->td_intr_frame);
+#endif
}
/*
- * Check that both timers are running with at least 1/4 of configured rate.
- * If not - replace the broken one.
+ * Load new value into hardware timer.
*/
static void
-timercheck(void)
+loadtimer(struct bintime *now, int start)
{
-
- if (!timertest)
- return;
- timertest = 0;
- if (timerticks[0] * 4 < timer1hz) {
- printf("Event timer \"%s\" is dead.\n", timer[0]->et_name);
- timer1hz = 0;
- configtimer(0);
- et_ban(timer[0]);
- et_free(timer[0]);
- timer[0] = et_find(NULL, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
- if (timer[0] == NULL) {
- timer2hz = 0;
- configtimer(1);
- et_free(timer[1]);
- timer[1] = NULL;
- timer[0] = timer[1];
+ struct pcpu_state *state;
+ struct bintime new;
+ struct bintime *next;
+ uint64_t tmp;
+ int eq;
+
+ if (periodic) {
+ if (start) {
+ /*
+ * Try to start all periodic timers aligned
+ * to period to make events synchronous.
+ */
+ tmp = ((uint64_t)now->sec << 36) + (now->frac >> 28);
+ tmp = (tmp % (timerperiod.frac >> 28)) << 28;
+ tmp = timerperiod.frac - tmp;
+ new = timerperiod;
+ bintime_addx(&new, tmp);
+ CTR5(KTR_SPARE2, "load p at %d: now %d.%08x first in %d.%08x",
+ curcpu, now->sec, (unsigned int)(now->frac >> 32),
+ new.sec, (unsigned int)(new.frac >> 32));
+ et_start(timer, &new, &timerperiod);
+ }
+ } else {
+ if (timer->et_flags & ET_FLAGS_PERCPU) {
+ state = DPCPU_PTR(timerstate);
+ next = &state->nexttick;
+ } else
+ next = &nexttick;
+ getnextevent(&new);
+ eq = bintime_cmp(&new, next, ==);
+ CTR5(KTR_SPARE2, "load at %d: next %d.%08x%08x eq %d",
+ curcpu, new.sec, (unsigned int)(new.frac >> 32),
+ (unsigned int)(new.frac & 0xffffffff),
+ eq);
+ if (!eq) {
+ *next = new;
+ bintime_sub(&new, now);
+ et_start(timer, &new, NULL);
}
- et_init(timer[0], timer1cb, NULL, NULL);
- cpu_restartclocks();
- return;
- }
- if (timerticks[1] * 4 < timer2hz) {
- printf("Event timer \"%s\" is dead.\n", timer[1]->et_name);
- timer2hz = 0;
- configtimer(1);
- et_ban(timer[1]);
- et_free(timer[1]);
- timer[1] = et_find(NULL, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
- if (timer[1] != NULL)
- et_init(timer[1], timer2cb, NULL, NULL);
- cpu_restartclocks();
- return;
}
}
/*
- * Reconfigure specified per-CPU timer on other CPU. Called from IPI handler.
+ * Prepare event timer parameters after configuration changes.
*/
-inline static int
-doconfigtimer(int i)
+static void
+setuptimer(void)
{
- tc *conf;
+ int freq;
+
+ if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
+ periodic = 0;
+ else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
+ periodic = 1;
+ freq = hz * singlemul;
+ while (freq < (profiling ? profhz : stathz))
+ freq += hz;
+ freq = round_freq(timer, freq);
+ FREQ2BT(freq, &timerperiod);
+}
- conf = DPCPU_PTR(configtimer);
- if (atomic_load_acq_int(*conf + i)) {
- if (i == 0 ? timer1hz : timer2hz)
- et_start(timer[i], NULL, &timerperiod[i]);
- else
- et_stop(timer[i]);
- atomic_store_rel_int(*conf + i, 0);
+/*
+ * Reconfigure specified per-CPU timer on other CPU. Called from IPI handler.
+ */
+static int
+doconfigtimer(void)
+{
+ struct bintime now;
+ struct pcpu_state *state;
+
+ state = DPCPU_PTR(timerstate);
+ switch (atomic_load_acq_int(&state->action)) {
+ case 1:
+ binuptime(&now);
+ ET_HW_LOCK(state);
+ loadtimer(&now, 1);
+ ET_HW_UNLOCK(state);
+ state->handle = 0;
+ atomic_store_rel_int(&state->action, 0);
+ return (1);
+ case 2:
+ ET_HW_LOCK(state);
+ et_stop(timer);
+ ET_HW_UNLOCK(state);
+ state->handle = 0;
+ atomic_store_rel_int(&state->action, 0);
+ return (1);
+ }
+ if (atomic_readandclear_int(&state->handle) && !busy) {
+ binuptime(&now);
+ handleevents(&now, 0);
return (1);
}
return (0);
@@ -254,45 +472,79 @@ doconfigtimer(int i)
* For per-CPU timers use IPI to make other CPUs to reconfigure.
*/
static void
-configtimer(int i)
+configtimer(int start)
{
-#ifdef SMP
- tc *conf;
+ struct bintime now, next;
+ struct pcpu_state *state;
int cpu;
+ if (start) {
+ setuptimer();
+ binuptime(&now);
+ }
critical_enter();
-#endif
- /* Start/stop global timer or per-CPU timer of this CPU. */
- if (i == 0 ? timer1hz : timer2hz)
- et_start(timer[i], NULL, &timerperiod[i]);
- else
- et_stop(timer[i]);
+ ET_HW_LOCK(DPCPU_PTR(timerstate));
+ if (start) {
+ /* Initialize time machine parameters. */
+ next = now;
+ bintime_add(&next, &timerperiod);
+ if (periodic)
+ nexttick = next;
+ else
+ nexttick.sec = -1;
+ CPU_FOREACH(cpu) {
+ state = DPCPU_ID_PTR(cpu, timerstate);
+ state->now = now;
+ state->nextevent = next;
+ if (periodic)
+ state->nexttick = next;
+ else
+ state->nexttick.sec = -1;
+ state->nexthard = next;
+ state->nextstat = next;
+ state->nextprof = next;
+ hardclock_sync(cpu);
+ }
+ busy = 0;
+ /* Start global timer or per-CPU timer of this CPU. */
+ loadtimer(&now, 1);
+ } else {
+ busy = 1;
+ /* Stop global timer or per-CPU timer of this CPU. */
+ et_stop(timer);
+ }
+ ET_HW_UNLOCK(DPCPU_PTR(timerstate));
#ifdef SMP
- if ((timer[i]->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) {
+ /* If timer is global or there is no other CPUs yet - we are done. */
+ if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) {
critical_exit();
return;
}
/* Set reconfigure flags for other CPUs. */
CPU_FOREACH(cpu) {
- conf = DPCPU_ID_PTR(cpu, configtimer);
- atomic_store_rel_int(*conf + i, (cpu == curcpu) ? 0 : 1);
+ state = DPCPU_ID_PTR(cpu, timerstate);
+ atomic_store_rel_int(&state->action,
+ (cpu == curcpu) ? 0 : ( start ? 1 : 2));
}
- /* Send reconfigure IPI. */
- ipi_all_but_self(i == 0 ? IPI_HARDCLOCK : IPI_STATCLOCK);
+ /* Broadcast reconfigure IPI. */
+ ipi_all_but_self(IPI_HARDCLOCK);
/* Wait for reconfiguration completed. */
restart:
cpu_spinwait();
CPU_FOREACH(cpu) {
if (cpu == curcpu)
continue;
- conf = DPCPU_ID_PTR(cpu, configtimer);
- if (atomic_load_acq_int(*conf + i))
+ state = DPCPU_ID_PTR(cpu, timerstate);
+ if (atomic_load_acq_int(&state->action))
goto restart;
}
- critical_exit();
#endif
+ critical_exit();
}
+/*
+ * Calculate nearest frequency supported by hardware timer.
+ */
static int
round_freq(struct eventtimer *et, int freq)
{
@@ -314,23 +566,49 @@ round_freq(struct eventtimer *et, int freq)
}
/*
- * Configure and start event timers.
+ * Configure and start event timers (BSP part).
*/
void
cpu_initclocks_bsp(void)
{
- int base, div;
+ struct pcpu_state *state;
+ int base, div, cpu;
- timer[0] = et_find(timername[0], ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
- if (timer[0] == NULL)
- timer[0] = et_find(NULL, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
- if (timer[0] == NULL)
+ mtx_init(&et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
+ CPU_FOREACH(cpu) {
+ state = DPCPU_ID_PTR(cpu, timerstate);
+ mtx_init(&state->et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
+ }
+#ifdef SMP
+ callout_new_inserted = cpu_new_callout;
+#endif
+ /* Grab requested timer or the best of present. */
+ if (timername[0])
+ timer = et_find(timername, 0, 0);
+ if (timer == NULL && periodic) {
+ timer = et_find(NULL,
+ ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
+ }
+ if (timer == NULL) {
+ timer = et_find(NULL,
+ ET_FLAGS_ONESHOT, ET_FLAGS_ONESHOT);
+ }
+ if (timer == NULL && !periodic) {
+ timer = et_find(NULL,
+ ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
+ }
+ if (timer == NULL)
panic("No usable event timer found!");
- et_init(timer[0], timer1cb, NULL, NULL);
- timer[1] = et_find(timername[1][0] ? timername[1] : NULL,
- ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
- if (timer[1])
- et_init(timer[1], timer2cb, NULL, NULL);
+ et_init(timer, timercb, NULL, NULL);
+
+ /* Adapt to timer capabilities. */
+ if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
+ periodic = 0;
+ else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
+ periodic = 1;
+ if (timer->et_flags & ET_FLAGS_C3STOP)
+ cpu_disable_deep_sleep++;
+
/*
* We honor the requested 'hz' value.
* We want to run stathz in the neighborhood of 128hz.
@@ -344,8 +622,8 @@ cpu_initclocks_bsp(void)
else
singlemul = 4;
}
- if (timer[1] == NULL) {
- base = round_freq(timer[0], hz * singlemul);
+ if (periodic) {
+ base = round_freq(timer, hz * singlemul);
singlemul = max((base + hz / 2) / hz, 1);
hz = (base + singlemul / 2) / singlemul;
if (base <= 128)
@@ -359,175 +637,236 @@ cpu_initclocks_bsp(void)
profhz = stathz;
while ((profhz + stathz) <= 128 * 64)
profhz += stathz;
- profhz = round_freq(timer[0], profhz);
+ profhz = round_freq(timer, profhz);
} else {
- hz = round_freq(timer[0], hz);
- stathz = round_freq(timer[1], 127);
- profhz = round_freq(timer[1], stathz * 64);
+ hz = round_freq(timer, hz);
+ stathz = round_freq(timer, 127);
+ profhz = round_freq(timer, stathz * 64);
}
tick = 1000000 / hz;
+ FREQ2BT(hz, &hardperiod);
+ FREQ2BT(stathz, &statperiod);
+ FREQ2BT(profhz, &profperiod);
ET_LOCK();
- cpu_restartclocks();
+ configtimer(1);
ET_UNLOCK();
}
-/* Start per-CPU event timers on APs. */
+/*
+ * Start per-CPU event timers on APs.
+ */
void
cpu_initclocks_ap(void)
{
+ struct bintime now;
+ struct pcpu_state *state;
+
+ if (timer->et_flags & ET_FLAGS_PERCPU) {
+ state = DPCPU_PTR(timerstate);
+ binuptime(&now);
+ ET_HW_LOCK(state);
+ loadtimer(&now, 1);
+ ET_HW_UNLOCK(state);
+ }
+}
+
+/*
+ * Switch to profiling clock rates.
+ */
+void
+cpu_startprofclock(void)
+{
ET_LOCK();
- if (timer[0]->et_flags & ET_FLAGS_PERCPU)
- et_start(timer[0], NULL, &timerperiod[0]);
- if (timer[1] && timer[1]->et_flags & ET_FLAGS_PERCPU)
- et_start(timer[1], NULL, &timerperiod[1]);
+ if (periodic) {
+ configtimer(0);
+ profiling = 1;
+ configtimer(1);
+ } else
+ profiling = 1;
ET_UNLOCK();
}
-/* Reconfigure and restart event timers after configuration changes. */
-static void
-cpu_restartclocks(void)
+/*
+ * Switch to regular clock rates.
+ */
+void
+cpu_stopprofclock(void)
{
- /* Stop all event timers. */
- timertest = 0;
- if (timer1hz) {
- timer1hz = 0;
+ ET_LOCK();
+ if (periodic) {
configtimer(0);
- }
- if (timer[1] && timer2hz) {
- timer2hz = 0;
+ profiling = 0;
configtimer(1);
- }
- /* Calculate new event timers parameters. */
- if (timer[1] == NULL) {
- timer1hz = hz * singlemul;
- while (timer1hz < (profiling_on ? profhz : stathz))
- timer1hz += hz;
- timer2hz = 0;
- } else {
- timer1hz = hz;
- timer2hz = profiling_on ? profhz : stathz;
- timer2hz = round_freq(timer[1], timer2hz);
- }
- timer1hz = round_freq(timer[0], timer1hz);
- printf("Starting kernel event timers: %s @ %dHz, %s @ %dHz\n",
- timer[0]->et_name, timer1hz,
- timer[1] ? timer[1]->et_name : "NONE", timer2hz);
- /* Restart event timers. */
- FREQ2BT(timer1hz, &timerperiod[0]);
- configtimer(0);
- if (timer[1]) {
- timerticks[0] = 0;
- timerticks[1] = 0;
- FREQ2BT(timer2hz, &timerperiod[1]);
- configtimer(1);
- timertest = 1;
- }
+ } else
+ profiling = 0;
+ ET_UNLOCK();
}
-/* Switch to profiling clock rates. */
+/*
+ * Switch to idle mode (all ticks handled).
+ */
void
-cpu_startprofclock(void)
+cpu_idleclock(void)
{
+ struct bintime now, t;
+ struct pcpu_state *state;
- ET_LOCK();
- profiling_on = 1;
- cpu_restartclocks();
- ET_UNLOCK();
+ if (idletick || busy ||
+ (periodic && (timer->et_flags & ET_FLAGS_PERCPU)))
+ return;
+ state = DPCPU_PTR(timerstate);
+ if (periodic)
+ now = state->now;
+ else
+ binuptime(&now);
+ CTR4(KTR_SPARE2, "idle at %d: now %d.%08x%08x",
+ curcpu, now.sec, (unsigned int)(now.frac >> 32),
+ (unsigned int)(now.frac & 0xffffffff));
+ getnextcpuevent(&t, 1);
+ ET_HW_LOCK(state);
+ state->idle = 1;
+ state->nextevent = t;
+ if (!periodic)
+ loadtimer(&now, 0);
+ ET_HW_UNLOCK(state);
}
-/* Switch to regular clock rates. */
+/*
+ * Switch to active mode (skip empty ticks).
+ */
void
-cpu_stopprofclock(void)
+cpu_activeclock(void)
{
+ struct bintime now;
+ struct pcpu_state *state;
+ struct thread *td;
- ET_LOCK();
- profiling_on = 0;
- cpu_restartclocks();
- ET_UNLOCK();
+ state = DPCPU_PTR(timerstate);
+ if (state->idle == 0 || busy)
+ return;
+ if (periodic)
+ now = state->now;
+ else
+ binuptime(&now);
+ CTR4(KTR_SPARE2, "active at %d: now %d.%08x%08x",
+ curcpu, now.sec, (unsigned int)(now.frac >> 32),
+ (unsigned int)(now.frac & 0xffffffff));
+ spinlock_enter();
+ td = curthread;
+ td->td_intr_nesting_level++;
+ handleevents(&now, 1);
+ td->td_intr_nesting_level--;
+ spinlock_exit();
}
-/* Report or change the active event timers hardware. */
+#ifdef SMP
+static void
+cpu_new_callout(int cpu, int ticks)
+{
+ struct bintime tmp;
+ struct pcpu_state *state;
+
+ CTR3(KTR_SPARE2, "new co at %d: on %d in %d",
+ curcpu, cpu, ticks);
+ state = DPCPU_ID_PTR(cpu, timerstate);
+ ET_HW_LOCK(state);
+ if (state->idle == 0 || busy) {
+ ET_HW_UNLOCK(state);
+ return;
+ }
+ /*
+ * If timer is periodic - just update next event time for target CPU.
+ */
+ if (periodic) {
+ state->nextevent = state->nexthard;
+ tmp = hardperiod;
+ bintime_mul(&tmp, ticks - 1);
+ bintime_add(&state->nextevent, &tmp);
+ ET_HW_UNLOCK(state);
+ return;
+ }
+ /*
+ * Otherwise we have to wake that CPU up, as we can't get present
+ * bintime to reprogram global timer from here. If timer is per-CPU,
+ * we by definition can't do it from here.
+ */
+ ET_HW_UNLOCK(state);
+ if (timer->et_flags & ET_FLAGS_PERCPU) {
+ state->handle = 1;
+ ipi_cpu(cpu, IPI_HARDCLOCK);
+ } else {
+ if (!cpu_idle_wakeup(cpu))
+ ipi_cpu(cpu, IPI_AST);
+ }
+}
+#endif
+
+/*
+ * Report or change the active event timers hardware.
+ */
static int
-sysctl_kern_eventtimer_timer1(SYSCTL_HANDLER_ARGS)
+sysctl_kern_eventtimer_timer(SYSCTL_HANDLER_ARGS)
{
char buf[32];
struct eventtimer *et;
int error;
ET_LOCK();
- et = timer[0];
+ et = timer;
snprintf(buf, sizeof(buf), "%s", et->et_name);
ET_UNLOCK();
error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
ET_LOCK();
- et = timer[0];
+ et = timer;
if (error != 0 || req->newptr == NULL ||
- strcmp(buf, et->et_name) == 0) {
+ strcasecmp(buf, et->et_name) == 0) {
ET_UNLOCK();
return (error);
}
- et = et_find(buf, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
+ et = et_find(buf, 0, 0);
if (et == NULL) {
ET_UNLOCK();
return (ENOENT);
}
- timer1hz = 0;
configtimer(0);
- et_free(timer[0]);
- timer[0] = et;
- et_init(timer[0], timer1cb, NULL, NULL);
- cpu_restartclocks();
+ et_free(timer);
+ if (et->et_flags & ET_FLAGS_C3STOP)
+ cpu_disable_deep_sleep++;
+ if (timer->et_flags & ET_FLAGS_C3STOP)
+ cpu_disable_deep_sleep--;
+ timer = et;
+ et_init(timer, timercb, NULL, NULL);
+ configtimer(1);
ET_UNLOCK();
return (error);
}
-SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer1,
+SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer,
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
- 0, 0, sysctl_kern_eventtimer_timer1, "A", "Primary event timer");
+ 0, 0, sysctl_kern_eventtimer_timer, "A", "Kernel event timer");
+/*
+ * Report or change the active event timer periodicity.
+ */
static int
-sysctl_kern_eventtimer_timer2(SYSCTL_HANDLER_ARGS)
+sysctl_kern_eventtimer_periodic(SYSCTL_HANDLER_ARGS)
{
- char buf[32];
- struct eventtimer *et;
- int error;
+ int error, val;
- ET_LOCK();
- et = timer[1];
- if (et == NULL)
- snprintf(buf, sizeof(buf), "NONE");
- else
- snprintf(buf, sizeof(buf), "%s", et->et_name);
- ET_UNLOCK();
- error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
- ET_LOCK();
- et = timer[1];
- if (error != 0 || req->newptr == NULL ||
- strcmp(buf, et ? et->et_name : "NONE") == 0) {
- ET_UNLOCK();
+ val = periodic;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error != 0 || req->newptr == NULL)
return (error);
- }
- et = et_find(buf, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
- if (et == NULL && strcasecmp(buf, "NONE") != 0) {
- ET_UNLOCK();
- return (ENOENT);
- }
- if (timer[1] != NULL) {
- timer2hz = 0;
- configtimer(1);
- et_free(timer[1]);
- }
- timer[1] = et;
- if (timer[1] != NULL)
- et_init(timer[1], timer2cb, NULL, NULL);
- cpu_restartclocks();
+ ET_LOCK();
+ configtimer(0);
+ periodic = val;
+ configtimer(1);
ET_UNLOCK();
return (error);
}
-SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer2,
- CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
- 0, 0, sysctl_kern_eventtimer_timer2, "A", "Secondary event timer");
+SYSCTL_PROC(_kern_eventtimer, OID_AUTO, periodic,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ 0, 0, sysctl_kern_eventtimer_periodic, "I", "Kernel event timer periodic");
#endif
-
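
The new clocksource code schedules everything in struct bintime, a 64.64 fixed-point format: FREQ2BT() turns a rate such as hz or stathz into a per-event period, and the next hardclock/statclock/profclock times are advanced by repeated bintime additions. A rough, self-contained sketch of that arithmetic (the helpers below approximate, but are not, the kernel's FREQ2BT() and bintime_add()):

#include <stdint.h>
#include <stdio.h>

/* 64.64 fixed-point time, like the kernel's struct bintime. */
struct bintime {
    int64_t  sec;
    uint64_t frac;
};

/* Period of one event at `freq` Hz: roughly 2^64 / freq fractions. */
static struct bintime
freq2bt(unsigned int freq)
{
    struct bintime bt;

    bt.sec = 0;
    bt.frac = ((UINT64_C(1) << 63) / freq) << 1;
    return (bt);
}

static void
bt_add(struct bintime *bt, const struct bintime *bt2)
{
    uint64_t ofrac = bt->frac;

    bt->frac += bt2->frac;
    if (bt->frac < ofrac)          /* carry out of the fraction part */
        bt->sec++;
    bt->sec += bt2->sec;
}

int
main(void)
{
    struct bintime next = { 0, 0 };
    struct bintime hardperiod = freq2bt(1000);   /* hz = 1000 assumed */
    int i;

    for (i = 0; i < 1500; i++)                   /* 1.5 seconds of ticks */
        bt_add(&next, &hardperiod);
    printf("next event at %lld + %016llx/2^64 s\n",
        (long long)next.sec, (unsigned long long)next.frac);
    return (0);
}
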
diff --git a/sys/kern/kern_et.c b/sys/kern/kern_et.c
index 17b9c67..8c37556 100644
--- a/sys/kern/kern_et.c
+++ b/sys/kern/kern_et.c
@@ -38,7 +38,7 @@ SLIST_HEAD(et_eventtimers_list, eventtimer);
static struct et_eventtimers_list eventtimers = SLIST_HEAD_INITIALIZER(et_eventtimers);
struct mtx et_eventtimers_mtx;
-MTX_SYSINIT(et_eventtimers_init, &et_eventtimers_mtx, "et_mtx", MTX_SPIN);
+MTX_SYSINIT(et_eventtimers_init, &et_eventtimers_mtx, "et_mtx", MTX_DEF);
SYSCTL_NODE(_kern, OID_AUTO, eventtimer, CTLFLAG_RW, 0, "Event timers");
SYSCTL_NODE(_kern_eventtimer, OID_AUTO, et, CTLFLAG_RW, 0, "");
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
index d973348..811b24f 100644
--- a/sys/kern/kern_tc.c
+++ b/sys/kern/kern_tc.c
@@ -770,16 +770,11 @@ void
tc_ticktock(void)
{
static int count;
- static time_t last_calib;
if (++count < tc_tick)
return;
count = 0;
tc_windup();
- if (time_uptime != last_calib && !(time_uptime & 0xf)) {
- cpu_tick_calibrate(0);
- last_calib = time_uptime;
- }
}
static void
@@ -830,9 +825,20 @@ tc_cpu_ticks(void)
return (u + base);
}
+void
+cpu_tick_calibration(void)
+{
+ static time_t last_calib;
+
+ if (time_uptime != last_calib && !(time_uptime & 0xf)) {
+ cpu_tick_calibrate(0);
+ last_calib = time_uptime;
+ }
+}
+
/*
* This function gets called every 16 seconds on only one designated
- * CPU in the system from hardclock() via tc_ticktock().
+ * CPU in the system from hardclock() via cpu_tick_calibration().
*
* Whenever the real time clock is stepped we get called with reset=1
* to make sure we handle suspend/resume and similar events correctly.
diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c
index 32d5691..5697792 100644
--- a/sys/kern/kern_timeout.c
+++ b/sys/kern/kern_timeout.c
@@ -111,6 +111,7 @@ struct callout_cpu {
int cc_softticks;
int cc_cancel;
int cc_waiting;
+ int cc_firsttick;
};
#ifdef SMP
@@ -126,6 +127,7 @@ struct callout_cpu cc_cpu;
#define CC_UNLOCK(cc) mtx_unlock_spin(&(cc)->cc_lock)
static int timeout_cpu;
+void (*callout_new_inserted)(int cpu, int ticks) = NULL;
MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
@@ -260,7 +262,7 @@ callout_tick(void)
need_softclock = 0;
cc = CC_SELF();
mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
- cc->cc_ticks++;
+ cc->cc_firsttick = cc->cc_ticks = ticks;
for (; (cc->cc_softticks - cc->cc_ticks) <= 0; cc->cc_softticks++) {
bucket = cc->cc_softticks & callwheelmask;
if (!TAILQ_EMPTY(&cc->cc_callwheel[bucket])) {
@@ -277,6 +279,34 @@ callout_tick(void)
swi_sched(cc->cc_cookie, 0);
}
+int
+callout_tickstofirst(void)
+{
+ struct callout_cpu *cc;
+ struct callout *c;
+ struct callout_tailq *sc;
+ int curticks;
+ int skip = 1;
+
+ cc = CC_SELF();
+ mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
+ curticks = cc->cc_ticks;
+ while (skip < ncallout && skip < hz / 8) {
+ sc = &cc->cc_callwheel[(curticks + skip) & callwheelmask];
+ /* Scan this bucket for a callout that is due soon. */
+ TAILQ_FOREACH(c, sc, c_links.tqe) {
+ if (c && (c->c_time <= curticks + ncallout)
+ && (c->c_time > 0))
+ goto out;
+ }
+ skip++;
+ }
+out:
+ cc->cc_firsttick = curticks + skip;
+ mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
+ return (skip);
+}
+
static struct callout_cpu *
callout_lock(struct callout *c)
{
@@ -639,9 +669,14 @@ retry:
c->c_arg = arg;
c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
c->c_func = ftn;
- c->c_time = cc->cc_ticks + to_ticks;
+ c->c_time = ticks + to_ticks;
TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
c, c_links.tqe);
+ if (callout_new_inserted != NULL &&
+ (c->c_time - cc->cc_firsttick) < 0) {
+ cc->cc_firsttick = c->c_time;
+ (*callout_new_inserted)(cpu,
+ to_ticks + (ticks - cc->cc_ticks));
+ }
CTR5(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d",
cancelled ? "re" : "", c, c->c_func, c->c_arg, to_ticks);
CC_UNLOCK(cc);
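The callout_new_inserted hook exported above lets the event-timer code learn when a new callout lands earlier than anything currently scheduled, so a one-shot timer can be rearmed sooner. A minimal sketch of how a timer backend might install it; the handler and the reprogramming call are hypothetical (kern_clocksource.c installs its own handler):

    /* Hook declared in sys/callout.h and defined in kern_timeout.c. */
    extern void (*callout_new_inserted)(int cpu, int ticks);

    /* Hypothetical consumer: called with the new callout's distance in ticks. */
    static void
    my_new_callout(int cpu, int ticks)
    {
        /*
         * If "ticks" is sooner than the currently programmed event on
         * "cpu", rearm the one-shot timer earlier (sketch only).
         */
        (void)cpu;
        (void)ticks;
        /* reprogram_oneshot_timer(cpu, ticks);  -- hypothetical */
    }

    static void
    hook_init(void)
    {
        callout_new_inserted = my_new_callout;
    }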
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index 780dc6d..9face64 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -1547,7 +1547,7 @@ sched_pctcpu(struct thread *td)
}
void
-sched_tick(void)
+sched_tick(int cnt)
{
}
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index bb2d34a..e1cc172 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -196,7 +196,7 @@ static int preempt_thresh = 0;
#endif
static int static_boost = PRI_MIN_TIMESHARE;
static int sched_idlespins = 10000;
-static int sched_idlespinthresh = 64;
+static int sched_idlespinthresh = 16;
/*
* tdq - per processor runqs and statistics. All fields are protected by the
@@ -2163,7 +2163,7 @@ sched_clock(struct thread *td)
* is easier than trying to scale based on stathz.
*/
void
-sched_tick(void)
+sched_tick(int cnt)
{
struct td_sched *ts;
@@ -2175,7 +2175,7 @@ sched_tick(void)
if (ts->ts_incrtick == ticks)
return;
/* Adjust ticks for pctcpu */
- ts->ts_ticks += 1 << SCHED_TICK_SHIFT;
+ ts->ts_ticks += cnt << SCHED_TICK_SHIFT;
ts->ts_ltick = ticks;
ts->ts_incrtick = ticks;
/*
@@ -2549,7 +2549,7 @@ sched_idletd(void *dummy)
if (tdq->tdq_load == 0) {
tdq->tdq_cpu_idle = 1;
if (tdq->tdq_load == 0) {
- cpu_idle(switchcnt > sched_idlespinthresh);
+ cpu_idle(switchcnt > sched_idlespinthresh * 4);
tdq->tdq_switchcnt++;
}
tdq->tdq_cpu_idle = 0;
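sched_tick() now takes the number of stat ticks being accounted at once, so ULE's pctcpu bookkeeping stays accurate when ticks were skipped while a CPU sat idle. A minimal sketch of the accumulate-and-flush pattern a caller such as the new hardclock_anycpu() might use; the counter and function names here are assumptions, not the committed code:

    #include <sys/sched.h>          /* for the sched_tick(int) prototype */

    static int deferred_stat_ticks; /* hypothetical per-CPU count of skipped ticks */

    static void
    flush_stat_ticks(void)
    {
        int cnt;

        cnt = deferred_stat_ticks + 1;  /* current tick plus any skipped while idle */
        deferred_stat_ticks = 0;
        sched_tick(cnt);                /* ULE adds cnt << SCHED_TICK_SHIFT to ts_ticks */
    }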
diff --git a/sys/mips/include/smp.h b/sys/mips/include/smp.h
index 28efd4c..58aaf03 100644
--- a/sys/mips/include/smp.h
+++ b/sys/mips/include/smp.h
@@ -28,7 +28,6 @@
#define IPI_STOP_HARD 0x0008
#define IPI_PREEMPT 0x0010
#define IPI_HARDCLOCK 0x0020
-#define IPI_STATCLOCK 0x0040
#ifndef LOCORE
diff --git a/sys/mips/mips/mp_machdep.c b/sys/mips/mips/mp_machdep.c
index c7ff3d8..ef2f24c 100644
--- a/sys/mips/mips/mp_machdep.c
+++ b/sys/mips/mips/mp_machdep.c
@@ -164,11 +164,7 @@ mips_ipi_handler(void *arg)
break;
case IPI_HARDCLOCK:
CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
- hardclockintr(arg);;
- break;
- case IPI_STATCLOCK:
- CTR1(KTR_SMP, "%s: IPI_STATCLOCK", __func__);
- statclockintr(arg);;
+ hardclockintr();
break;
default:
panic("Unknown IPI 0x%0x on cpu %d", ipi, curcpu);
diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c
index 671ce5b..22dc8f0 100644
--- a/sys/pc98/pc98/machdep.c
+++ b/sys/pc98/pc98/machdep.c
@@ -1120,40 +1120,36 @@ cpu_halt(void)
__asm__ ("hlt");
}
+static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */
+TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
+SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait,
+ 0, "Use MONITOR/MWAIT for short idle");
+
+#define STATE_RUNNING 0x0
+#define STATE_MWAIT 0x1
+#define STATE_SLEEPING 0x2
+
static void
cpu_idle_hlt(int busy)
{
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_SLEEPING;
/*
- * we must absolutely guarentee that hlt is the next instruction
+ * We must absolutely guarantee that hlt is the next instruction
* after sti or we introduce a timing window.
*/
disable_intr();
- if (sched_runnable())
+ if (sched_runnable())
enable_intr();
else
__asm __volatile("sti; hlt");
-}
-
-static void
-cpu_idle_spin(int busy)
-{
- return;
-}
-
-void (*cpu_idle_fn)(int) = cpu_idle_hlt;
-
-void
-cpu_idle(int busy)
-{
-#if defined(SMP)
- if (mp_grab_cpu_hlt())
- return;
-#endif
- cpu_idle_fn(busy);
+ *state = STATE_RUNNING;
}
/*
- * mwait cpu power states. Lower 4 bits are sub-states.
+ * MWAIT cpu power states. Lower 4 bits are sub-states.
*/
#define MWAIT_C0 0xf0
#define MWAIT_C1 0x00
@@ -1161,63 +1157,91 @@ cpu_idle(int busy)
#define MWAIT_C3 0x20
#define MWAIT_C4 0x30
-#define MWAIT_DISABLED 0x0
-#define MWAIT_WOKEN 0x1
-#define MWAIT_WAITING 0x2
-
static void
cpu_idle_mwait(int busy)
{
- int *mwait;
-
- mwait = (int *)PCPU_PTR(monitorbuf);
- *mwait = MWAIT_WAITING;
- if (sched_runnable())
- return;
- cpu_monitor(mwait, 0, 0);
- if (*mwait == MWAIT_WAITING)
- cpu_mwait(0, MWAIT_C1);
+ int *state;
+
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_MWAIT;
+ if (!sched_runnable()) {
+ cpu_monitor(state, 0, 0);
+ if (*state == STATE_MWAIT)
+ cpu_mwait(0, MWAIT_C1);
+ }
+ *state = STATE_RUNNING;
}
static void
-cpu_idle_mwait_hlt(int busy)
+cpu_idle_spin(int busy)
{
- int *mwait;
+ int *state;
+ int i;
- mwait = (int *)PCPU_PTR(monitorbuf);
- if (busy == 0) {
- *mwait = MWAIT_DISABLED;
- cpu_idle_hlt(busy);
- return;
+ state = (int *)PCPU_PTR(monitorbuf);
+ *state = STATE_RUNNING;
+ for (i = 0; i < 1000; i++) {
+ if (sched_runnable())
+ return;
+ cpu_spinwait();
}
- *mwait = MWAIT_WAITING;
- if (sched_runnable())
+}
+
+void (*cpu_idle_fn)(int) = cpu_idle_hlt;
+
+void
+cpu_idle(int busy)
+{
+
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
+ busy, curcpu);
+#ifdef SMP
+ if (mp_grab_cpu_hlt())
return;
- cpu_monitor(mwait, 0, 0);
- if (*mwait == MWAIT_WAITING)
- cpu_mwait(0, MWAIT_C1);
+#endif
+ /* If we are busy, try to use fast methods. */
+ if (busy) {
+ if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
+ cpu_idle_mwait(busy);
+ goto out;
+ }
+ }
+
+ /* If we have time, switch timers into idle mode. */
+ if (!busy) {
+ critical_enter();
+ cpu_idleclock();
+ }
+
+ /* Call main idle method. */
+ cpu_idle_fn(busy);
+
+ /* Switch timers back into active mode. */
+ if (!busy) {
+ cpu_activeclock();
+ critical_exit();
+ }
+out:
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
+ busy, curcpu);
}
int
cpu_idle_wakeup(int cpu)
{
struct pcpu *pcpu;
- int *mwait;
+ int *state;
- if (cpu_idle_fn == cpu_idle_spin)
- return (1);
- if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt)
- return (0);
pcpu = pcpu_find(cpu);
- mwait = (int *)pcpu->pc_monitorbuf;
+ state = (int *)pcpu->pc_monitorbuf;
/*
* This doesn't need to be atomic since missing the race will
* simply result in unnecessary IPIs.
*/
- if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED)
+ if (*state == STATE_SLEEPING)
return (0);
- *mwait = MWAIT_WOKEN;
-
+ if (*state == STATE_MWAIT)
+ *state = STATE_RUNNING;
return (1);
}
@@ -1230,7 +1254,6 @@ struct {
} idle_tbl[] = {
{ cpu_idle_spin, "spin" },
{ cpu_idle_mwait, "mwait" },
- { cpu_idle_mwait_hlt, "mwait_hlt" },
{ cpu_idle_hlt, "hlt" },
{ NULL, NULL }
};
@@ -1255,6 +1278,9 @@ idle_sysctl_available(SYSCTL_HANDLER_ARGS)
return (error);
}
+SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
+ 0, 0, idle_sysctl_available, "A", "list of available idle functions");
+
static int
idle_sysctl(SYSCTL_HANDLER_ARGS)
{
@@ -1286,9 +1312,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
return (EINVAL);
}
-SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
- 0, 0, idle_sysctl_available, "A", "list of available idle functions");
-
SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
idle_sysctl, "A", "currently selected idle function");
diff --git a/sys/powerpc/aim/machdep.c b/sys/powerpc/aim/machdep.c
index 3290fa6..ba06531 100644
--- a/sys/powerpc/aim/machdep.c
+++ b/sys/powerpc/aim/machdep.c
@@ -638,7 +638,13 @@ cpu_idle(int busy)
panic("ints disabled in idleproc!");
}
#endif
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
+ busy, curcpu);
if (powerpc_pow_enabled) {
+ if (!busy) {
+ critical_enter();
+ cpu_idleclock();
+ }
switch (vers) {
case IBM970:
case IBM970FX:
@@ -658,7 +664,13 @@ cpu_idle(int busy)
isync();
break;
}
+ if (!busy) {
+ cpu_activeclock();
+ critical_exit();
+ }
}
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
+ busy, curcpu);
}
int
diff --git a/sys/powerpc/booke/machdep.c b/sys/powerpc/booke/machdep.c
index c725dd8..c4b80cc 100644
--- a/sys/powerpc/booke/machdep.c
+++ b/sys/powerpc/booke/machdep.c
@@ -488,9 +488,21 @@ cpu_idle (int busy)
}
#endif
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
+ busy, curcpu);
+ if (!busy) {
+ critical_enter();
+ cpu_idleclock();
+ }
/* Freescale E500 core RM section 6.4.1. */
msr = msr | PSL_WE;
__asm __volatile("msync; mtmsr %0; isync" :: "r" (msr));
+ if (!busy) {
+ cpu_activeclock();
+ critical_exit();
+ }
+ CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
+ busy, curcpu);
}
int
diff --git a/sys/powerpc/include/smp.h b/sys/powerpc/include/smp.h
index c78af74..cf95278 100644
--- a/sys/powerpc/include/smp.h
+++ b/sys/powerpc/include/smp.h
@@ -37,7 +37,6 @@
#define IPI_STOP 3
#define IPI_STOP_HARD 3
#define IPI_HARDCLOCK 4
-#define IPI_STATCLOCK 5
#ifndef LOCORE
diff --git a/sys/powerpc/powerpc/mp_machdep.c b/sys/powerpc/powerpc/mp_machdep.c
index a833638..6915c4b 100644
--- a/sys/powerpc/powerpc/mp_machdep.c
+++ b/sys/powerpc/powerpc/mp_machdep.c
@@ -315,7 +315,7 @@ powerpc_ipi_handler(void *arg)
break;
case IPI_HARDCLOCK:
CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
- hardclockintr(curthread->td_intr_frame);
+ hardclockintr();
break;
}
}
diff --git a/sys/sparc64/include/intr_machdep.h b/sys/sparc64/include/intr_machdep.h
index 254ac78..158b5b6 100644
--- a/sys/sparc64/include/intr_machdep.h
+++ b/sys/sparc64/include/intr_machdep.h
@@ -47,7 +47,6 @@
#define PIL_STOP 5 /* stop cpu ipi */
#define PIL_PREEMPT 6 /* preempt idle thread cpu ipi */
#define PIL_HARDCLOCK 7 /* hardclock broadcast */
-#define PIL_STATCLOCK 8 /* statclock broadcast */
#define PIL_FILTER 12 /* filter interrupts */
#define PIL_FAST 13 /* fast interrupts */
#define PIL_TICK 14 /* tick interrupts */
diff --git a/sys/sparc64/include/smp.h b/sys/sparc64/include/smp.h
index 3812431..3ca8e03 100644
--- a/sys/sparc64/include/smp.h
+++ b/sys/sparc64/include/smp.h
@@ -59,7 +59,6 @@
#define IPI_RENDEZVOUS PIL_RENDEZVOUS
#define IPI_PREEMPT PIL_PREEMPT
#define IPI_HARDCLOCK PIL_HARDCLOCK
-#define IPI_STATCLOCK PIL_STATCLOCK
#define IPI_STOP PIL_STOP
#define IPI_STOP_HARD PIL_STOP
diff --git a/sys/sparc64/sparc64/intr_machdep.c b/sys/sparc64/sparc64/intr_machdep.c
index 8e610f6..8571286 100644
--- a/sys/sparc64/sparc64/intr_machdep.c
+++ b/sys/sparc64/sparc64/intr_machdep.c
@@ -97,8 +97,7 @@ static const char *const pil_names[] = {
"stop", /* PIL_STOP */
"preempt", /* PIL_PREEMPT */
"hardclock", /* PIL_HARDCLOCK */
- "statclock", /* PIL_STATCLOCK */
- "stray", "stray", "stray",
+ "stray", "stray", "stray", "stray",
"filter", /* PIL_FILTER */
"fast", /* PIL_FAST */
"tick", /* PIL_TICK */
diff --git a/sys/sparc64/sparc64/mp_machdep.c b/sys/sparc64/sparc64/mp_machdep.c
index 57a2d6f..e5a9fb3 100644
--- a/sys/sparc64/sparc64/mp_machdep.c
+++ b/sys/sparc64/sparc64/mp_machdep.c
@@ -98,7 +98,6 @@ __FBSDID("$FreeBSD$");
static ih_func_t cpu_ipi_ast;
static ih_func_t cpu_ipi_hardclock;
static ih_func_t cpu_ipi_preempt;
-static ih_func_t cpu_ipi_statclock;
static ih_func_t cpu_ipi_stop;
/*
@@ -292,7 +291,6 @@ cpu_mp_start(void)
intr_setup(PIL_STOP, cpu_ipi_stop, -1, NULL, NULL);
intr_setup(PIL_PREEMPT, cpu_ipi_preempt, -1, NULL, NULL);
intr_setup(PIL_HARDCLOCK, cpu_ipi_hardclock, -1, NULL, NULL);
- intr_setup(PIL_STATCLOCK, cpu_ipi_statclock, -1, NULL, NULL);
cpuid_to_mid[curcpu] = PCPU_GET(mid);
@@ -524,15 +522,18 @@ cpu_ipi_preempt(struct trapframe *tf)
static void
cpu_ipi_hardclock(struct trapframe *tf)
{
+ struct trapframe *oldframe;
+ struct thread *td;
- hardclockintr(tf);
-}
-
-static void
-cpu_ipi_statclock(struct trapframe *tf)
-{
-
- statclockintr(tf);
+ critical_enter();
+ td = curthread;
+ td->td_intr_nesting_level++;
+ oldframe = td->td_intr_frame;
+ td->td_intr_frame = tf;
+ hardclockintr();
+ td->td_intr_frame = oldframe;
+ td->td_intr_nesting_level--;
+ critical_exit();
}
static void
diff --git a/sys/sun4v/include/intr_machdep.h b/sys/sun4v/include/intr_machdep.h
index 370a5c0..f686e66 100644
--- a/sys/sun4v/include/intr_machdep.h
+++ b/sys/sun4v/include/intr_machdep.h
@@ -47,7 +47,6 @@
#define PIL_STOP 5 /* stop cpu ipi */
#define PIL_PREEMPT 6 /* preempt idle thread cpu ipi */
#define PIL_HARDCLOCK 7 /* hardclock broadcast */
-#define PIL_STATCLOCK 8 /* statclock broadcast */
#define PIL_FAST 13 /* fast interrupts */
#define PIL_TICK 14
diff --git a/sys/sun4v/include/smp.h b/sys/sun4v/include/smp.h
index 56c50eb..3202089 100644
--- a/sys/sun4v/include/smp.h
+++ b/sys/sun4v/include/smp.h
@@ -47,7 +47,6 @@
#define IPI_STOP_HARD PIL_STOP
#define IPI_PREEMPT PIL_PREEMPT
#define IPI_HARDCLOCK PIL_HARDCLOCK
-#define IPI_STATCLOCK PIL_STATCLOCK
#define IPI_RETRIES 5000
@@ -83,7 +82,6 @@ void cpu_ipi_ast(struct trapframe *tf);
void cpu_ipi_stop(struct trapframe *tf);
void cpu_ipi_preempt(struct trapframe *tf);
void cpu_ipi_hardclock(struct trapframe *tf);
-void cpu_ipi_statclock(struct trapframe *tf);
void ipi_all_but_self(u_int ipi);
void ipi_cpu(int cpu, u_int ipi);
diff --git a/sys/sun4v/sun4v/intr_machdep.c b/sys/sun4v/sun4v/intr_machdep.c
index 123493e..3587402 100644
--- a/sys/sun4v/sun4v/intr_machdep.c
+++ b/sys/sun4v/sun4v/intr_machdep.c
@@ -110,8 +110,7 @@ static char *pil_names[] = {
"stop", /* PIL_STOP */
"preempt", /* PIL_PREEMPT */
"hardclock", /* PIL_HARDCLOCK */
- "statclock", /* PIL_STATCLOCK */
- "stray", "stray", "stray", "stray",
+ "stray", "stray", "stray", "stray", "stray",
"fast", /* PIL_FAST */
"tick", /* PIL_TICK */
};
@@ -265,7 +264,6 @@ intr_init(void)
intr_handlers[PIL_STOP]= cpu_ipi_stop;
intr_handlers[PIL_PREEMPT]= cpu_ipi_preempt;
intr_handlers[PIL_HARDCLOCK]= cpu_ipi_hardclock;
- intr_handlers[PIL_STATCLOCK]= cpu_ipi_statclock;
#endif
mtx_init(&intr_table_lock, "intr table", NULL, MTX_SPIN);
cpu_intrq_alloc();
diff --git a/sys/sun4v/sun4v/mp_machdep.c b/sys/sun4v/sun4v/mp_machdep.c
index 2e9a378..a9535e3 100644
--- a/sys/sun4v/sun4v/mp_machdep.c
+++ b/sys/sun4v/sun4v/mp_machdep.c
@@ -472,15 +472,18 @@ cpu_ipi_preempt(struct trapframe *tf)
void
cpu_ipi_hardclock(struct trapframe *tf)
{
+ struct trapframe *oldframe;
+ struct thread *td;
- hardclockintr(tf);
-}
-
-void
-cpu_ipi_statclock(struct trapframe *tf)
-{
-
- statclockintr(tf);
+ critical_enter();
+ td = curthread;
+ td->td_intr_nesting_level++;
+ oldframe = td->td_intr_frame;
+ td->td_intr_frame = tf;
+ hardclockintr();
+ td->td_intr_frame = oldframe;
+ td->td_intr_nesting_level--;
+ critical_exit();
}
void
diff --git a/sys/sys/callout.h b/sys/sys/callout.h
index 2d43d14..8fcd06e 100644
--- a/sys/sys/callout.h
+++ b/sys/sys/callout.h
@@ -96,7 +96,8 @@ int callout_schedule_on(struct callout *, int, int);
#define callout_stop(c) _callout_stop_safe(c, 0)
int _callout_stop_safe(struct callout *, int);
void callout_tick(void);
-
+int callout_tickstofirst(void);
+extern void (*callout_new_inserted)(int cpu, int ticks);
#endif
diff --git a/sys/sys/sched.h b/sys/sys/sched.h
index d0ebffd..92dd4c4 100644
--- a/sys/sys/sched.h
+++ b/sys/sys/sched.h
@@ -111,7 +111,7 @@ void sched_preempt(struct thread *td);
void sched_add(struct thread *td, int flags);
void sched_clock(struct thread *td);
void sched_rem(struct thread *td);
-void sched_tick(void);
+void sched_tick(int cnt);
void sched_relinquish(struct thread *td);
struct thread *sched_choose(void);
void sched_idletd(void *);
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index f913887..8e98ef4 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -237,20 +237,22 @@ void realitexpire(void *);
int sysbeep(int hertz, int period);
void hardclock(int usermode, uintfptr_t pc);
+void hardclock_anycpu(int cnt, int usermode);
void hardclock_cpu(int usermode);
+void hardclock_sync(int cpu);
void softclock(void *);
void statclock(int usermode);
void profclock(int usermode, uintfptr_t pc);
-void timer1clock(int usermode, uintfptr_t pc);
-void timer2clock(int usermode, uintfptr_t pc);
-int hardclockintr(struct trapframe *frame);
-int statclockintr(struct trapframe *frame);
+int hardclockintr(void);
void startprofclock(struct proc *);
void stopprofclock(struct proc *);
void cpu_startprofclock(void);
void cpu_stopprofclock(void);
+void cpu_idleclock(void);
+void cpu_activeclock(void);
+extern int cpu_disable_deep_sleep;
int cr_cansee(struct ucred *u1, struct ucred *u2);
int cr_canseesocket(struct ucred *cred, struct socket *so);
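The new cpu_idleclock()/cpu_activeclock() pair declared here is used the same way in every MD cpu_idle() touched by this diff: enter a critical section, let the clock code throttle ticks for the idle period, do the platform-specific wait, then resume and account the skipped ticks. A condensed sketch mirroring those hunks; the function name cpu_idle_example is illustrative only:

    /* Canonical use of the idle-clock hooks, as in the MD cpu_idle() bodies above. */
    static void
    cpu_idle_example(int busy)
    {
        if (!busy) {
            critical_enter();
            cpu_idleclock();        /* stop or stretch periodic ticks while idle */
        }
        /* ... platform-specific wait-for-interrupt goes here ... */
        if (!busy) {
            cpu_activeclock();      /* resume ticks and account the skipped ones */
            critical_exit();
        }
    }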
diff --git a/sys/sys/timeet.h b/sys/sys/timeet.h
index bc713d6..87392a2 100644
--- a/sys/sys/timeet.h
+++ b/sys/sys/timeet.h
@@ -83,8 +83,8 @@ struct eventtimer {
};
extern struct mtx et_eventtimers_mtx;
-#define ET_LOCK() mtx_lock_spin(&et_eventtimers_mtx)
-#define ET_UNLOCK() mtx_unlock_spin(&et_eventtimers_mtx)
+#define ET_LOCK() mtx_lock(&et_eventtimers_mtx)
+#define ET_UNLOCK() mtx_unlock(&et_eventtimers_mtx)
/* Driver API */
int et_register(struct eventtimer *et);
diff --git a/sys/sys/timetc.h b/sys/sys/timetc.h
index d5a818b..3249788 100644
--- a/sys/sys/timetc.h
+++ b/sys/sys/timetc.h
@@ -70,6 +70,7 @@ u_int64_t tc_getfrequency(void);
void tc_init(struct timecounter *tc);
void tc_setclock(struct timespec *ts);
void tc_ticktock(void);
+void cpu_tick_calibration(void);
#ifdef SYSCTL_DECL
SYSCTL_DECL(_kern_timecounter);
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
index f479bbe..6d7a53b 100644
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -261,7 +261,7 @@ lapic_init(vm_paddr_t addr)
lapic_et.et_quality = 600;
if (!arat) {
lapic_et.et_flags |= ET_FLAGS_C3STOP;
- lapic_et.et_quality -= 100;
+ lapic_et.et_quality -= 200;
}
lapic_et.et_frequency = 0;
/* We don't know frequency yet, so trying to guess. */