summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authormav <mav@FreeBSD.org>2011-05-13 12:39:37 +0000
committermav <mav@FreeBSD.org>2011-05-13 12:39:37 +0000
commit1881f29e6e8207fb99d0860d582c91e714f12a95 (patch)
tree8281c60b958f7d6ebfdb7b2134c07e51e43407b7
parent78eaf5d43193075e2a63856e4329c0383e0eab02 (diff)
downloadFreeBSD-src-1881f29e6e8207fb99d0860d582c91e714f12a95.zip
FreeBSD-src-1881f29e6e8207fb99d0860d582c91e714f12a95.tar.gz
Refactor Xen PV code to use the new event timers subsystem. That uses the one-shot
Xen timer and time counter to provide one-shot and periodic time events. In my tests this reduces the idle interrupt rate down to about 30Hz and, according to Xen VM Manager, reduces host CPU load by a factor of three compared to the previous periodic 100Hz clock. Also, it is now possible to increase the HZ rate when needed without useless CPU burning during idle periods. Now only ia64 and some ARMs are left not migrated to the new event timers.
-rw-r--r--sys/i386/i386/machdep.c14
-rw-r--r--sys/i386/xen/clock.c197
-rw-r--r--sys/i386/xen/mp_machdep.c5
-rw-r--r--sys/kern/kern_clocksource.c5
4 files changed, 112 insertions, 109 deletions
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index 265a5c4..fbf444a 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -1351,7 +1351,9 @@ void (*cpu_idle_fn)(int) = cpu_idle_acpi;
void
cpu_idle(int busy)
{
+#ifndef XEN
uint64_t msr;
+#endif
CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
busy, curcpu);
@@ -1367,34 +1369,32 @@ cpu_idle(int busy)
goto out;
}
}
+#endif
/* If we have time - switch timers into idle mode. */
if (!busy) {
critical_enter();
cpu_idleclock();
}
-#endif
- /* Apply AMD APIC timer C1E workaround. */
- if (cpu_ident_amdc1e
#ifndef XEN
- && cpu_disable_deep_sleep
-#endif
- ) {
+ /* Apply AMD APIC timer C1E workaround. */
+ if (cpu_ident_amdc1e && cpu_disable_deep_sleep) {
msr = rdmsr(MSR_AMDK8_IPM);
if (msr & AMDK8_CMPHALT)
wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
}
+#endif
/* Call main idle method. */
cpu_idle_fn(busy);
-#ifndef XEN
/* Switch timers mack into active mode. */
if (!busy) {
cpu_activeclock();
critical_exit();
}
+#ifndef XEN
out:
#endif
CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
diff --git a/sys/i386/xen/clock.c b/sys/i386/xen/clock.c
index 091c8c7..f5965bf 100644
--- a/sys/i386/xen/clock.c
+++ b/sys/i386/xen/clock.c
@@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/time.h>
+#include <sys/timeet.h>
#include <sys/timetc.h>
#include <sys/kernel.h>
#include <sys/limits.h>
@@ -301,38 +302,44 @@ static struct timecounter xen_timecounter = {
0 /* quality */
};
+static struct eventtimer xen_et;
+
+struct xen_et_state {
+ int mode;
+#define MODE_STOP 0
+#define MODE_PERIODIC 1
+#define MODE_ONESHOT 2
+ int64_t period;
+ int64_t next;
+};
+
+static DPCPU_DEFINE(struct xen_et_state, et_state);
+
static int
clkintr(void *arg)
{
- int64_t delta_cpu, delta;
- struct trapframe *frame = (struct trapframe *)arg;
+ int64_t now;
int cpu = smp_processor_id();
struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
+ struct xen_et_state *state = DPCPU_PTR(et_state);
do {
__get_time_values_from_xen();
-
- delta = delta_cpu =
- shadow->system_timestamp + get_nsec_offset(shadow);
- delta -= processed_system_time;
- delta_cpu -= per_cpu(processed_system_time, cpu);
-
+ now = shadow->system_timestamp + get_nsec_offset(shadow);
} while (!time_values_up_to_date(cpu));
-
- if (unlikely(delta < (int64_t)0) || unlikely(delta_cpu < (int64_t)0)) {
- printf("Timer ISR: Time went backwards: %lld\n", delta);
- return (FILTER_HANDLED);
- }
-
+
/* Process elapsed ticks since last call. */
- while (delta >= NS_PER_TICK) {
- delta -= NS_PER_TICK;
- processed_system_time += NS_PER_TICK;
- per_cpu(processed_system_time, cpu) += NS_PER_TICK;
- if (PCPU_GET(cpuid) == 0)
- hardclock(TRAPF_USERMODE(frame), TRAPF_PC(frame));
- else
- hardclock_cpu(TRAPF_USERMODE(frame));
+ processed_system_time = now;
+ if (state->mode == MODE_PERIODIC) {
+ while (now >= state->next) {
+ state->next += state->period;
+ if (xen_et.et_active)
+ xen_et.et_event_cb(&xen_et, xen_et.et_arg);
+ }
+ HYPERVISOR_set_timer_op(state->next + 50000);
+ } else if (state->mode == MODE_ONESHOT) {
+ if (xen_et.et_active)
+ xen_et.et_event_cb(&xen_et, xen_et.et_arg);
}
/*
* Take synchronised time from Xen once a minute if we're not
@@ -484,12 +491,14 @@ DELAY(int n)
void
timer_restore(void)
{
+ struct xen_et_state *state = DPCPU_PTR(et_state);
+
/* Get timebases for new environment. */
__get_time_values_from_xen();
/* Reset our own concept of passage of system time. */
processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
- per_cpu(processed_system_time, 0) = processed_system_time;
+ state->next = processed_system_time;
}
void
@@ -503,7 +512,6 @@ startrtclock()
/* initialize xen values */
__get_time_values_from_xen();
processed_system_time = per_cpu(shadow_time, 0).system_timestamp;
- per_cpu(processed_system_time, 0) = processed_system_time;
__cpu_khz = 1000000ULL << 32;
info = &HYPERVISOR_shared_info->vcpu_info[0].time;
@@ -759,7 +767,49 @@ resettodr()
}
#endif
-static struct vcpu_set_periodic_timer xen_set_periodic_tick;
+static int
+xen_et_start(struct eventtimer *et,
+ struct bintime *first, struct bintime *period)
+{
+ struct xen_et_state *state = DPCPU_PTR(et_state);
+ struct shadow_time_info *shadow;
+ int64_t fperiod;
+
+ __get_time_values_from_xen();
+
+ if (period != NULL) {
+ state->mode = MODE_PERIODIC;
+ state->period = (1000000000LL *
+ (uint32_t)(period->frac >> 32)) >> 32;
+ if (period->sec != 0)
+ state->period += 1000000000LL * period->sec;
+ } else {
+ state->mode = MODE_ONESHOT;
+ state->period = 0;
+ }
+ if (first != NULL) {
+ fperiod = (1000000000LL * (uint32_t)(first->frac >> 32)) >> 32;
+ if (first->sec != 0)
+ fperiod += 1000000000LL * first->sec;
+ } else
+ fperiod = state->period;
+
+ shadow = &per_cpu(shadow_time, smp_processor_id());
+ state->next = shadow->system_timestamp + get_nsec_offset(shadow);
+ state->next += fperiod;
+ HYPERVISOR_set_timer_op(state->next + 50000);
+ return (0);
+}
+
+static int
+xen_et_stop(struct eventtimer *et)
+{
+ struct xen_et_state *state = DPCPU_PTR(et_state);
+
+ state->mode = MODE_STOP;
+ HYPERVISOR_set_timer_op(0);
+ return (0);
+}
/*
* Start clocks running.
@@ -770,56 +820,48 @@ cpu_initclocks(void)
unsigned int time_irq;
int error;
- xen_set_periodic_tick.period_ns = NS_PER_TICK;
-
- HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, 0,
- &xen_set_periodic_tick);
-
- error = bind_virq_to_irqhandler(VIRQ_TIMER, 0, "clk",
- clkintr, NULL, NULL,
- INTR_TYPE_CLK, &time_irq);
+ HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, 0, NULL);
+ error = bind_virq_to_irqhandler(VIRQ_TIMER, 0, "cpu0:timer",
+ clkintr, NULL, NULL, INTR_TYPE_CLK, &time_irq);
if (error)
panic("failed to register clock interrupt\n");
/* should fast clock be enabled ? */
-
+
+ bzero(&xen_et, sizeof(xen_et));
+ xen_et.et_name = "ixen";
+ xen_et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT |
+ ET_FLAGS_PERCPU;
+ xen_et.et_quality = 600;
+ xen_et.et_frequency = 0;
+ xen_et.et_min_period.sec = 0;
+ xen_et.et_min_period.frac = 0x00400000LL << 32;
+ xen_et.et_max_period.sec = 2;
+ xen_et.et_max_period.frac = 0;
+ xen_et.et_start = xen_et_start;
+ xen_et.et_stop = xen_et_stop;
+ xen_et.et_priv = NULL;
+ et_register(&xen_et);
+
+ cpu_initclocks_bsp();
}
int
ap_cpu_initclocks(int cpu)
{
+ char buf[MAXCOMLEN + 1];
unsigned int time_irq;
int error;
- xen_set_periodic_tick.period_ns = NS_PER_TICK;
-
- HYPERVISOR_vcpu_op(VCPUOP_set_periodic_timer, cpu,
- &xen_set_periodic_tick);
- error = bind_virq_to_irqhandler(VIRQ_TIMER, 0, "clk",
- clkintr, NULL, NULL,
- INTR_TYPE_CLK, &time_irq);
+ HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL);
+ snprintf(buf, sizeof(buf), "cpu%d:timer", cpu);
+ error = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, buf,
+ clkintr, NULL, NULL, INTR_TYPE_CLK, &time_irq);
if (error)
panic("failed to register clock interrupt\n");
-
return (0);
}
-
-void
-cpu_startprofclock(void)
-{
-
- printf("cpu_startprofclock: profiling clock is not supported\n");
-}
-
-void
-cpu_stopprofclock(void)
-{
-
- printf("cpu_stopprofclock: profiling clock is not supported\n");
-}
-#define NSEC_PER_USEC 1000
-
static uint32_t
xen_get_timecount(struct timecounter *tc)
{
@@ -842,45 +884,11 @@ get_system_time(int ticks)
return processed_system_time + (ticks * NS_PER_TICK);
}
-/*
- * Track behavior of cur_timer->get_offset() functionality in timer_tsc.c
- */
-
-
-/* Convert jiffies to system time. */
-static uint64_t
-ticks_to_system_time(int newticks)
-{
- int delta;
- uint64_t st;
-
- delta = newticks - ticks;
- if (delta < 1) {
- /* Triggers in some wrap-around cases,
- * but that's okay:
- * we just end up with a shorter timeout. */
- st = processed_system_time + NS_PER_TICK;
- } else if (((unsigned int)delta >> (BITS_PER_LONG-3)) != 0) {
- /* Very long timeout means there is no pending timer.
- * We indicate this to Xen by passing zero timeout. */
- st = 0;
- } else {
- st = processed_system_time + delta * (uint64_t)NS_PER_TICK;
- }
-
- return (st);
-}
-
void
idle_block(void)
{
- uint64_t timeout;
-
- timeout = ticks_to_system_time(ticks + 1) + NS_PER_TICK/2;
- __get_time_values_from_xen();
- PANIC_IF(HYPERVISOR_set_timer_op(timeout) != 0);
- HYPERVISOR_sched_op(SCHEDOP_block, 0);
+ HYPERVISOR_sched_op(SCHEDOP_block, 0);
}
int
@@ -903,6 +911,3 @@ timer_spkr_setfreq(int freq)
}
-
-
-
diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c
index 6e0fa23..1bfc0e9 100644
--- a/sys/i386/xen/mp_machdep.c
+++ b/sys/i386/xen/mp_machdep.c
@@ -628,8 +628,11 @@ init_secondary(void)
while (smp_started == 0)
ia32_pause();
-
PCPU_SET(curthread, PCPU_GET(idlethread));
+
+ /* Start per-CPU event timers. */
+ cpu_initclocks_ap();
+
/* enter the scheduler */
sched_throw(NULL);
diff --git a/sys/kern/kern_clocksource.c b/sys/kern/kern_clocksource.c
index d55d85c..604bd2d 100644
--- a/sys/kern/kern_clocksource.c
+++ b/sys/kern/kern_clocksource.c
@@ -31,9 +31,6 @@ __FBSDID("$FreeBSD$");
* Common routines to manage event timers hardware.
*/
-/* XEN has own timer routines now. */
-#ifndef XEN
-
#include "opt_device_polling.h"
#include "opt_kdtrace.h"
@@ -899,5 +896,3 @@ sysctl_kern_eventtimer_periodic(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_kern_eventtimer, OID_AUTO, periodic,
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
0, 0, sysctl_kern_eventtimer_periodic, "I", "Enable event timer periodic mode");
-
-#endif
OpenPOWER on IntegriCloud