summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: neel <neel@FreeBSD.org> 2013-12-07 23:11:12 +0000
committer: neel <neel@FreeBSD.org> 2013-12-07 23:11:12 +0000
commit: e7ebb9541a8be6ea07b5eccf2bff421fe6d5e84e (patch)
tree: bae489a2317c3cb8e1ef29791ecee7e8cd5c1529
parent: f24ecdf1041ec511cab3b3b8baf76e2ccccee14a (diff)
download: FreeBSD-src-e7ebb9541a8be6ea07b5eccf2bff421fe6d5e84e.zip
download: FreeBSD-src-e7ebb9541a8be6ea07b5eccf2bff421fe6d5e84e.tar.gz
Use callout(9) to drive the vlapic timer instead of clocking it on each VM exit.
This decouples the guest's 'hz' from the host's 'hz' setting. For example, it is now possible to have a guest run at 'hz=1000' while the host is at 'hz=100'. Discussed with: grehan@. Tested by: Tycho Nightingale (tycho.nightingale@pluribusnetworks.com).
-rw-r--r-- sys/amd64/vmm/intel/vmx.c 1
-rw-r--r-- sys/amd64/vmm/io/vlapic.c 339
-rw-r--r-- sys/amd64/vmm/io/vlapic.h 1
-rw-r--r-- sys/amd64/vmm/vmm.c 24
-rw-r--r-- sys/amd64/vmm/vmm_lapic.c 10
-rw-r--r-- sys/amd64/vmm/vmm_lapic.h 2
6 files changed, 233 insertions, 144 deletions
diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c
index f82615e..7e16598 100644
--- a/sys/amd64/vmm/intel/vmx.c
+++ b/sys/amd64/vmm/intel/vmx.c
@@ -1563,7 +1563,6 @@ vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap)
panic("vmx_run: error %d setting up pcpu defaults", error);
do {
- lapic_timer_tick(vmx->vm, vcpu);
vmx_inject_interrupts(vmx, vcpu);
vmx_run_trace(vmx, vcpu);
rc = vmx_setjmp(vmxctx);
diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c
index a96048b..4c0cf59 100644
--- a/sys/amd64/vmm/io/vlapic.c
+++ b/sys/amd64/vmm/io/vlapic.c
@@ -30,8 +30,10 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
+#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/smp.h>
@@ -103,12 +105,15 @@ struct vlapic {
struct vm *vm;
int vcpuid;
- struct LAPIC apic;
+ struct LAPIC apic;
int esr_update;
- int divisor;
- int ccr_ticks;
+ struct callout callout; /* vlapic timer */
+ struct bintime timer_fire_bt; /* callout expiry time */
+ struct bintime timer_freq_bt; /* timer frequency */
+ struct bintime timer_period_bt; /* timer period */
+ struct mtx timer_mtx;
/*
* The 'isrvec_stk' is a stack of vectors injected by the local apic.
@@ -123,6 +128,21 @@ struct vlapic {
enum boot_state boot_state;
};
+/*
+ * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the
+ * vlapic_callout_handler() and vcpu accesses to the following registers:
+ * - initial count register aka icr_timer
+ * - current count register aka ccr_timer
+ * - divide config register aka dcr_timer
+ * - timer LVT register
+ *
+ * Note that the vlapic_callout_handler() does not write to any of these
+ * registers so they can be safely read from the vcpu context without locking.
+ */
+#define VLAPIC_TIMER_LOCK(vlapic) mtx_lock(&((vlapic)->timer_mtx))
+#define VLAPIC_TIMER_UNLOCK(vlapic) mtx_unlock(&((vlapic)->timer_mtx))
+#define VLAPIC_TIMER_LOCKED(vlapic) mtx_owned(&((vlapic)->timer_mtx))
+
#define VLAPIC_BUS_FREQ tsc_freq
static int
@@ -170,11 +190,62 @@ vlapic_dump_lvt(uint32_t offset, uint32_t *lvt)
}
#endif
-static uint64_t
+static uint32_t
vlapic_get_ccr(struct vlapic *vlapic)
{
- struct LAPIC *lapic = &vlapic->apic;
- return lapic->ccr_timer;
+ struct bintime bt_now, bt_rem;
+ struct LAPIC *lapic;
+ uint32_t ccr;
+
+ ccr = 0;
+ lapic = &vlapic->apic;
+
+ VLAPIC_TIMER_LOCK(vlapic);
+ if (callout_active(&vlapic->callout)) {
+ /*
+ * If the timer is scheduled to expire in the future then
+ * compute the value of 'ccr' based on the remaining time.
+ */
+ binuptime(&bt_now);
+ if (bintime_cmp(&vlapic->timer_fire_bt, &bt_now, >)) {
+ bt_rem = vlapic->timer_fire_bt;
+ bintime_sub(&bt_rem, &bt_now);
+ ccr += bt_rem.sec * BT2FREQ(&vlapic->timer_freq_bt);
+ ccr += bt_rem.frac / vlapic->timer_freq_bt.frac;
+ }
+ }
+ KASSERT(ccr <= lapic->icr_timer, ("vlapic_get_ccr: invalid ccr %#x, "
+ "icr_timer is %#x", ccr, lapic->icr_timer));
+ VLAPIC_CTR2(vlapic, "vlapic ccr_timer = %#x, icr_timer = %#x",
+ ccr, lapic->icr_timer);
+ VLAPIC_TIMER_UNLOCK(vlapic);
+ return (ccr);
+}
+
+static void
+vlapic_set_dcr(struct vlapic *vlapic, uint32_t dcr)
+{
+ struct LAPIC *lapic;
+ int divisor;
+
+ lapic = &vlapic->apic;
+ VLAPIC_TIMER_LOCK(vlapic);
+
+ lapic->dcr_timer = dcr;
+ divisor = vlapic_timer_divisor(dcr);
+ VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d", dcr, divisor);
+
+ /*
+ * Update the timer frequency and the timer period.
+ *
+ * XXX changes to the frequency divider will not take effect until
+ * the timer is reloaded.
+ */
+ FREQ2BT(VLAPIC_BUS_FREQ / divisor, &vlapic->timer_freq_bt);
+ vlapic->timer_period_bt = vlapic->timer_freq_bt;
+ bintime_mul(&vlapic->timer_period_bt, lapic->icr_timer);
+
+ VLAPIC_TIMER_UNLOCK(vlapic);
}
static void
@@ -203,7 +274,7 @@ vlapic_reset(struct vlapic *vlapic)
memset(lapic, 0, sizeof(*lapic));
lapic->apr = vlapic->vcpuid;
vlapic_init_ipi(vlapic);
- vlapic->divisor = vlapic_timer_divisor(lapic->dcr_timer);
+ vlapic_set_dcr(vlapic, 0);
if (vlapic->vcpuid == 0)
vlapic->boot_state = BS_RUNNING; /* BSP */
@@ -250,30 +321,8 @@ vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready");
}
-static void
-vlapic_start_timer(struct vlapic *vlapic, uint32_t elapsed)
-{
- uint32_t icr_timer;
-
- icr_timer = vlapic->apic.icr_timer;
-
- vlapic->ccr_ticks = ticks;
- if (elapsed < icr_timer)
- vlapic->apic.ccr_timer = icr_timer - elapsed;
- else {
- /*
- * This can happen when the guest is trying to run its local
- * apic timer higher that the setting of 'hz' in the host.
- *
- * We deal with this by running the guest local apic timer
- * at the rate of the host's 'hz' setting.
- */
- vlapic->apic.ccr_timer = 0;
- }
-}
-
static __inline uint32_t *
-vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
+vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset)
{
struct LAPIC *lapic = &vlapic->apic;
int i;
@@ -285,6 +334,33 @@ vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
return ((&lapic->lvt_timer) + i);;
}
+static __inline uint32_t
+vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset)
+{
+
+ return (*vlapic_get_lvtptr(vlapic, offset));
+}
+
+static void
+vlapic_set_lvt(struct vlapic *vlapic, uint32_t offset, uint32_t val)
+{
+ uint32_t *lvtptr;
+ struct LAPIC *lapic;
+
+ lapic = &vlapic->apic;
+ lvtptr = vlapic_get_lvtptr(vlapic, offset);
+
+ if (offset == APIC_OFFSET_TIMER_LVT)
+ VLAPIC_TIMER_LOCK(vlapic);
+
+ if (!(lapic->svr & APIC_SVR_ENABLE))
+ val |= APIC_LVT_M;
+ *lvtptr = val;
+
+ if (offset == APIC_OFFSET_TIMER_LVT)
+ VLAPIC_TIMER_UNLOCK(vlapic);
+}
+
#if 1
static void
dump_isrvec_stk(struct vlapic *vlapic)
@@ -407,15 +483,16 @@ vlapic_process_eoi(struct vlapic *vlapic)
}
static __inline int
-vlapic_get_lvt_field(uint32_t *lvt, uint32_t mask)
+vlapic_get_lvt_field(uint32_t lvt, uint32_t mask)
{
- return (*lvt & mask);
+
+ return (lvt & mask);
}
static __inline int
vlapic_periodic_timer(struct vlapic *vlapic)
{
- uint32_t *lvt;
+ uint32_t lvt;
lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
@@ -428,15 +505,109 @@ static void
vlapic_fire_timer(struct vlapic *vlapic)
{
int vector;
- uint32_t *lvt;
+ uint32_t lvt;
+
+ KASSERT(VLAPIC_TIMER_LOCKED(vlapic), ("vlapic_fire_timer not locked"));
lvt = vlapic_get_lvt(vlapic, APIC_OFFSET_TIMER_LVT);
if (!vlapic_get_lvt_field(lvt, APIC_LVTT_M)) {
vmm_stat_incr(vlapic->vm, vlapic->vcpuid, VLAPIC_INTR_TIMER, 1);
- vector = vlapic_get_lvt_field(lvt,APIC_LVTT_VECTOR);
+ vector = vlapic_get_lvt_field(lvt, APIC_LVTT_VECTOR);
vlapic_set_intr_ready(vlapic, vector, false);
+ vcpu_notify_event(vlapic->vm, vlapic->vcpuid);
+ }
+}
+
+static void
+vlapic_callout_handler(void *arg)
+{
+ struct vlapic *vlapic;
+ struct bintime bt, btnow;
+ sbintime_t rem_sbt;
+
+ vlapic = arg;
+
+ VLAPIC_TIMER_LOCK(vlapic);
+ if (callout_pending(&vlapic->callout)) /* callout was reset */
+ goto done;
+
+ if (!callout_active(&vlapic->callout)) /* callout was stopped */
+ goto done;
+
+ callout_deactivate(&vlapic->callout);
+
+ KASSERT(vlapic->apic.icr_timer != 0, ("vlapic timer is disabled"));
+
+ vlapic_fire_timer(vlapic);
+
+ if (vlapic_periodic_timer(vlapic)) {
+ binuptime(&btnow);
+ KASSERT(bintime_cmp(&btnow, &vlapic->timer_fire_bt, >=),
+ ("vlapic callout at %#lx.%#lx, expected at %#lx.#%lx",
+ btnow.sec, btnow.frac, vlapic->timer_fire_bt.sec,
+ vlapic->timer_fire_bt.frac));
+
+ /*
+ * Compute the delta between when the timer was supposed to
+ * fire and the present time.
+ */
+ bt = btnow;
+ bintime_sub(&bt, &vlapic->timer_fire_bt);
+
+ rem_sbt = bttosbt(vlapic->timer_period_bt);
+ if (bintime_cmp(&bt, &vlapic->timer_period_bt, <)) {
+ /*
+ * Adjust the time until the next countdown downward
+ * to account for the lost time.
+ */
+ rem_sbt -= bttosbt(bt);
+ } else {
+ /*
+ * If the delta is greater than the timer period then
+ * just reset our time base instead of trying to catch
+ * up.
+ */
+ vlapic->timer_fire_bt = btnow;
+ VLAPIC_CTR2(vlapic, "vlapic timer lagging by %lu "
+ "usecs, period is %lu usecs - resetting time base",
+ bttosbt(bt) / SBT_1US,
+ bttosbt(vlapic->timer_period_bt) / SBT_1US);
+ }
+
+ bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
+ callout_reset_sbt(&vlapic->callout, rem_sbt, 0,
+ vlapic_callout_handler, vlapic, 0);
}
+done:
+ VLAPIC_TIMER_UNLOCK(vlapic);
+}
+
+static void
+vlapic_set_icr_timer(struct vlapic *vlapic, uint32_t icr_timer)
+{
+ struct LAPIC *lapic;
+ sbintime_t sbt;
+
+ VLAPIC_TIMER_LOCK(vlapic);
+
+ lapic = &vlapic->apic;
+ lapic->icr_timer = icr_timer;
+
+ vlapic->timer_period_bt = vlapic->timer_freq_bt;
+ bintime_mul(&vlapic->timer_period_bt, icr_timer);
+
+ if (icr_timer != 0) {
+ binuptime(&vlapic->timer_fire_bt);
+ bintime_add(&vlapic->timer_fire_bt, &vlapic->timer_period_bt);
+
+ sbt = bttosbt(vlapic->timer_period_bt);
+ callout_reset_sbt(&vlapic->callout, sbt, 0,
+ vlapic_callout_handler, vlapic, 0);
+ } else
+ callout_stop(&vlapic->callout);
+
+ VLAPIC_TIMER_UNLOCK(vlapic);
}
static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");
@@ -564,7 +735,6 @@ vlapic_pending_intr(struct vlapic *vlapic)
break;
}
}
- VLAPIC_CTR0(vlapic, "no pending intr");
return (-1);
}
@@ -613,9 +783,21 @@ lapic_set_svr(struct vlapic *vlapic, uint32_t new)
changed = old ^ new;
if ((changed & APIC_SVR_ENABLE) != 0) {
if ((new & APIC_SVR_ENABLE) == 0) {
+ /*
+ * The apic is now disabled so stop the apic timer.
+ */
VLAPIC_CTR0(vlapic, "vlapic is software-disabled");
+ VLAPIC_TIMER_LOCK(vlapic);
+ callout_stop(&vlapic->callout);
+ VLAPIC_TIMER_UNLOCK(vlapic);
} else {
+ /*
+ * The apic is now enabled so restart the apic timer
+ * if it is configured in periodic mode.
+ */
VLAPIC_CTR0(vlapic, "vlapic is software-enabled");
+ if (vlapic_periodic_timer(vlapic))
+ vlapic_set_icr_timer(vlapic, lapic->icr_timer);
}
}
lapic->svr = new;
@@ -691,8 +873,7 @@ vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data)
*data = lapic->icr_hi;
break;
case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
- reg = vlapic_get_lvt(vlapic, offset);
- *data = *(reg);
+ *data = vlapic_get_lvt(vlapic, offset);
break;
case APIC_OFFSET_ICR:
*data = lapic->icr_timer;
@@ -717,7 +898,6 @@ int
vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data)
{
struct LAPIC *lapic = &vlapic->apic;
- uint32_t *reg;
int retval;
VLAPIC_CTR2(vlapic, "vlapic write offset %#x, data %#lx", offset, data);
@@ -760,21 +940,14 @@ vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data)
}
break;
case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT:
- reg = vlapic_get_lvt(vlapic, offset);
- if (!(lapic->svr & APIC_SVR_ENABLE)) {
- data |= APIC_LVT_M;
- }
- *reg = data;
- // vlapic_dump_lvt(offset, reg);
+ vlapic_set_lvt(vlapic, offset, data);
break;
case APIC_OFFSET_ICR:
- lapic->icr_timer = data;
- vlapic_start_timer(vlapic, 0);
+ vlapic_set_icr_timer(vlapic, data);
break;
case APIC_OFFSET_DCR:
- lapic->dcr_timer = data;
- vlapic->divisor = vlapic_timer_divisor(data);
+ vlapic_set_dcr(vlapic, data);
break;
case APIC_OFFSET_ESR:
@@ -796,70 +969,6 @@ vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data)
return (retval);
}
-int
-vlapic_timer_tick(struct vlapic *vlapic)
-{
- int curticks, delta, periodic, fired;
- uint32_t ccr;
- uint32_t decrement, leftover;
-
-restart:
- curticks = ticks;
- delta = curticks - vlapic->ccr_ticks;
-
- /* Local APIC timer is disabled */
- if (vlapic->apic.icr_timer == 0)
- return (-1);
-
- /* One-shot mode and timer has already counted down to zero */
- periodic = vlapic_periodic_timer(vlapic);
- if (!periodic && vlapic->apic.ccr_timer == 0)
- return (-1);
- /*
- * The 'curticks' and 'ccr_ticks' are out of sync by more than
- * 2^31 ticks. We deal with this by restarting the timer.
- */
- if (delta < 0) {
- vlapic_start_timer(vlapic, 0);
- goto restart;
- }
-
- fired = 0;
- decrement = (VLAPIC_BUS_FREQ / vlapic->divisor) / hz;
-
- vlapic->ccr_ticks = curticks;
- ccr = vlapic->apic.ccr_timer;
-
- while (delta-- > 0) {
- if (ccr > decrement) {
- ccr -= decrement;
- continue;
- }
-
- /* Trigger the local apic timer interrupt */
- vlapic_fire_timer(vlapic);
- if (periodic) {
- leftover = decrement - ccr;
- vlapic_start_timer(vlapic, leftover);
- ccr = vlapic->apic.ccr_timer;
- } else {
- /*
- * One-shot timer has counted down to zero.
- */
- ccr = 0;
- }
- fired = 1;
- break;
- }
-
- vlapic->apic.ccr_timer = ccr;
-
- if (!fired)
- return ((ccr / decrement) + 1);
- else
- return (0);
-}
-
struct vlapic *
vlapic_init(struct vm *vm, int vcpuid)
{
@@ -869,6 +978,9 @@ vlapic_init(struct vm *vm, int vcpuid)
vlapic->vm = vm;
vlapic->vcpuid = vcpuid;
+ mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_DEF);
+ callout_init(&vlapic->callout, 1);
+
vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
if (vcpuid == 0)
@@ -883,6 +995,7 @@ void
vlapic_cleanup(struct vlapic *vlapic)
{
+ callout_drain(&vlapic->callout);
free(vlapic, M_VLAPIC);
}
diff --git a/sys/amd64/vmm/io/vlapic.h b/sys/amd64/vmm/io/vlapic.h
index be0474e..926004d 100644
--- a/sys/amd64/vmm/io/vlapic.h
+++ b/sys/amd64/vmm/io/vlapic.h
@@ -95,7 +95,6 @@ int vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data);
int vlapic_pending_intr(struct vlapic *vlapic);
void vlapic_intr_accepted(struct vlapic *vlapic, int vector);
void vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level);
-int vlapic_timer_tick(struct vlapic *vlapic);
uint64_t vlapic_get_apicbase(struct vlapic *vlapic);
void vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val);
diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
index 82c34cf..af9d851 100644
--- a/sys/amd64/vmm/vmm.c
+++ b/sys/amd64/vmm/vmm.c
@@ -865,26 +865,13 @@ vm_handle_hlt(struct vm *vm, int vcpuid, boolean_t intr_disabled,
{
struct vm_exit *vmexit;
struct vcpu *vcpu;
- int sleepticks, t;
+ int t, timo;
vcpu = &vm->vcpu[vcpuid];
vcpu_lock(vcpu);
/*
- * Figure out the number of host ticks until the next apic
- * timer interrupt in the guest.
- */
- sleepticks = lapic_timer_tick(vm, vcpuid);
-
- /*
- * If the guest local apic timer is disabled then sleep for
- * a long time but not forever.
- */
- if (sleepticks < 0)
- sleepticks = hz;
-
- /*
* Do a final check for pending NMI or interrupts before
* really putting this thread to sleep.
*
@@ -893,12 +880,15 @@ vm_handle_hlt(struct vm *vm, int vcpuid, boolean_t intr_disabled,
*/
if (!vm_nmi_pending(vm, vcpuid) &&
(intr_disabled || vlapic_pending_intr(vcpu->vlapic) < 0)) {
- if (sleepticks <= 0)
- panic("invalid sleepticks %d", sleepticks);
t = ticks;
vcpu_require_state_locked(vcpu, VCPU_SLEEPING);
if (vlapic_enabled(vcpu->vlapic)) {
- msleep_spin(vcpu, &vcpu->mtx, "vmidle", sleepticks);
+ /*
+ * XXX msleep_spin() is not interruptible so use the
+ * 'timo' to put an upper bound on the sleep time.
+ */
+ timo = hz;
+ msleep_spin(vcpu, &vcpu->mtx, "vmidle", timo);
} else {
/*
* Spindown the vcpu if the apic is disabled and it
diff --git a/sys/amd64/vmm/vmm_lapic.c b/sys/amd64/vmm/vmm_lapic.c
index 5217529..0b54442 100644
--- a/sys/amd64/vmm/vmm_lapic.c
+++ b/sys/amd64/vmm/vmm_lapic.c
@@ -80,16 +80,6 @@ lapic_set_intr(struct vm *vm, int cpu, int vector, bool level)
return (0);
}
-int
-lapic_timer_tick(struct vm *vm, int cpu)
-{
- struct vlapic *vlapic;
-
- vlapic = vm_lapic(vm, cpu);
-
- return (vlapic_timer_tick(vlapic));
-}
-
static boolean_t
x2apic_msr(u_int msr)
{
diff --git a/sys/amd64/vmm/vmm_lapic.h b/sys/amd64/vmm/vmm_lapic.h
index 1461185..7a8a509 100644
--- a/sys/amd64/vmm/vmm_lapic.h
+++ b/sys/amd64/vmm/vmm_lapic.h
@@ -40,8 +40,6 @@ int lapic_mmio_read(void *vm, int cpu, uint64_t gpa,
int lapic_mmio_write(void *vm, int cpu, uint64_t gpa,
uint64_t wval, int size, void *arg);
-int lapic_timer_tick(struct vm *vm, int cpu);
-
/*
* Returns a vector between 32 and 255 if an interrupt is pending in the
* IRR that can be delivered based on the current state of ISR and TPR.
OpenPOWER on IntegriCloud