diff options
Diffstat (limited to 'sys/amd64/vmm/io/vlapic.c')
-rw-r--r-- | sys/amd64/vmm/io/vlapic.c | 669 |
1 files changed, 398 insertions, 271 deletions
diff --git a/sys/amd64/vmm/io/vlapic.c b/sys/amd64/vmm/io/vlapic.c index 695040d..2395247 100644 --- a/sys/amd64/vmm/io/vlapic.c +++ b/sys/amd64/vmm/io/vlapic.c @@ -37,108 +37,34 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/smp.h> -#include <machine/clock.h> #include <x86/specialreg.h> #include <x86/apicreg.h> +#include <machine/clock.h> +#include <machine/smp.h> + #include <machine/vmm.h> -#include "vmm_stat.h" +#include "vmm_ipi.h" #include "vmm_lapic.h" #include "vmm_ktr.h" +#include "vmm_stat.h" + #include "vlapic.h" +#include "vlapic_priv.h" #include "vioapic.h" -#define VLAPIC_CTR0(vlapic, format) \ - VCPU_CTR0((vlapic)->vm, (vlapic)->vcpuid, format) - -#define VLAPIC_CTR1(vlapic, format, p1) \ - VCPU_CTR1((vlapic)->vm, (vlapic)->vcpuid, format, p1) - -#define VLAPIC_CTR2(vlapic, format, p1, p2) \ - VCPU_CTR2((vlapic)->vm, (vlapic)->vcpuid, format, p1, p2) - -#define VLAPIC_CTR_IRR(vlapic, msg) \ -do { \ - uint32_t *irrptr = &(vlapic)->apic.irr0; \ - irrptr[0] = irrptr[0]; /* silence compiler */ \ - VLAPIC_CTR1((vlapic), msg " irr0 0x%08x", irrptr[0 << 2]); \ - VLAPIC_CTR1((vlapic), msg " irr1 0x%08x", irrptr[1 << 2]); \ - VLAPIC_CTR1((vlapic), msg " irr2 0x%08x", irrptr[2 << 2]); \ - VLAPIC_CTR1((vlapic), msg " irr3 0x%08x", irrptr[3 << 2]); \ - VLAPIC_CTR1((vlapic), msg " irr4 0x%08x", irrptr[4 << 2]); \ - VLAPIC_CTR1((vlapic), msg " irr5 0x%08x", irrptr[5 << 2]); \ - VLAPIC_CTR1((vlapic), msg " irr6 0x%08x", irrptr[6 << 2]); \ - VLAPIC_CTR1((vlapic), msg " irr7 0x%08x", irrptr[7 << 2]); \ -} while (0) - -#define VLAPIC_CTR_ISR(vlapic, msg) \ -do { \ - uint32_t *isrptr = &(vlapic)->apic.isr0; \ - isrptr[0] = isrptr[0]; /* silence compiler */ \ - VLAPIC_CTR1((vlapic), msg " isr0 0x%08x", isrptr[0 << 2]); \ - VLAPIC_CTR1((vlapic), msg " isr1 0x%08x", isrptr[1 << 2]); \ - VLAPIC_CTR1((vlapic), msg " isr2 0x%08x", isrptr[2 << 2]); \ - VLAPIC_CTR1((vlapic), msg " isr3 0x%08x", isrptr[3 << 2]); \ - VLAPIC_CTR1((vlapic), msg " isr4 0x%08x", isrptr[4 << 2]); \ - VLAPIC_CTR1((vlapic), msg " isr5 0x%08x", isrptr[5 << 2]); \ - VLAPIC_CTR1((vlapic), msg " isr6 0x%08x", isrptr[6 << 2]); \ - VLAPIC_CTR1((vlapic), msg " isr7 0x%08x", isrptr[7 << 2]); \ -} while (0) - -static MALLOC_DEFINE(M_VLAPIC, "vlapic", "vlapic"); - #define PRIO(x) ((x) >> 4) #define VLAPIC_VERSION (16) -#define VLAPIC_MAXLVT_ENTRIES (APIC_LVT_CMCI) #define x2apic(vlapic) (((vlapic)->msr_apicbase & APICBASE_X2APIC) ? 1 : 0) -enum boot_state { - BS_INIT, - BS_SIPI, - BS_RUNNING -}; - -struct vlapic { - struct vm *vm; - int vcpuid; - - struct LAPIC apic; - - uint32_t esr_pending; - int esr_firing; - - struct callout callout; /* vlapic timer */ - struct bintime timer_fire_bt; /* callout expiry time */ - struct bintime timer_freq_bt; /* timer frequency */ - struct bintime timer_period_bt; /* timer period */ - struct mtx timer_mtx; - - /* - * The 'isrvec_stk' is a stack of vectors injected by the local apic. - * A vector is popped from the stack when the processor does an EOI. - * The vector on the top of the stack is used to compute the - * Processor Priority in conjunction with the TPR. - */ - uint8_t isrvec_stk[ISRVEC_STK_SIZE]; - int isrvec_stk_top; - - uint64_t msr_apicbase; - enum boot_state boot_state; -}; - /* * The 'vlapic->timer_mtx' is used to provide mutual exclusion between the - * vlapic_callout_handler() and vcpu accesses to the following registers: - * - initial count register aka icr_timer - * - current count register aka ccr_timer - * - divide config register aka dcr_timer + * vlapic_callout_handler() and vcpu accesses to: + * - timer_freq_bt, timer_period_bt, timer_fire_bt * - timer LVT register - * - * Note that the vlapic_callout_handler() does not write to any of these - * registers so they can be safely read from the vcpu context without locking. */ #define VLAPIC_TIMER_LOCK(vlapic) mtx_lock_spin(&((vlapic)->timer_mtx)) #define VLAPIC_TIMER_UNLOCK(vlapic) mtx_unlock_spin(&((vlapic)->timer_mtx)) @@ -156,72 +82,71 @@ vlapic_get_id(struct vlapic *vlapic) return (vlapic->vcpuid << 24); } -static __inline uint32_t -vlapic_get_ldr(struct vlapic *vlapic) +static uint32_t +x2apic_ldr(struct vlapic *vlapic) { - struct LAPIC *lapic; int apicid; uint32_t ldr; - lapic = &vlapic->apic; - if (x2apic(vlapic)) { - apicid = vlapic_get_id(vlapic); - ldr = 1 << (apicid & 0xf); - ldr |= (apicid & 0xffff0) << 12; - return (ldr); - } else - return (lapic->ldr); + apicid = vlapic_get_id(vlapic); + ldr = 1 << (apicid & 0xf); + ldr |= (apicid & 0xffff0) << 12; + return (ldr); } -static __inline uint32_t -vlapic_get_dfr(struct vlapic *vlapic) +void +vlapic_dfr_write_handler(struct vlapic *vlapic) { struct LAPIC *lapic; - lapic = &vlapic->apic; - if (x2apic(vlapic)) - return (0); - else - return (lapic->dfr); -} - -static void -vlapic_set_dfr(struct vlapic *vlapic, uint32_t data) -{ - uint32_t dfr; - struct LAPIC *lapic; - + lapic = vlapic->apic_page; if (x2apic(vlapic)) { - VM_CTR1(vlapic->vm, "write to DFR in x2apic mode: %#x", data); + VM_CTR1(vlapic->vm, "ignoring write to DFR in x2apic mode: %#x", + lapic->dfr); + lapic->dfr = 0; return; } - lapic = &vlapic->apic; - dfr = (lapic->dfr & APIC_DFR_RESERVED) | (data & APIC_DFR_MODEL_MASK); - if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT) + lapic->dfr &= APIC_DFR_MODEL_MASK; + lapic->dfr |= APIC_DFR_RESERVED; + + if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT) VLAPIC_CTR0(vlapic, "vlapic DFR in Flat Model"); - else if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER) + else if ((lapic->dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_CLUSTER) VLAPIC_CTR0(vlapic, "vlapic DFR in Cluster Model"); else - VLAPIC_CTR1(vlapic, "vlapic DFR in Unknown Model %#x", dfr); - - lapic->dfr = dfr; + VLAPIC_CTR1(vlapic, "DFR in Unknown Model %#x", lapic->dfr); } -static void -vlapic_set_ldr(struct vlapic *vlapic, uint32_t data) +void +vlapic_ldr_write_handler(struct vlapic *vlapic) { struct LAPIC *lapic; + lapic = vlapic->apic_page; + /* LDR is read-only in x2apic mode */ if (x2apic(vlapic)) { - VLAPIC_CTR1(vlapic, "write to LDR in x2apic mode: %#x", data); - return; + VLAPIC_CTR1(vlapic, "ignoring write to LDR in x2apic mode: %#x", + lapic->ldr); + lapic->ldr = x2apic_ldr(vlapic); + } else { + lapic->ldr &= ~APIC_LDR_RESERVED; + VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr); } +} - lapic = &vlapic->apic; - lapic->ldr = data & ~APIC_LDR_RESERVED; - VLAPIC_CTR1(vlapic, "vlapic LDR set to %#x", lapic->ldr); +void +vlapic_id_write_handler(struct vlapic *vlapic) +{ + struct LAPIC *lapic; + + /* + * We don't allow the ID register to be modified so reset it back to + * its default value. + */ + lapic = vlapic->apic_page; + lapic->id = vlapic_get_id(vlapic); } static int @@ -249,16 +174,6 @@ vlapic_timer_divisor(uint32_t dcr) } } -static void -vlapic_mask_lvts(uint32_t *lvts, int num_lvt) -{ - int i; - for (i = 0; i < num_lvt; i++) { - *lvts |= APIC_LVT_M; - lvts += 4; - } -} - #if 0 static inline void vlapic_dump_lvt(uint32_t offset, uint32_t *lvt) @@ -277,7 +192,7 @@ vlapic_get_ccr(struct vlapic *vlapic) uint32_t ccr; ccr = 0; - lapic = &vlapic->apic; + lapic = vlapic->apic_page; VLAPIC_TIMER_LOCK(vlapic); if (callout_active(&vlapic->callout)) { @@ -301,18 +216,18 @@ vlapic_get_ccr(struct vlapic *vlapic) return (ccr); } -static void -vlapic_set_dcr(struct vlapic *vlapic, uint32_t dcr) +void +vlapic_dcr_write_handler(struct vlapic *vlapic) { struct LAPIC *lapic; int divisor; - lapic = &vlapic->apic; + lapic = vlapic->apic_page; VLAPIC_TIMER_LOCK(vlapic); - lapic->dcr_timer = dcr; - divisor = vlapic_timer_divisor(dcr); - VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d", dcr, divisor); + divisor = vlapic_timer_divisor(lapic->dcr_timer); + VLAPIC_CTR2(vlapic, "vlapic dcr_timer=%#x, divisor=%d", + lapic->dcr_timer, divisor); /* * Update the timer frequency and the timer period. @@ -327,57 +242,42 @@ vlapic_set_dcr(struct vlapic *vlapic, uint32_t dcr) VLAPIC_TIMER_UNLOCK(vlapic); } -static void -vlapic_update_errors(struct vlapic *vlapic) -{ - struct LAPIC *lapic = &vlapic->apic; - lapic->esr = vlapic->esr_pending; - vlapic->esr_pending = 0; -} - -static void -vlapic_reset(struct vlapic *vlapic) +void +vlapic_esr_write_handler(struct vlapic *vlapic) { struct LAPIC *lapic; - lapic = &vlapic->apic; - bzero(lapic, sizeof(struct LAPIC)); - - lapic->version = VLAPIC_VERSION; - lapic->version |= (VLAPIC_MAXLVT_ENTRIES << MAXLVTSHIFT); - lapic->dfr = 0xffffffff; - lapic->svr = APIC_SVR_VECTOR; - vlapic_mask_lvts(&lapic->lvt_timer, 6); - vlapic_mask_lvts(&lapic->lvt_cmci, 1); - vlapic_set_dcr(vlapic, 0); - - if (vlapic->vcpuid == 0) - vlapic->boot_state = BS_RUNNING; /* BSP */ - else - vlapic->boot_state = BS_INIT; /* AP */ + lapic = vlapic->apic_page; + lapic->esr = vlapic->esr_pending; + vlapic->esr_pending = 0; } -void +int vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level) { - struct LAPIC *lapic = &vlapic->apic; - uint32_t *irrptr, *tmrptr, mask; - int idx; + struct LAPIC *lapic; + uint32_t *irrptr, *tmrptr, mask; + int idx; - if (vector < 0 || vector >= 256) - panic("vlapic_set_intr_ready: invalid vector %d\n", vector); + KASSERT(vector >= 0 && vector < 256, ("invalid vector %d", vector)); + lapic = vlapic->apic_page; if (!(lapic->svr & APIC_SVR_ENABLE)) { VLAPIC_CTR1(vlapic, "vlapic is software disabled, ignoring " "interrupt %d", vector); - return; + return (0); } if (vector < 16) { vlapic_set_error(vlapic, APIC_ESR_RECEIVE_ILLEGAL_VECTOR); - return; + VLAPIC_CTR1(vlapic, "vlapic ignoring interrupt to vector %d", + vector); + return (1); } - + + if (vlapic->ops.set_intr_ready) + return ((*vlapic->ops.set_intr_ready)(vlapic, vector, level)); + idx = (vector / 32) * 4; mask = 1 << (vector % 32); @@ -385,23 +285,22 @@ vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level) atomic_set_int(&irrptr[idx], mask); /* - * Upon acceptance of an interrupt into the IRR the corresponding - * TMR bit is cleared for edge-triggered interrupts and set for - * level-triggered interrupts. + * Verify that the trigger-mode of the interrupt matches with + * the vlapic TMR registers. */ tmrptr = &lapic->tmr0; - if (level) - atomic_set_int(&tmrptr[idx], mask); - else - atomic_clear_int(&tmrptr[idx], mask); + KASSERT((tmrptr[idx] & mask) == (level ? mask : 0), + ("vlapic TMR[%d] is 0x%08x but interrupt is %s-triggered", + idx / 4, tmrptr[idx], level ? "level" : "edge")); VLAPIC_CTR_IRR(vlapic, "vlapic_set_intr_ready"); + return (1); } static __inline uint32_t * vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset) { - struct LAPIC *lapic = &vlapic->apic; + struct LAPIC *lapic = vlapic->apic_page; int i; switch (offset) { @@ -415,24 +314,65 @@ vlapic_get_lvtptr(struct vlapic *vlapic, uint32_t offset) } } +static __inline int +lvt_off_to_idx(uint32_t offset) +{ + int index; + + switch (offset) { + case APIC_OFFSET_CMCI_LVT: + index = APIC_LVT_CMCI; + break; + case APIC_OFFSET_TIMER_LVT: + index = APIC_LVT_TIMER; + break; + case APIC_OFFSET_THERM_LVT: + index = APIC_LVT_THERMAL; + break; + case APIC_OFFSET_PERF_LVT: + index = APIC_LVT_PMC; + break; + case APIC_OFFSET_LINT0_LVT: + index = APIC_LVT_LINT0; + break; + case APIC_OFFSET_LINT1_LVT: + index = APIC_LVT_LINT1; + break; + case APIC_OFFSET_ERROR_LVT: + index = APIC_LVT_ERROR; + break; + default: + index = -1; + break; + } + KASSERT(index >= 0 && index <= VLAPIC_MAXLVT_INDEX, ("lvt_off_to_idx: " + "invalid lvt index %d for offset %#x", index, offset)); + + return (index); +} + static __inline uint32_t vlapic_get_lvt(struct vlapic *vlapic, uint32_t offset) { + int idx; + uint32_t val; - return (*vlapic_get_lvtptr(vlapic, offset)); + idx = lvt_off_to_idx(offset); + val = atomic_load_acq_32(&vlapic->lvt_last[idx]); + return (val); } -static void -vlapic_set_lvt(struct vlapic *vlapic, uint32_t offset, uint32_t val) +void +vlapic_lvt_write_handler(struct vlapic *vlapic, uint32_t offset) { - uint32_t *lvtptr, mask; + uint32_t *lvtptr, mask, val; struct LAPIC *lapic; + int idx; - lapic = &vlapic->apic; + lapic = vlapic->apic_page; lvtptr = vlapic_get_lvtptr(vlapic, offset); - - if (offset == APIC_OFFSET_TIMER_LVT) - VLAPIC_TIMER_LOCK(vlapic); + val = *lvtptr; + idx = lvt_off_to_idx(offset); if (!(lapic->svr & APIC_SVR_ENABLE)) val |= APIC_LVT_M; @@ -451,10 +391,36 @@ vlapic_set_lvt(struct vlapic *vlapic, uint32_t offset, uint32_t val) mask |= APIC_LVT_DM; break; } - *lvtptr = val & mask; + val &= mask; + *lvtptr = val; + atomic_store_rel_32(&vlapic->lvt_last[idx], val); +} + +static void +vlapic_mask_lvts(struct vlapic *vlapic) +{ + struct LAPIC *lapic = vlapic->apic_page; + + lapic->lvt_cmci |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_CMCI_LVT); + + lapic->lvt_timer |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_TIMER_LVT); - if (offset == APIC_OFFSET_TIMER_LVT) - VLAPIC_TIMER_UNLOCK(vlapic); + lapic->lvt_thermal |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_THERM_LVT); + + lapic->lvt_pcint |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_PERF_LVT); + + lapic->lvt_lint0 |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT0_LVT); + + lapic->lvt_lint1 |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_LINT1_LVT); + + lapic->lvt_error |= APIC_LVT_M; + vlapic_lvt_write_handler(vlapic, APIC_OFFSET_ERROR_LVT); } static int @@ -474,8 +440,8 @@ vlapic_fire_lvt(struct vlapic *vlapic, uint32_t lvt) vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR); return (0); } - vlapic_set_intr_ready(vlapic, vec, false); - vcpu_notify_event(vlapic->vm, vlapic->vcpuid); + if (vlapic_set_intr_ready(vlapic, vec, false)) + vcpu_notify_event(vlapic->vm, vlapic->vcpuid, true); break; case APIC_LVT_DM_NMI: vm_inject_nmi(vlapic->vm, vlapic->vcpuid); @@ -494,7 +460,7 @@ dump_isrvec_stk(struct vlapic *vlapic) int i; uint32_t *isrptr; - isrptr = &vlapic->apic.isr0; + isrptr = &vlapic->apic_page->isr0; for (i = 0; i < 8; i++) printf("ISR%d 0x%08x\n", i, isrptr[i * 4]); @@ -519,7 +485,7 @@ vlapic_update_ppr(struct vlapic *vlapic) * bits is set in the ISRx registers. */ isrvec = vlapic->isrvec_stk[vlapic->isrvec_stk_top]; - tpr = vlapic->apic.tpr; + tpr = vlapic->apic_page->tpr; #if 1 { @@ -548,7 +514,7 @@ vlapic_update_ppr(struct vlapic *vlapic) * corresponding entry on the isrvec stack. */ i = 1; - isrptr = &vlapic->apic.isr0; + isrptr = &vlapic->apic_page->isr0; for (vector = 0; vector < 256; vector++) { idx = (vector / 32) * 4; if (isrptr[idx] & (1 << (vector % 32))) { @@ -568,14 +534,14 @@ vlapic_update_ppr(struct vlapic *vlapic) else ppr = isrvec & 0xf0; - vlapic->apic.ppr = ppr; + vlapic->apic_page->ppr = ppr; VLAPIC_CTR1(vlapic, "vlapic_update_ppr 0x%02x", ppr); } static void vlapic_process_eoi(struct vlapic *vlapic) { - struct LAPIC *lapic = &vlapic->apic; + struct LAPIC *lapic = vlapic->apic_page; uint32_t *isrptr, *tmrptr; int i, idx, bitpos, vector; @@ -675,7 +641,7 @@ vlapic_fire_cmci(struct vlapic *vlapic) } } -static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_ENTRIES, +static VMM_STAT_ARRAY(LVTS_TRIGGERRED, VLAPIC_MAXLVT_INDEX + 1, "lvts triggered"); int @@ -735,8 +701,6 @@ vlapic_callout_handler(void *arg) callout_deactivate(&vlapic->callout); - KASSERT(vlapic->apic.icr_timer != 0, ("vlapic timer is disabled")); - vlapic_fire_timer(vlapic); if (vlapic_periodic_timer(vlapic)) { @@ -781,16 +745,17 @@ done: VLAPIC_TIMER_UNLOCK(vlapic); } -static void -vlapic_set_icr_timer(struct vlapic *vlapic, uint32_t icr_timer) +void +vlapic_icrtmr_write_handler(struct vlapic *vlapic) { struct LAPIC *lapic; sbintime_t sbt; + uint32_t icr_timer; VLAPIC_TIMER_LOCK(vlapic); - lapic = &vlapic->apic; - lapic->icr_timer = icr_timer; + lapic = vlapic->apic_page; + icr_timer = lapic->icr_timer; vlapic->timer_period_bt = vlapic->timer_freq_bt; bintime_mul(&vlapic->timer_period_bt, icr_timer); @@ -872,8 +837,8 @@ vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, CPU_CLR(vcpuid, &amask); vlapic = vm_lapic(vm, vcpuid); - dfr = vlapic_get_dfr(vlapic); - ldr = vlapic_get_ldr(vlapic); + dfr = vlapic->apic_page->dfr; + ldr = vlapic->apic_page->ldr; if ((dfr & APIC_DFR_MODEL_MASK) == APIC_DFR_MODEL_FLAT) { @@ -912,16 +877,22 @@ vlapic_calcdest(struct vm *vm, cpuset_t *dmask, uint32_t dest, bool phys, static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu"); -static int -lapic_process_icr(struct vlapic *vlapic, uint64_t icrval, bool *retu) +int +vlapic_icrlo_write_handler(struct vlapic *vlapic, bool *retu) { int i; bool phys; cpuset_t dmask; + uint64_t icrval; uint32_t dest, vec, mode; struct vlapic *vlapic2; struct vm_exit *vmexit; - + struct LAPIC *lapic; + + lapic = vlapic->apic_page; + lapic->icr_lo &= ~APIC_DELSTAT_PEND; + icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo; + if (x2apic(vlapic)) dest = icrval >> 32; else @@ -931,9 +902,12 @@ lapic_process_icr(struct vlapic *vlapic, uint64_t icrval, bool *retu) if (mode == APIC_DELMODE_FIXED && vec < 16) { vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR); + VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec); return (0); } - + + VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec); + if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) { switch (icrval & APIC_DEST_MASK) { case APIC_DEST_DESTFLD: @@ -963,8 +937,13 @@ lapic_process_icr(struct vlapic *vlapic, uint64_t icrval, bool *retu) lapic_intr_edge(vlapic->vm, i, vec); vmm_stat_array_incr(vlapic->vm, vlapic->vcpuid, IPIS_SENT, i, 1); - } else + VLAPIC_CTR2(vlapic, "vlapic sending ipi %d " + "to vcpuid %d", vec, i); + } else { vm_inject_nmi(vlapic->vm, i); + VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi " + "to vcpuid %d", i); + } } return (0); /* handled completely in the kernel */ @@ -1019,12 +998,15 @@ lapic_process_icr(struct vlapic *vlapic, uint64_t icrval, bool *retu) } int -vlapic_pending_intr(struct vlapic *vlapic) +vlapic_pending_intr(struct vlapic *vlapic, int *vecptr) { - struct LAPIC *lapic = &vlapic->apic; + struct LAPIC *lapic = vlapic->apic_page; int idx, i, bitpos, vector; uint32_t *irrptr, val; + if (vlapic->ops.pending_intr) + return ((*vlapic->ops.pending_intr)(vlapic, vecptr)); + irrptr = &lapic->irr0; /* @@ -1039,21 +1021,26 @@ vlapic_pending_intr(struct vlapic *vlapic) vector = i * 32 + (bitpos - 1); if (PRIO(vector) > PRIO(lapic->ppr)) { VLAPIC_CTR1(vlapic, "pending intr %d", vector); - return (vector); + if (vecptr != NULL) + *vecptr = vector; + return (1); } else break; } } - return (-1); + return (0); } void vlapic_intr_accepted(struct vlapic *vlapic, int vector) { - struct LAPIC *lapic = &vlapic->apic; + struct LAPIC *lapic = vlapic->apic_page; uint32_t *irrptr, *isrptr; int idx, stk_top; + if (vlapic->ops.intr_accepted) + return ((*vlapic->ops.intr_accepted)(vlapic, vector)); + /* * clear the ready bit for vector being accepted in irr * and set the vector as in service in isr. @@ -1081,24 +1068,30 @@ vlapic_intr_accepted(struct vlapic *vlapic, int vector) vlapic_update_ppr(vlapic); } -static void -lapic_set_svr(struct vlapic *vlapic, uint32_t new) +void +vlapic_svr_write_handler(struct vlapic *vlapic) { struct LAPIC *lapic; - uint32_t old, changed; + uint32_t old, new, changed; + + lapic = vlapic->apic_page; + + new = lapic->svr; + old = vlapic->svr_last; + vlapic->svr_last = new; - lapic = &vlapic->apic; - old = lapic->svr; changed = old ^ new; if ((changed & APIC_SVR_ENABLE) != 0) { if ((new & APIC_SVR_ENABLE) == 0) { /* - * The apic is now disabled so stop the apic timer. + * The apic is now disabled so stop the apic timer + * and mask all the LVT entries. */ VLAPIC_CTR0(vlapic, "vlapic is software-disabled"); VLAPIC_TIMER_LOCK(vlapic); callout_stop(&vlapic->callout); VLAPIC_TIMER_UNLOCK(vlapic); + vlapic_mask_lvts(vlapic); } else { /* * The apic is now enabled so restart the apic timer @@ -1106,16 +1099,15 @@ lapic_set_svr(struct vlapic *vlapic, uint32_t new) */ VLAPIC_CTR0(vlapic, "vlapic is software-enabled"); if (vlapic_periodic_timer(vlapic)) - vlapic_set_icr_timer(vlapic, lapic->icr_timer); + vlapic_icrtmr_write_handler(vlapic); } } - lapic->svr = new; } int vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu) { - struct LAPIC *lapic = &vlapic->apic; + struct LAPIC *lapic = vlapic->apic_page; uint32_t *reg; int i; @@ -1128,7 +1120,7 @@ vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu) switch(offset) { case APIC_OFFSET_ID: - *data = vlapic_get_id(vlapic); + *data = lapic->id; break; case APIC_OFFSET_VER: *data = lapic->version; @@ -1146,10 +1138,10 @@ vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu) *data = lapic->eoi; break; case APIC_OFFSET_LDR: - *data = vlapic_get_ldr(vlapic); + *data = lapic->ldr; break; case APIC_OFFSET_DFR: - *data = vlapic_get_dfr(vlapic); + *data = lapic->dfr; break; case APIC_OFFSET_SVR: *data = lapic->svr; @@ -1174,6 +1166,8 @@ vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu) break; case APIC_OFFSET_ICR_LOW: *data = lapic->icr_lo; + if (x2apic(vlapic)) + *data |= (uint64_t)lapic->icr_hi << 32; break; case APIC_OFFSET_ICR_HI: *data = lapic->icr_hi; @@ -1181,14 +1175,19 @@ vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu) case APIC_OFFSET_CMCI_LVT: case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: *data = vlapic_get_lvt(vlapic, offset); +#ifdef INVARIANTS + reg = vlapic_get_lvtptr(vlapic, offset); + KASSERT(*data == *reg, ("inconsistent lvt value at " + "offset %#lx: %#lx/%#x", offset, *data, *reg)); +#endif break; - case APIC_OFFSET_ICR: + case APIC_OFFSET_TIMER_ICR: *data = lapic->icr_timer; break; - case APIC_OFFSET_CCR: + case APIC_OFFSET_TIMER_CCR: *data = vlapic_get_ccr(vlapic); break; - case APIC_OFFSET_DCR: + case APIC_OFFSET_TIMER_DCR: *data = lapic->dcr_timer; break; case APIC_OFFSET_RRR: @@ -1204,9 +1203,13 @@ done: int vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu) { - struct LAPIC *lapic = &vlapic->apic; + struct LAPIC *lapic = vlapic->apic_page; + uint32_t *regptr; int retval; + KASSERT((offset & 0xf) == 0 && offset < PAGE_SIZE, + ("vlapic_write: invalid offset %#lx", offset)); + VLAPIC_CTR2(vlapic, "vlapic write offset %#x, data %#lx", offset, data); if (offset > sizeof(*lapic)) { @@ -1214,10 +1217,11 @@ vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu) } retval = 0; - offset &= ~3; switch(offset) { case APIC_OFFSET_ID: + lapic->id = data; + vlapic_id_write_handler(vlapic); break; case APIC_OFFSET_TPR: lapic->tpr = data & 0xff; @@ -1227,41 +1231,44 @@ vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu) vlapic_process_eoi(vlapic); break; case APIC_OFFSET_LDR: - vlapic_set_ldr(vlapic, data); + lapic->ldr = data; + vlapic_ldr_write_handler(vlapic); break; case APIC_OFFSET_DFR: - vlapic_set_dfr(vlapic, data); + lapic->dfr = data; + vlapic_dfr_write_handler(vlapic); break; case APIC_OFFSET_SVR: - lapic_set_svr(vlapic, data); + lapic->svr = data; + vlapic_svr_write_handler(vlapic); break; case APIC_OFFSET_ICR_LOW: - if (!x2apic(vlapic)) { - data &= 0xffffffff; - data |= (uint64_t)lapic->icr_hi << 32; - } - retval = lapic_process_icr(vlapic, data, retu); + lapic->icr_lo = data; + if (x2apic(vlapic)) + lapic->icr_hi = data >> 32; + retval = vlapic_icrlo_write_handler(vlapic, retu); break; case APIC_OFFSET_ICR_HI: - if (!x2apic(vlapic)) { - retval = 0; - lapic->icr_hi = data; - } + lapic->icr_hi = data; break; case APIC_OFFSET_CMCI_LVT: case APIC_OFFSET_TIMER_LVT ... APIC_OFFSET_ERROR_LVT: - vlapic_set_lvt(vlapic, offset, data); + regptr = vlapic_get_lvtptr(vlapic, offset); + *regptr = data; + vlapic_lvt_write_handler(vlapic, offset); break; - case APIC_OFFSET_ICR: - vlapic_set_icr_timer(vlapic, data); + case APIC_OFFSET_TIMER_ICR: + lapic->icr_timer = data; + vlapic_icrtmr_write_handler(vlapic); break; - case APIC_OFFSET_DCR: - vlapic_set_dcr(vlapic, data); + case APIC_OFFSET_TIMER_DCR: + lapic->dcr_timer = data; + vlapic_dcr_write_handler(vlapic); break; case APIC_OFFSET_ESR: - vlapic_update_errors(vlapic); + vlapic_esr_write_handler(vlapic); break; case APIC_OFFSET_VER: case APIC_OFFSET_APR: @@ -1270,7 +1277,7 @@ vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu) case APIC_OFFSET_ISR0 ... APIC_OFFSET_ISR7: case APIC_OFFSET_TMR0 ... APIC_OFFSET_TMR7: case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7: - case APIC_OFFSET_CCR: + case APIC_OFFSET_TIMER_CCR: default: // Read only. break; @@ -1279,14 +1286,41 @@ vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu) return (retval); } -struct vlapic * -vlapic_init(struct vm *vm, int vcpuid) +static void +vlapic_reset(struct vlapic *vlapic) { - struct vlapic *vlapic; + struct LAPIC *lapic; + + lapic = vlapic->apic_page; + bzero(lapic, sizeof(struct LAPIC)); - vlapic = malloc(sizeof(struct vlapic), M_VLAPIC, M_WAITOK | M_ZERO); - vlapic->vm = vm; - vlapic->vcpuid = vcpuid; + lapic->id = vlapic_get_id(vlapic); + lapic->version = VLAPIC_VERSION; + lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT); + lapic->dfr = 0xffffffff; + lapic->svr = APIC_SVR_VECTOR; + vlapic_mask_lvts(vlapic); + vlapic_reset_tmr(vlapic); + + lapic->dcr_timer = 0; + vlapic_dcr_write_handler(vlapic); + + if (vlapic->vcpuid == 0) + vlapic->boot_state = BS_RUNNING; /* BSP */ + else + vlapic->boot_state = BS_INIT; /* AP */ + + vlapic->svr_last = lapic->svr; +} + +void +vlapic_init(struct vlapic *vlapic) +{ + KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized")); + KASSERT(vlapic->vcpuid >= 0 && vlapic->vcpuid < VM_MAXCPU, + ("vlapic_init: vcpuid is not initialized")); + KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not " + "initialized")); /* * If the vlapic is configured in x2apic mode then it will be @@ -1300,12 +1334,10 @@ vlapic_init(struct vm *vm, int vcpuid) vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED; - if (vcpuid == 0) + if (vlapic->vcpuid == 0) vlapic->msr_apicbase |= APICBASE_BSP; vlapic_reset(vlapic); - - return (vlapic); } void @@ -1313,7 +1345,6 @@ vlapic_cleanup(struct vlapic *vlapic) { callout_drain(&vlapic->callout); - free(vlapic, M_VLAPIC); } uint64_t @@ -1324,19 +1355,38 @@ vlapic_get_apicbase(struct vlapic *vlapic) } void -vlapic_set_apicbase(struct vlapic *vlapic, uint64_t val) +vlapic_set_apicbase(struct vlapic *vlapic, uint64_t new) { - int err; + struct LAPIC *lapic; enum x2apic_state state; + uint64_t old; + int err; err = vm_get_x2apic_state(vlapic->vm, vlapic->vcpuid, &state); if (err) panic("vlapic_set_apicbase: err %d fetching x2apic state", err); if (state == X2APIC_DISABLED) - val &= ~APICBASE_X2APIC; + new &= ~APICBASE_X2APIC; + + old = vlapic->msr_apicbase; + vlapic->msr_apicbase = new; - vlapic->msr_apicbase = val; + /* + * If the vlapic is switching between xAPIC and x2APIC modes then + * reset the mode-dependent registers. + */ + if ((old ^ new) & APICBASE_X2APIC) { + lapic = vlapic->apic_page; + lapic->id = vlapic_get_id(vlapic); + if (x2apic(vlapic)) { + lapic->ldr = x2apic_ldr(vlapic); + lapic->dfr = 0; + } else { + lapic->ldr = 0; + lapic->dfr = 0xffffffff; + } + } } void @@ -1378,10 +1428,28 @@ vlapic_deliver_intr(struct vm *vm, bool level, uint32_t dest, bool phys, } } +void +vlapic_post_intr(struct vlapic *vlapic, int hostcpu, int ipinum) +{ + /* + * Post an interrupt to the vcpu currently running on 'hostcpu'. + * + * This is done by leveraging features like Posted Interrupts (Intel) + * Doorbell MSR (AMD AVIC) that avoid a VM exit. + * + * If neither of these features are available then fallback to + * sending an IPI to 'hostcpu'. + */ + if (vlapic->ops.post_intr) + (*vlapic->ops.post_intr)(vlapic, hostcpu); + else + ipi_cpu(hostcpu, ipinum); +} + bool vlapic_enabled(struct vlapic *vlapic) { - struct LAPIC *lapic = &vlapic->apic; + struct LAPIC *lapic = vlapic->apic_page; if ((vlapic->msr_apicbase & APICBASE_ENABLED) != 0 && (lapic->svr & APIC_SVR_ENABLE) != 0) @@ -1389,3 +1457,62 @@ vlapic_enabled(struct vlapic *vlapic) else return (false); } + +static void +vlapic_set_tmr(struct vlapic *vlapic, int vector, bool level) +{ + struct LAPIC *lapic; + uint32_t *tmrptr, mask; + int idx; + + lapic = vlapic->apic_page; + tmrptr = &lapic->tmr0; + idx = (vector / 32) * 4; + mask = 1 << (vector % 32); + if (level) + tmrptr[idx] |= mask; + else + tmrptr[idx] &= ~mask; + + if (vlapic->ops.set_tmr != NULL) + (*vlapic->ops.set_tmr)(vlapic, vector, level); +} + +void +vlapic_reset_tmr(struct vlapic *vlapic) +{ + int vector; + + VLAPIC_CTR0(vlapic, "vlapic resetting all vectors to edge-triggered"); + + for (vector = 0; vector <= 255; vector++) + vlapic_set_tmr(vlapic, vector, false); +} + +void +vlapic_set_tmr_level(struct vlapic *vlapic, uint32_t dest, bool phys, + int delmode, int vector) +{ + cpuset_t dmask; + bool lowprio; + + KASSERT(vector >= 0 && vector <= 255, ("invalid vector %d", vector)); + + /* + * A level trigger is valid only for fixed and lowprio delivery modes. + */ + if (delmode != APIC_DELMODE_FIXED && delmode != APIC_DELMODE_LOWPRIO) { + VLAPIC_CTR1(vlapic, "Ignoring level trigger-mode for " + "delivery-mode %d", delmode); + return; + } + + lowprio = (delmode == APIC_DELMODE_LOWPRIO); + vlapic_calcdest(vlapic->vm, &dmask, dest, phys, lowprio, false); + + if (!CPU_ISSET(vlapic->vcpuid, &dmask)) + return; + + VLAPIC_CTR1(vlapic, "vector %d set to level-triggered", vector); + vlapic_set_tmr(vlapic, vector, true); +} |