diff options
author | jhb <jhb@FreeBSD.org> | 2014-02-23 00:46:05 +0000 |
---|---|---|
committer | jhb <jhb@FreeBSD.org> | 2014-02-23 00:46:05 +0000 |
commit | 69d17427cae2b573203a13c2fe8cac0865c3cfdc (patch) | |
tree | 74c089432fac1660f52a522e3e53195374381e38 /usr.sbin/bhyve | |
parent | 04e37d68ee180962d9cdaef4ffd90789f36548ab (diff) | |
download | FreeBSD-src-69d17427cae2b573203a13c2fe8cac0865c3cfdc.zip FreeBSD-src-69d17427cae2b573203a13c2fe8cac0865c3cfdc.tar.gz |
MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
Several changes to the local APIC support in bhyve:
- Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
- If a vcpu disables its local APIC and then executes a 'HLT', spin
down the vcpu and destroy its thread context. Also modify the 'HLT'
processing to ignore pending interrupts in the IRR if interrupts have
been disabled by the guest. The interrupt cannot be injected into the
guest in any case, so resuming it is futile.
- Use callout(9) to drive the vlapic timer instead of clocking it on each
VM exit.
- When the guest is bringing up the APs in x2APIC mode, a write to the
ICR register will now trigger a return to userspace with an exitcode of
VM_EXITCODE_SPINUP_AP.
- Change the vlapic timer lock to be a spinlock because the vlapic can be
accessed from within a critical section (vm run loop) when the guest is
using x2APIC mode.
- Fix the vlapic version register.
- Add a command to bhyvectl to inject an NMI on a specific vcpu.
- Add an API to deliver message signalled interrupts to vcpus. This allows
callers to treat the MSI 'addr' and 'data' fields as opaque and also lets
bhyve implement multiple destination modes: physical, flat and clustered.
- Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to
'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
- Consolidate the virtual apic initialization in a single function:
vlapic_reset()
- Add a generic routine to trigger an LVT interrupt that supports both
fixed and NMI delivery modes.
- Add an ioctl and bhyvectl command to trigger local interrupts inside a
guest. In particular, a global NMI similar to that raised by SERR# or
PERR# can be simulated by asserting LINT1 on all vCPUs.
- Extend the LVT table in the vCPU local APIC to support CMCI.
- Flesh out the local APIC error reporting a bit to cache errors and
report them via ESR when ESR is written to. Add support for asserting
the error LVT when an error occurs. Raise illegal vector errors when
attempting to signal an invalid vector for an interrupt or when sending
an IPI.
- Export table entries in the MADT and MP Table advertising the stock x86
config of LINT0 set to ExtInt and LINT1 wired to NMI.
Diffstat (limited to 'usr.sbin/bhyve')
-rw-r--r-- | usr.sbin/bhyve/acpi.c | 10 | ||||
-rw-r--r-- | usr.sbin/bhyve/bhyverun.c | 32 | ||||
-rw-r--r-- | usr.sbin/bhyve/mptbl.c | 32 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_emul.c | 33 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_emul.h | 8 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_passthru.c | 34 |
6 files changed, 104 insertions, 45 deletions
diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c index 07cd49f..818e7f2 100644 --- a/usr.sbin/bhyve/acpi.c +++ b/usr.sbin/bhyve/acpi.c @@ -297,6 +297,16 @@ basl_fwrite_madt(FILE *fp) EFPRINTF(fp, "\t\t\tTrigger Mode : 3\n"); EFPRINTF(fp, "\n"); + /* Local APIC NMI is connected to LINT 1 on all CPUs */ + EFPRINTF(fp, "[0001]\t\tSubtable Type : 04\n"); + EFPRINTF(fp, "[0001]\t\tLength : 06\n"); + EFPRINTF(fp, "[0001]\t\tProcessorId : FF\n"); + EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0005\n"); + EFPRINTF(fp, "\t\t\tPolarity : 1\n"); + EFPRINTF(fp, "\t\t\tTrigger Mode : 1\n"); + EFPRINTF(fp, "[0001]\t\tInterrupt : 01\n"); + EFPRINTF(fp, "\n"); + EFFLUSH(fp); return (0); diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index 745fbf7..6c4df9c 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$"); #include <sys/mman.h> #include <sys/time.h> +#include <machine/atomic.h> #include <machine/segments.h> #include <stdio.h> @@ -86,8 +87,6 @@ static int pincpu = -1; static int guest_vmexit_on_hlt, guest_vmexit_on_pause, disable_x2apic; static int virtio_msix = 1; -static int foundcpus; - static int strictio; static int acpi; @@ -211,8 +210,7 @@ fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip) exit(1); } - cpumask |= 1 << vcpu; - foundcpus++; + atomic_set_int(&cpumask, 1 << vcpu); /* * Set up the vmexit struct to allow execution to start @@ -230,6 +228,20 @@ fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip) } static int +fbsdrun_deletecpu(struct vmctx *ctx, int vcpu) +{ + + if ((cpumask & (1 << vcpu)) == 0) { + fprintf(stderr, "addcpu: attempting to delete unknown cpu %d\n", + vcpu); + exit(1); + } + + atomic_clear_int(&cpumask, 1 << vcpu); + return (cpumask == 0); +} + +static int vmexit_catch_reset(void) { stats.io_reset++; @@ -333,6 +345,17 @@ vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu) } static int +vmexit_spindown_cpu(struct vmctx *ctx, struct 
vm_exit *vme, int *pvcpu) +{ + int lastcpu; + + lastcpu = fbsdrun_deletecpu(ctx, *pvcpu); + if (!lastcpu) + pthread_exit(NULL); + return (vmexit_catch_reset()); +} + +static int vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) { @@ -423,6 +446,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = { [VM_EXITCODE_MTRAP] = vmexit_mtrap, [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul, [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap, + [VM_EXITCODE_SPINDOWN_CPU] = vmexit_spindown_cpu, }; static void diff --git a/usr.sbin/bhyve/mptbl.c b/usr.sbin/bhyve/mptbl.c index 2b4ca84..ea332d4 100644 --- a/usr.sbin/bhyve/mptbl.c +++ b/usr.sbin/bhyve/mptbl.c @@ -72,6 +72,9 @@ __FBSDID("$FreeBSD$"); #define MPEP_FEATURES (0xBFEBFBFF) /* XXX Intel i7 */ +/* Number of local intr entries */ +#define MPEII_NUM_LOCAL_IRQ 2 + /* Number of i/o intr entries */ #define MPEII_MAX_IRQ 24 @@ -141,6 +144,30 @@ mpt_build_proc_entries(proc_entry_ptr mpep, int ncpu) } static void +mpt_build_localint_entries(int_entry_ptr mpie) +{ + + /* Hardcode LINT0 as ExtINT on all CPUs. */ + memset(mpie, 0, sizeof(*mpie)); + mpie->type = MPCT_ENTRY_LOCAL_INT; + mpie->int_type = INTENTRY_TYPE_EXTINT; + mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM | + INTENTRY_FLAGS_TRIGGER_CONFORM; + mpie->dst_apic_id = 0xff; + mpie->dst_apic_int = 0; + mpie++; + + /* Hardcode LINT1 as NMI on all CPUs. 
*/ + memset(mpie, 0, sizeof(*mpie)); + mpie->type = MPCT_ENTRY_LOCAL_INT; + mpie->int_type = INTENTRY_TYPE_NMI; + mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM | + INTENTRY_FLAGS_TRIGGER_CONFORM; + mpie->dst_apic_id = 0xff; + mpie->dst_apic_int = 1; +} + +static void mpt_build_bus_entries(bus_entry_ptr mpeb) { @@ -284,6 +311,11 @@ mptable_build(struct vmctx *ctx, int ncpu) curraddr += sizeof(*mpie) * MPEII_MAX_IRQ; mpch->entry_count += MPEII_MAX_IRQ; + mpie = (int_entry_ptr)curraddr; + mpt_build_localint_entries(mpie); + curraddr += sizeof(*mpie) * MPEII_NUM_LOCAL_IRQ; + mpch->entry_count += MPEII_NUM_LOCAL_IRQ; + if (oem_tbl_start) { mpch->oem_table_pointer = curraddr - startaddr + MPTABLE_BASE; mpch->oem_table_size = oem_tbl_size; diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index cdea967..5adb739 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -853,19 +853,14 @@ msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, else msgdata = pci_get_cfgdata16(pi, capoff + 8); - /* - * XXX check delivery mode, destination mode etc - */ mme = msgctrl & PCIM_MSICTRL_MME_MASK; pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 
1 : 0; if (pi->pi_msi.enabled) { - pi->pi_msi.cpu = (addrlo >> 12) & 0xff; - pi->pi_msi.vector = msgdata & 0xff; - pi->pi_msi.msgnum = 1 << (mme >> 4); + pi->pi_msi.addr = addrlo; + pi->pi_msi.msg_data = msgdata; + pi->pi_msi.maxmsgnum = 1 << (mme >> 4); } else { - pi->pi_msi.cpu = 0; - pi->pi_msi.vector = 0; - pi->pi_msi.msgnum = 0; + pi->pi_msi.maxmsgnum = 0; } } @@ -1143,10 +1138,10 @@ pci_msi_enabled(struct pci_devinst *pi) } int -pci_msi_msgnum(struct pci_devinst *pi) +pci_msi_maxmsgnum(struct pci_devinst *pi) { if (pi->pi_msi.enabled) - return (pi->pi_msi.msgnum); + return (pi->pi_msi.maxmsgnum); else return (0); } @@ -1175,19 +1170,17 @@ pci_generate_msix(struct pci_devinst *pi, int index) mte = &pi->pi_msix.table[index]; if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { /* XXX Set PBA bit if interrupt is disabled */ - vm_lapic_irq(pi->pi_vmctx, - (mte->addr >> 12) & 0xff, mte->msg_data & 0xff); + vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data); } } void -pci_generate_msi(struct pci_devinst *pi, int msg) +pci_generate_msi(struct pci_devinst *pi, int index) { - if (pci_msi_enabled(pi) && msg < pci_msi_msgnum(pi)) { - vm_lapic_irq(pi->pi_vmctx, - pi->pi_msi.cpu, - pi->pi_msi.vector + msg); + if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) { + vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr, + pi->pi_msi.msg_data + index); } } @@ -1595,10 +1588,10 @@ pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx, * Special magic value to generate an interrupt */ if (offset == 4 && size == 4 && pci_msi_enabled(pi)) - pci_generate_msi(pi, value % pci_msi_msgnum(pi)); + pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi)); if (value == 0xabcdef) { - for (i = 0; i < pci_msi_msgnum(pi); i++) + for (i = 0; i < pci_msi_maxmsgnum(pi); i++) pci_generate_msi(pi, i); } } diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h index b97c5b1..002924d 100644 --- a/usr.sbin/bhyve/pci_emul.h +++ b/usr.sbin/bhyve/pci_emul.h @@ -112,10 
+112,10 @@ struct pci_devinst { int pi_bar_getsize; struct { - int enabled; - int cpu; - int vector; - int msgnum; + int enabled; + uint64_t addr; + uint64_t msg_data; + int maxmsgnum; } pi_msi; struct { diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c index 43c542d..dab5ffc 100644 --- a/usr.sbin/bhyve/pci_passthru.c +++ b/usr.sbin/bhyve/pci_passthru.c @@ -345,12 +345,10 @@ msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc, /* If the entry is masked, don't set it up */ if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 || (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { - error = vm_setup_msix(ctx, vcpu, sc->psc_sel.pc_bus, - sc->psc_sel.pc_dev, - sc->psc_sel.pc_func, - index, entry->msg_data, - entry->vector_control, - entry->addr); + error = vm_setup_pptdev_msix(ctx, vcpu, + sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, + sc->psc_sel.pc_func, index, entry->addr, + entry->msg_data, entry->vector_control); } } } @@ -652,11 +650,12 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, if (msicap_access(sc, coff)) { msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val); - error = vm_setup_msi(ctx, vcpu, sc->psc_sel.pc_bus, - sc->psc_sel.pc_dev, sc->psc_sel.pc_func, pi->pi_msi.cpu, - pi->pi_msi.vector, pi->pi_msi.msgnum); + error = vm_setup_pptdev_msi(ctx, vcpu, sc->psc_sel.pc_bus, + sc->psc_sel.pc_dev, sc->psc_sel.pc_func, + pi->pi_msi.addr, pi->pi_msi.msg_data, + pi->pi_msi.maxmsgnum); if (error != 0) { - printf("vm_setup_msi returned error %d\r\n", errno); + printf("vm_setup_pptdev_msi error %d\r\n", errno); exit(1); } return (0); @@ -667,15 +666,16 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, if (pi->pi_msix.enabled) { msix_table_entries = pi->pi_msix.table_count; for (i = 0; i < msix_table_entries; i++) { - error = vm_setup_msix(ctx, vcpu, sc->psc_sel.pc_bus, - sc->psc_sel.pc_dev, - sc->psc_sel.pc_func, i, - pi->pi_msix.table[i].msg_data, - 
pi->pi_msix.table[i].vector_control, - pi->pi_msix.table[i].addr); + error = vm_setup_pptdev_msix(ctx, vcpu, + sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, + sc->psc_sel.pc_func, i, + pi->pi_msix.table[i].addr, + pi->pi_msix.table[i].msg_data, + pi->pi_msix.table[i].vector_control); if (error) { - printf("vm_setup_msix returned error %d\r\n", errno); + printf("vm_setup_pptdev_msix error " + "%d\r\n", errno); exit(1); } } |