summary | refs | log | tree | commit | diff | stats
path: root/usr.sbin
diff options
context:
space:
mode:
author: jhb <jhb@FreeBSD.org> 2014-02-23 00:46:05 +0000
committer: jhb <jhb@FreeBSD.org> 2014-02-23 00:46:05 +0000
commit: 69d17427cae2b573203a13c2fe8cac0865c3cfdc (patch)
tree: 74c089432fac1660f52a522e3e53195374381e38 /usr.sbin
parent: 04e37d68ee180962d9cdaef4ffd90789f36548ab (diff)
download: FreeBSD-src-69d17427cae2b573203a13c2fe8cac0865c3cfdc.zip
download: FreeBSD-src-69d17427cae2b573203a13c2fe8cac0865c3cfdc.tar.gz
MFC 258859,259081,259085,259205,259213,259275,259482,259537,259702,259779:
Several changes to the local APIC support in bhyve:
- Rename 'vm_interrupt_hostcpu()' to 'vcpu_notify_event()'.
- If a vcpu disables its local apic and then executes a 'HLT' then spin down the vcpu and destroy its thread context. Also modify the 'HLT' processing to ignore pending interrupts in the IRR if interrupts have been disabled by the guest. The interrupt cannot be injected into the guest in any case so resuming it is futile.
- Use callout(9) to drive the vlapic timer instead of clocking it on each VM exit.
- When the guest is bringing up the APs in the x2APIC mode a write to the ICR register will now trigger a return to userspace with an exitcode of VM_EXITCODE_SPINUP_AP.
- Change the vlapic timer lock to be a spinlock because the vlapic can be accessed from within a critical section (vm run loop) when guest is using x2apic mode.
- Fix the vlapic version register.
- Add a command to bhyvectl to inject an NMI on a specific vcpu.
- Add an API to deliver message signalled interrupts to vcpus. This allows callers to treat the MSI 'addr' and 'data' fields as opaque and also lets bhyve implement multiple destination modes: physical, flat and clustered.
- Rename the ambiguously named 'vm_setup_msi()' and 'vm_setup_msix()' to 'vm_setup_pptdev_msi()' and 'vm_setup_pptdev_msix()' respectively.
- Consolidate the virtual apic initialization in a single function: vlapic_reset()
- Add a generic routine to trigger an LVT interrupt that supports both fixed and NMI delivery modes.
- Add an ioctl and bhyvectl command to trigger local interrupts inside a guest. In particular, a global NMI similar to that raised by SERR# or PERR# can be simulated by asserting LINT1 on all vCPUs.
- Extend the LVT table in the vCPU local APIC to support CMCI.
- Flesh out the local APIC error reporting a bit to cache errors and report them via ESR when ESR is written to. Add support for asserting the error LVT when an error occurs. Raise illegal vector errors when attempting to signal an invalid vector for an interrupt or when sending an IPI.
- Export table entries in the MADT and MP Table advertising the stock x86 config of LINT0 set to ExtInt and LINT1 wired to NMI.
Diffstat (limited to 'usr.sbin')
-rw-r--r-- usr.sbin/bhyve/acpi.c 10
-rw-r--r-- usr.sbin/bhyve/bhyverun.c 32
-rw-r--r-- usr.sbin/bhyve/mptbl.c 32
-rw-r--r-- usr.sbin/bhyve/pci_emul.c 33
-rw-r--r-- usr.sbin/bhyve/pci_emul.h 8
-rw-r--r-- usr.sbin/bhyve/pci_passthru.c 34
-rw-r--r-- usr.sbin/bhyvectl/bhyvectl.c 20
7 files changed, 123 insertions, 46 deletions
diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c
index 07cd49f..818e7f2 100644
--- a/usr.sbin/bhyve/acpi.c
+++ b/usr.sbin/bhyve/acpi.c
@@ -297,6 +297,16 @@ basl_fwrite_madt(FILE *fp)
EFPRINTF(fp, "\t\t\tTrigger Mode : 3\n");
EFPRINTF(fp, "\n");
+ /* Local APIC NMI is connected to LINT 1 on all CPUs */
+ EFPRINTF(fp, "[0001]\t\tSubtable Type : 04\n");
+ EFPRINTF(fp, "[0001]\t\tLength : 06\n");
+ EFPRINTF(fp, "[0001]\t\tProcessorId : FF\n");
+ EFPRINTF(fp, "[0002]\t\tFlags (decoded below) : 0005\n");
+ EFPRINTF(fp, "\t\t\tPolarity : 1\n");
+ EFPRINTF(fp, "\t\t\tTrigger Mode : 1\n");
+ EFPRINTF(fp, "[0001]\t\tInterrupt : 01\n");
+ EFPRINTF(fp, "\n");
+
EFFLUSH(fp);
return (0);
diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c
index 745fbf7..6c4df9c 100644
--- a/usr.sbin/bhyve/bhyverun.c
+++ b/usr.sbin/bhyve/bhyverun.c
@@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mman.h>
#include <sys/time.h>
+#include <machine/atomic.h>
#include <machine/segments.h>
#include <stdio.h>
@@ -86,8 +87,6 @@ static int pincpu = -1;
static int guest_vmexit_on_hlt, guest_vmexit_on_pause, disable_x2apic;
static int virtio_msix = 1;
-static int foundcpus;
-
static int strictio;
static int acpi;
@@ -211,8 +210,7 @@ fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip)
exit(1);
}
- cpumask |= 1 << vcpu;
- foundcpus++;
+ atomic_set_int(&cpumask, 1 << vcpu);
/*
* Set up the vmexit struct to allow execution to start
@@ -230,6 +228,20 @@ fbsdrun_addcpu(struct vmctx *ctx, int vcpu, uint64_t rip)
}
static int
+fbsdrun_deletecpu(struct vmctx *ctx, int vcpu)
+{
+
+ if ((cpumask & (1 << vcpu)) == 0) {
+ fprintf(stderr, "addcpu: attempting to delete unknown cpu %d\n",
+ vcpu);
+ exit(1);
+ }
+
+ atomic_clear_int(&cpumask, 1 << vcpu);
+ return (cpumask == 0);
+}
+
+static int
vmexit_catch_reset(void)
{
stats.io_reset++;
@@ -333,6 +345,17 @@ vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
}
static int
+vmexit_spindown_cpu(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
+{
+ int lastcpu;
+
+ lastcpu = fbsdrun_deletecpu(ctx, *pvcpu);
+ if (!lastcpu)
+ pthread_exit(NULL);
+ return (vmexit_catch_reset());
+}
+
+static int
vmexit_vmx(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
@@ -423,6 +446,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
[VM_EXITCODE_MTRAP] = vmexit_mtrap,
[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
+ [VM_EXITCODE_SPINDOWN_CPU] = vmexit_spindown_cpu,
};
static void
diff --git a/usr.sbin/bhyve/mptbl.c b/usr.sbin/bhyve/mptbl.c
index 2b4ca84..ea332d4 100644
--- a/usr.sbin/bhyve/mptbl.c
+++ b/usr.sbin/bhyve/mptbl.c
@@ -72,6 +72,9 @@ __FBSDID("$FreeBSD$");
#define MPEP_FEATURES (0xBFEBFBFF) /* XXX Intel i7 */
+/* Number of local intr entries */
+#define MPEII_NUM_LOCAL_IRQ 2
+
/* Number of i/o intr entries */
#define MPEII_MAX_IRQ 24
@@ -141,6 +144,30 @@ mpt_build_proc_entries(proc_entry_ptr mpep, int ncpu)
}
static void
+mpt_build_localint_entries(int_entry_ptr mpie)
+{
+
+ /* Hardcode LINT0 as ExtINT on all CPUs. */
+ memset(mpie, 0, sizeof(*mpie));
+ mpie->type = MPCT_ENTRY_LOCAL_INT;
+ mpie->int_type = INTENTRY_TYPE_EXTINT;
+ mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM |
+ INTENTRY_FLAGS_TRIGGER_CONFORM;
+ mpie->dst_apic_id = 0xff;
+ mpie->dst_apic_int = 0;
+ mpie++;
+
+ /* Hardcode LINT1 as NMI on all CPUs. */
+ memset(mpie, 0, sizeof(*mpie));
+ mpie->type = MPCT_ENTRY_LOCAL_INT;
+ mpie->int_type = INTENTRY_TYPE_NMI;
+ mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM |
+ INTENTRY_FLAGS_TRIGGER_CONFORM;
+ mpie->dst_apic_id = 0xff;
+ mpie->dst_apic_int = 1;
+}
+
+static void
mpt_build_bus_entries(bus_entry_ptr mpeb)
{
@@ -284,6 +311,11 @@ mptable_build(struct vmctx *ctx, int ncpu)
curraddr += sizeof(*mpie) * MPEII_MAX_IRQ;
mpch->entry_count += MPEII_MAX_IRQ;
+ mpie = (int_entry_ptr)curraddr;
+ mpt_build_localint_entries(mpie);
+ curraddr += sizeof(*mpie) * MPEII_NUM_LOCAL_IRQ;
+ mpch->entry_count += MPEII_NUM_LOCAL_IRQ;
+
if (oem_tbl_start) {
mpch->oem_table_pointer = curraddr - startaddr + MPTABLE_BASE;
mpch->oem_table_size = oem_tbl_size;
diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c
index cdea967..5adb739 100644
--- a/usr.sbin/bhyve/pci_emul.c
+++ b/usr.sbin/bhyve/pci_emul.c
@@ -853,19 +853,14 @@ msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
else
msgdata = pci_get_cfgdata16(pi, capoff + 8);
- /*
- * XXX check delivery mode, destination mode etc
- */
mme = msgctrl & PCIM_MSICTRL_MME_MASK;
pi->pi_msi.enabled = msgctrl & PCIM_MSICTRL_MSI_ENABLE ? 1 : 0;
if (pi->pi_msi.enabled) {
- pi->pi_msi.cpu = (addrlo >> 12) & 0xff;
- pi->pi_msi.vector = msgdata & 0xff;
- pi->pi_msi.msgnum = 1 << (mme >> 4);
+ pi->pi_msi.addr = addrlo;
+ pi->pi_msi.msg_data = msgdata;
+ pi->pi_msi.maxmsgnum = 1 << (mme >> 4);
} else {
- pi->pi_msi.cpu = 0;
- pi->pi_msi.vector = 0;
- pi->pi_msi.msgnum = 0;
+ pi->pi_msi.maxmsgnum = 0;
}
}
@@ -1143,10 +1138,10 @@ pci_msi_enabled(struct pci_devinst *pi)
}
int
-pci_msi_msgnum(struct pci_devinst *pi)
+pci_msi_maxmsgnum(struct pci_devinst *pi)
{
if (pi->pi_msi.enabled)
- return (pi->pi_msi.msgnum);
+ return (pi->pi_msi.maxmsgnum);
else
return (0);
}
@@ -1175,19 +1170,17 @@ pci_generate_msix(struct pci_devinst *pi, int index)
mte = &pi->pi_msix.table[index];
if ((mte->vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
/* XXX Set PBA bit if interrupt is disabled */
- vm_lapic_irq(pi->pi_vmctx,
- (mte->addr >> 12) & 0xff, mte->msg_data & 0xff);
+ vm_lapic_msi(pi->pi_vmctx, mte->addr, mte->msg_data);
}
}
void
-pci_generate_msi(struct pci_devinst *pi, int msg)
+pci_generate_msi(struct pci_devinst *pi, int index)
{
- if (pci_msi_enabled(pi) && msg < pci_msi_msgnum(pi)) {
- vm_lapic_irq(pi->pi_vmctx,
- pi->pi_msi.cpu,
- pi->pi_msi.vector + msg);
+ if (pci_msi_enabled(pi) && index < pci_msi_maxmsgnum(pi)) {
+ vm_lapic_msi(pi->pi_vmctx, pi->pi_msi.addr,
+ pi->pi_msi.msg_data + index);
}
}
@@ -1595,10 +1588,10 @@ pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
* Special magic value to generate an interrupt
*/
if (offset == 4 && size == 4 && pci_msi_enabled(pi))
- pci_generate_msi(pi, value % pci_msi_msgnum(pi));
+ pci_generate_msi(pi, value % pci_msi_maxmsgnum(pi));
if (value == 0xabcdef) {
- for (i = 0; i < pci_msi_msgnum(pi); i++)
+ for (i = 0; i < pci_msi_maxmsgnum(pi); i++)
pci_generate_msi(pi, i);
}
}
diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h
index b97c5b1..002924d 100644
--- a/usr.sbin/bhyve/pci_emul.h
+++ b/usr.sbin/bhyve/pci_emul.h
@@ -112,10 +112,10 @@ struct pci_devinst {
int pi_bar_getsize;
struct {
- int enabled;
- int cpu;
- int vector;
- int msgnum;
+ int enabled;
+ uint64_t addr;
+ uint64_t msg_data;
+ int maxmsgnum;
} pi_msi;
struct {
diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c
index 43c542d..dab5ffc 100644
--- a/usr.sbin/bhyve/pci_passthru.c
+++ b/usr.sbin/bhyve/pci_passthru.c
@@ -345,12 +345,10 @@ msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc,
/* If the entry is masked, don't set it up */
if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 ||
(vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
- error = vm_setup_msix(ctx, vcpu, sc->psc_sel.pc_bus,
- sc->psc_sel.pc_dev,
- sc->psc_sel.pc_func,
- index, entry->msg_data,
- entry->vector_control,
- entry->addr);
+ error = vm_setup_pptdev_msix(ctx, vcpu,
+ sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
+ sc->psc_sel.pc_func, index, entry->addr,
+ entry->msg_data, entry->vector_control);
}
}
}
@@ -652,11 +650,12 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
if (msicap_access(sc, coff)) {
msicap_cfgwrite(pi, sc->psc_msi.capoff, coff, bytes, val);
- error = vm_setup_msi(ctx, vcpu, sc->psc_sel.pc_bus,
- sc->psc_sel.pc_dev, sc->psc_sel.pc_func, pi->pi_msi.cpu,
- pi->pi_msi.vector, pi->pi_msi.msgnum);
+ error = vm_setup_pptdev_msi(ctx, vcpu, sc->psc_sel.pc_bus,
+ sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
+ pi->pi_msi.addr, pi->pi_msi.msg_data,
+ pi->pi_msi.maxmsgnum);
if (error != 0) {
- printf("vm_setup_msi returned error %d\r\n", errno);
+ printf("vm_setup_pptdev_msi error %d\r\n", errno);
exit(1);
}
return (0);
@@ -667,15 +666,16 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
if (pi->pi_msix.enabled) {
msix_table_entries = pi->pi_msix.table_count;
for (i = 0; i < msix_table_entries; i++) {
- error = vm_setup_msix(ctx, vcpu, sc->psc_sel.pc_bus,
- sc->psc_sel.pc_dev,
- sc->psc_sel.pc_func, i,
- pi->pi_msix.table[i].msg_data,
- pi->pi_msix.table[i].vector_control,
- pi->pi_msix.table[i].addr);
+ error = vm_setup_pptdev_msix(ctx, vcpu,
+ sc->psc_sel.pc_bus, sc->psc_sel.pc_dev,
+ sc->psc_sel.pc_func, i,
+ pi->pi_msix.table[i].addr,
+ pi->pi_msix.table[i].msg_data,
+ pi->pi_msix.table[i].vector_control);
if (error) {
- printf("vm_setup_msix returned error %d\r\n", errno);
+ printf("vm_setup_pptdev_msix error "
+ "%d\r\n", errno);
exit(1);
}
}
diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c
index 0e92204..c697492 100644
--- a/usr.sbin/bhyvectl/bhyvectl.c
+++ b/usr.sbin/bhyvectl/bhyvectl.c
@@ -189,12 +189,15 @@ usage(void)
" [--set-mem=<memory in units of MB>]\n"
" [--get-lowmem]\n"
" [--get-highmem]\n"
- " [--get-gpa-pmap]\n",
+ " [--get-gpa-pmap]\n"
+ " [--assert-lapic-lvt=<pin>]\n"
+ " [--inject-nmi]\n",
progname);
exit(1);
}
static int get_stats, getcap, setcap, capval, get_gpa_pmap;
+static int inject_nmi, assert_lapic_lvt;
static const char *capname;
static int create, destroy, get_lowmem, get_highmem;
static uint64_t memsize;
@@ -379,6 +382,7 @@ enum {
CAPNAME,
UNASSIGN_PPTDEV,
GET_GPA_PMAP,
+ ASSERT_LAPIC_LVT,
};
int
@@ -431,6 +435,7 @@ main(int argc, char *argv[])
{ "unassign-pptdev", REQ_ARG, 0, UNASSIGN_PPTDEV },
{ "setcap", REQ_ARG, 0, SET_CAP },
{ "get-gpa-pmap", REQ_ARG, 0, GET_GPA_PMAP },
+ { "assert-lapic-lvt", REQ_ARG, 0, ASSERT_LAPIC_LVT },
{ "getcap", NO_ARG, &getcap, 1 },
{ "get-stats", NO_ARG, &get_stats, 1 },
{ "get-desc-ds",NO_ARG, &get_desc_ds, 1 },
@@ -557,10 +562,12 @@ main(int argc, char *argv[])
{ "run", NO_ARG, &run, 1 },
{ "create", NO_ARG, &create, 1 },
{ "destroy", NO_ARG, &destroy, 1 },
+ { "inject-nmi", NO_ARG, &inject_nmi, 1 },
{ NULL, 0, NULL, 0 }
};
vcpu = 0;
+ assert_lapic_lvt = -1;
progname = basename(argv[0]);
while ((ch = getopt_long(argc, argv, "", opts, NULL)) != -1) {
@@ -682,6 +689,9 @@ main(int argc, char *argv[])
if (sscanf(optarg, "%d/%d/%d", &bus, &slot, &func) != 3)
usage();
break;
+ case ASSERT_LAPIC_LVT:
+ assert_lapic_lvt = atoi(optarg);
+ break;
default:
usage();
}
@@ -825,6 +835,14 @@ main(int argc, char *argv[])
vmcs_entry_interruption_info);
}
+ if (!error && inject_nmi) {
+ error = vm_inject_nmi(ctx, vcpu);
+ }
+
+ if (!error && assert_lapic_lvt != -1) {
+ error = vm_lapic_local_irq(ctx, vcpu, assert_lapic_lvt);
+ }
+
if (!error && (get_lowmem || get_all)) {
gpa = 0;
error = vm_get_memory_seg(ctx, gpa, &len, &wired);
OpenPOWER on IntegriCloud