From 9f0c999f8126597eb572b80056df88335dbd0070 Mon Sep 17 00:00:00 2001
From: grehan
Date: Sat, 28 Apr 2012 16:28:00 +0000
Subject: MSI-x interrupt support for PCI pass-thru devices.

Includes instruction emulation for memory r/w access. This opens the door
for io-apic, local apic, hpet timer, and legacy device emulation.

Submitted by: ryan dot berryhill at sandvine dot com
Reviewed by: grehan
Obtained from: Sandvine
---
 usr.sbin/bhyve/pci_passthru.c | 305 ++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 292 insertions(+), 13 deletions(-)

(limited to 'usr.sbin/bhyve/pci_passthru.c')

diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c
index 1c417fd..a6f1f63 100644
--- a/usr.sbin/bhyve/pci_passthru.c
+++ b/usr.sbin/bhyve/pci_passthru.c
@@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include "pci_emul.h"
+#include "instruction_emul.h"
 
 #ifndef _PATH_DEVPCI
 #define _PATH_DEVPCI "/dev/pci"
@@ -58,6 +59,11 @@ __FBSDID("$FreeBSD$");
 
 #define LEGACY_SUPPORT 1
 
+#define MSIX_TABLE_BIR_MASK 7
+#define MSIX_TABLE_OFFSET_MASK (~MSIX_TABLE_BIR_MASK)
+#define MSIX_TABLE_COUNT(x) (((x) & 0x7FF) + 1)
+#define MSIX_CAPLEN 12
+
 static int pcifd = -1;
 static int iofd = -1;
 
@@ -69,6 +75,9 @@ struct passthru_softc {
 		int msgctrl;
 		int emulated;
 	} psc_msi;
+	struct {
+		int capoff;
+	} psc_msix;
 	struct pcisel psc_sel;
 };
 
@@ -152,17 +161,19 @@ passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr)
 static int
 cfginitmsi(struct passthru_softc *sc)
 {
-	int ptr, cap, sts, caplen;
+	int ptr, capptr, cap, sts, caplen;
 	uint32_t u32;
 	struct pcisel sel;
 	struct pci_devinst *pi;
+	struct msixcap msixcap;
+	uint32_t *msixcap_ptr;
 
 	pi = sc->psc_pi;
 	sel = sc->psc_sel;
 
 	/*
 	 * Parse the capabilities and cache the location of the MSI
-	 * capability.
+	 * and MSI-X capabilities.
 	 */
 	sts = read_config(&sel, PCIR_STATUS, 2);
 	if (sts & PCIM_STATUS_CAPPRESENT) {
@@ -179,18 +190,44 @@ cfginitmsi(struct passthru_softc *sc)
 				    ptr + 2, 2);
 				sc->psc_msi.emulated = 0;
 				caplen = msi_caplen(sc->psc_msi.msgctrl);
+				capptr = ptr;
 				while (caplen > 0) {
-					u32 = read_config(&sel, ptr, 4);
-					pci_set_cfgdata32(pi, ptr, u32);
+					u32 = read_config(&sel, capptr, 4);
+					pci_set_cfgdata32(pi, capptr, u32);
 					caplen -= 4;
-					ptr += 4;
+					capptr += 4;
+				}
+			} else if (cap == PCIY_MSIX) {
+				/*
+				 * Copy the MSI-X capability
+				 */
+				sc->psc_msix.capoff = ptr;
+				caplen = MSIX_CAPLEN;
+				msixcap_ptr = (uint32_t*) &msixcap;
+				capptr = ptr;
+				while (caplen > 0) {
+					u32 = read_config(&sel, capptr, 4);
+					*msixcap_ptr = u32;
+					pci_set_cfgdata32(pi, capptr, u32);
+					caplen -= 4;
+					capptr += 4;
+					msixcap_ptr++;
 				}
-				break;
 			}
 			ptr = read_config(&sel, ptr + PCICAP_NEXTPTR, 1);
 		}
 	}
 
+	/* MSI-X is optional: decode its layout only when the cap exists */
+	if (sc->psc_msix.capoff != 0) {
+		pi->pi_msix.pba_bar = msixcap.pba_offset & MSIX_TABLE_BIR_MASK;
+		pi->pi_msix.pba_offset = msixcap.pba_offset & MSIX_TABLE_OFFSET_MASK;
+		pi->pi_msix.table_bar = msixcap.table_offset & MSIX_TABLE_BIR_MASK;
+		pi->pi_msix.table_offset = msixcap.table_offset & MSIX_TABLE_OFFSET_MASK;
+
+		pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl);
+	}
+
 #ifdef LEGACY_SUPPORT
 	/*
 	 * If the passthrough device does not support MSI then craft a
@@ -208,12 +245,213 @@ cfginitmsi(struct passthru_softc *sc)
 	}
 #endif
 
-	if (sc->psc_msi.capoff == 0) /* MSI or bust */
+	/* Make sure one of the capabilities is present */
+	if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0)
 		return (-1);
 	else
 		return (0);
 }
 
+/*
+ * Emulated read from the MSI-X table. 'addr' is made relative to
+ * pi_msix.table_gpa and then split into an entry index and a byte offset
+ * within that entry; 'size' bytes (1, 2, 4 or 8) are copied into '*data'.
+ * Returns 0 on success and -1 for any other access size.
+ */
+static int
+msix_table_read(struct vmctx *vm, int vcpu, uintptr_t addr,
+    int size, uint64_t *data, void *arg)
+{
+	struct passthru_softc *sc;
+	struct pci_devinst *pi;
+	int index;
+	size_t offset, entry_offset;
+	uint8_t *src8;
+	uint16_t *src16;
+	uint32_t *src32;
+	uint64_t *src64;
+	struct msix_table_entry *entry;
+
+	sc = arg;
+	pi = sc->psc_pi;
+	offset = addr - pi->pi_msix.table_gpa;
+	entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
+	index = offset / MSIX_TABLE_ENTRY_SIZE;
+	entry = &pi->pi_msix.table[index];
+
+	switch(size) {
+	case 1:
+		src8 = (uint8_t*)((void*)entry + entry_offset);
+		*data = *src8;
+		break;
+	case 2:
+		src16 = (uint16_t*)((void*)entry + entry_offset);
+		*data = *src16;
+		break;
+	case 4:
+		src32 = (uint32_t*)((void*)entry + entry_offset);
+		*data = *src32;
+		break;
+	case 8:
+		src64 = (uint64_t*)((void*)entry + entry_offset);
+		*data = *src64;
+		break;
+	default:
+		return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * Emulated write to the MSI-X table. 'addr' is decoded the same way as
+ * in msix_table_read(). Only naturally-aligned 4-byte writes are stored;
+ * once MSI-X is enabled the entry is also pushed to vm_setup_msix() unless
+ * it was masked before the write and is still masked after it.
+ * Returns 0 on success and -1 on a rejected access or setup failure.
+ */
+static int
+msix_table_write(struct vmctx *vm, int vcpu, uintptr_t addr,
+    int size, uint64_t data, void *arg)
+{
+	struct passthru_softc *sc;
+	struct pci_devinst *pi;
+	int error, index;
+	size_t offset, entry_offset;
+	uint32_t *dest;
+	struct msix_table_entry *entry;
+	uint32_t vector_control;
+
+	sc = arg;
+	pi = sc->psc_pi;
+	offset = addr - pi->pi_msix.table_gpa;
+	entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
+	index = offset / MSIX_TABLE_ENTRY_SIZE;
+	entry = &pi->pi_msix.table[index];
+
+	/* Only 4 byte naturally-aligned writes are supported */
+	if (size == 4 && entry_offset % 4 == 0) {
+		vector_control = entry->vector_control;
+		dest = (uint32_t*)((void*)entry + entry_offset);
+		*dest = data;
+		/* If MSI-X hasn't been enabled, do nothing */
+		if (pi->pi_msix.enabled) {
+			/* If the entry is masked, don't set it up */
+			if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 ||
+			    (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
+				error = vm_setup_msix(vm, vcpu, sc->psc_sel.pc_bus,
+				    sc->psc_sel.pc_dev,
+				    sc->psc_sel.pc_func,
+				    index, entry->msg_data,
+				    entry->vector_control,
+				    entry->addr);
+				if (error)
+					return (-1);
+			}
+		}
+	} else {
+		printf("Unsupported unaligned or non-4-byte write to MSI-X table\n");
+		return (-1);
+	}
+	return (0);
+}
+
+/*
+ * Handler installed on the MSI-X table BAR by cfginitmsix(). Relocates the
+ * emulated table region to the memory base taken from 'bar' plus the table
+ * offset and records that address as the new table GPA. Always returns 0.
+ */
+static int
+msix_bar_handler(struct pci_devinst *pdi, int idx, uint64_t bar)
+{
+	uintptr_t start;
+
+	start = (bar & PCIM_BAR_MEM_BASE) + pdi->pi_msix.table_offset;
+	move_memory_region(pdi->pi_msix.table_bar_region, start);
+	pdi->pi_msix.table_gpa = start;
+	return (0);
+}
+
+/*
+ * Set up the passthrough mapping for the BAR that contains the MSI-X
+ * table: only the part of the BAR before or after the table is handed to
+ * vm_map_pptdev_mmio(). 'base' is the address that corresponds to offset 0
+ * of the BAR on the device side of that call (presumably host-physical).
+ * Fails with -1 when the table and the PBA share a page; otherwise returns
+ * the result of vm_map_pptdev_mmio().
+ */
+static int
+init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
+{
+	int idx;
+	size_t table_size;
+	vm_paddr_t start;
+	size_t len;
+	struct pci_devinst *pi = sc->psc_pi;
+
+	/*
+	 * If the MSI-X table BAR maps memory intended for
+	 * other uses, it is at least assured that the table
+	 * either resides in its own page within the region,
+	 * or it resides in a page shared with only the PBA.
+	 */
+	if (pi->pi_msix.pba_bar == pi->pi_msix.table_bar &&
+	    ((pi->pi_msix.pba_offset - pi->pi_msix.table_offset) < 4096)) {
+		/* Need to also emulate the PBA, not supported yet */
+		printf("Unsupported MSI-X table and PBA in same page\n");
+		return (-1);
+	}
+	/*
+	 * May need to split the BAR into 3 regions:
+	 * Before the MSI-X table, the MSI-X table, and after it
+	 * XXX for now, assume that the table is not in the middle
+	 */
+	table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE;
+	pi->pi_msix.table_size = table_size;
+	idx = pi->pi_msix.table_bar;
+
+	/* Round up to page size */
+	table_size = (table_size + 0xFFF) & ~0xFFF;
+	if (pi->pi_msix.table_offset == 0) {
+		/* Map everything after the MSI-X table */
+		start = pi->pi_bar[idx].addr + table_size;
+		len = pi->pi_bar[idx].size - table_size;
+		base += table_size;
+	} else {
+		/* Map everything before the MSI-X table */
+		start = pi->pi_bar[idx].addr;
+		len = pi->pi_msix.table_offset;
+	}
+	return vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
+	    sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
+	    start, len, base);
+}
+
+/*
+ * Register msix_table_read()/msix_table_write() as the emulation callbacks
+ * for the MSI-X table region and install msix_bar_handler() on the table
+ * BAR. Returns 0 on success and -1 if the region cannot be registered.
+ */
+static int
+cfginitmsix(struct passthru_softc *sc)
+{
+	int table_bar;
+	struct pci_devinst *pi;
+
+	pi = sc->psc_pi;
+	table_bar = pi->pi_msix.table_bar;
+	pi->pi_msix.table_gpa = sc->psc_bar[table_bar].addr + pi->pi_msix.table_offset;
+	pi->pi_msix.table_bar_region = register_emulated_memory(pi->pi_msix.table_gpa,
+	    pi->pi_msix.table_size,
+	    msix_table_read,
+	    msix_table_write, sc);
+	if (!pi->pi_msix.table_bar_region)
+		return (-1);
+
+	pi->pi_bar[table_bar].handler = msix_bar_handler;
+
+	return (0);
+}
+
 static int
 cfginitbar(struct vmctx *ctx, struct passthru_softc *sc)
 {
@@ -262,10 +500,13 @@ cfginitbar(struct vmctx *ctx, struct passthru_softc *sc)
 		if (error)
 			return (-1);
 
-		/*
-		 * Map the physical MMIO space in the guest MMIO space
-		 */
-		if (bartype != PCIBAR_IO) {
+		/* The MSI-X table needs special handling */
+		if (sc->psc_msix.capoff != 0 && i == pi->pi_msix.table_bar) {
+			error = init_msix_table(ctx, sc, base);
+			if (error)
+				return (-1);
+		} else if (bartype != PCIBAR_IO) {
+			/* Map the physical MMIO space in the guest MMIO space */
 			error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
 			    sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
 			    pi->pi_bar[i].addr, pi->pi_bar[i].size, base);
@@ -299,10 +540,13 @@ cfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func)
 	sc->psc_sel.pc_dev = slot;
 	sc->psc_sel.pc_func = func;
 
+	if (cfginitmsi(sc) != 0)
+		goto done;
+
 	if (cfginitbar(ctx, sc) != 0)
 		goto done;
 
-	if (cfginitmsi(sc) != 0)
+	if (sc->psc_msix.capoff != 0 && cfginitmsix(sc) != 0)
 		goto done;
 
 	error = 0; /* success */
@@ -381,6 +625,20 @@ msicap_access(struct passthru_softc *sc, int coff)
 	return (0);
 }
 
+/*
+ * Return non-zero when config-space offset 'coff' falls inside the MSI-X
+ * capability, and 0 when it does not or the device has no such capability.
+ */
+static int
+msixcap_access(struct passthru_softc *sc, int coff)
+{
+	if (sc->psc_msix.capoff == 0)
+		return (0);
+
+	return (coff >= sc->psc_msix.capoff &&
+	    coff < sc->psc_msix.capoff + MSIX_CAPLEN);
+}
+
 static int
 passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff,
     int bytes, uint32_t *rv)
@@ -416,7 +674,7 @@ static int
 passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff,
     int bytes, uint32_t val)
 {
-	int error;
+	int error, msix_table_entries, i;
 	struct passthru_softc *sc;
 
 	sc = pi->pi_arg;
@@ -443,6 +701,27 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff,
 		return (0);
 	}
 
+	if (msixcap_access(sc, coff)) {
+		msixcap_cfgwrite(pi, sc->psc_msix.capoff, coff, bytes, val);
+		if (pi->pi_msix.enabled) {
+			msix_table_entries = pi->pi_msix.table_count;
+			for (i = 0; i < msix_table_entries; i++) {
+				error = vm_setup_msix(ctx, vcpu, sc->psc_sel.pc_bus,
+				    sc->psc_sel.pc_dev,
+				    sc->psc_sel.pc_func, i,
+				    pi->pi_msix.table[i].msg_data,
+				    pi->pi_msix.table[i].vector_control,
+				    pi->pi_msix.table[i].addr);
+
+				if (error) {
+					printf("vm_setup_msix returned error %d\r\n", errno);
+					exit(1);
+				}
+			}
+		}
+		return (0);
+	}
+
 #ifdef LEGACY_SUPPORT
 	/*
 	 * If this device does not support MSI natively then we cannot let
-- 
cgit v1.1