diff options
-rw-r--r-- | usr.sbin/bhyve/acpi.c | 2 | ||||
-rw-r--r-- | usr.sbin/bhyve/bhyve.8 | 13 | ||||
-rw-r--r-- | usr.sbin/bhyve/bhyverun.c | 15 | ||||
-rw-r--r-- | usr.sbin/bhyve/mptbl.c | 35 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_emul.c | 461 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_emul.h | 18 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_lpc.c | 14 | ||||
-rw-r--r-- | usr.sbin/bhyve/pci_passthru.c | 66 |
8 files changed, 426 insertions, 198 deletions
diff --git a/usr.sbin/bhyve/acpi.c b/usr.sbin/bhyve/acpi.c index a918e06..db7f0eb 100644 --- a/usr.sbin/bhyve/acpi.c +++ b/usr.sbin/bhyve/acpi.c @@ -713,7 +713,7 @@ basl_fwrite_dsdt(FILE *fp) pci_write_dsdt(); dsdt_line(""); - dsdt_line(" Scope (_SB.PCI0)"); + dsdt_line(" Scope (_SB.PC00)"); dsdt_line(" {"); dsdt_line(" Device (HPET)"); dsdt_line(" {"); diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8 index 74a840d..60c30b9 100644 --- a/usr.sbin/bhyve/bhyve.8 +++ b/usr.sbin/bhyve/bhyve.8 @@ -100,12 +100,16 @@ There are 32 available slots, with the option of providing up to 8 functions per slot. .Bl -tag -width 10n .It Ar slot -.Ar pcislot Ns Op Ar :function +.Ar pcislot[:function] +.Ar bus:pcislot:function .Pp The .Ar pcislot -value is 0 to 31 and the optional function value is 0 to 7. +value is 0 to 31. The optional function value is 0 to 7. The optional +.Ar bus +value is 0 to 255. If not specified, the function value defaults to 0. +If not specified, the bus value defaults to 0. .It Ar emulation .Bl -tag -width 10n .It Li hostbridge | Li amd_hostbridge @@ -130,7 +134,8 @@ AHCI controller attached to a SATA hard-drive. .It Li uart PCI 16550 serial device. .It Li lpc -LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports. +LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports. The LPC bridge +emulation can only be configured on bus 0. .El .It Op Ar conf This optional parameter describes the backend for device emulations. @@ -227,6 +232,8 @@ This is intended for debug purposes. Ignore accesses to unimplemented Model Specific Registers (MSRs). This is intended for debug purposes. .It Fl x The guest's local APIC is configured in x2APIC mode. +.It Fl Y +Disable MPtable generation. .It Fl h Print help message and exit. .It Ar vmname diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index cf35841..efff6f0 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -144,6 +144,7 @@ usage(int code) " -m: memory size in MB\n" " -w: ignore unimplemented MSRs\n" " -x: local apic is in x2APIC mode\n" + " -Y: disable MPtable generation\n" " -U: uuid\n", progname, (int)strlen(progname), ""); @@ -591,7 +592,7 @@ int main(int argc, char *argv[]) { int c, error, gdb_port, err, bvmcons; - int max_vcpus; + int max_vcpus, mptgen; struct vmctx *ctx; uint64_t rip; size_t memsize; @@ -601,8 +602,9 @@ main(int argc, char *argv[]) gdb_port = 0; guest_ncpus = 1; memsize = 256 * MB; + mptgen = 1; - while ((c = getopt(argc, argv, "abehwxAHIPWp:g:c:s:m:l:U:")) != -1) { + while ((c = getopt(argc, argv, "abehwxAHIPWYp:g:c:s:m:l:U:")) != -1) { switch (c) { case 'a': x2apic_mode = 0; @@ -668,6 +670,9 @@ main(int argc, char *argv[]) case 'x': x2apic_mode = 1; break; + case 'Y': + mptgen = 0; + break; case 'h': usage(0); default: @@ -727,7 +732,11 @@ main(int argc, char *argv[]) /* * build the guest tables, MP etc. */ - mptable_build(ctx, guest_ncpus); + if (mptgen) { + error = mptable_build(ctx, guest_ncpus); + if (error) + exit(1); + } error = smbios_build(ctx); assert(error == 0); diff --git a/usr.sbin/bhyve/mptbl.c b/usr.sbin/bhyve/mptbl.c index 3608efa..4c2167e 100644 --- a/usr.sbin/bhyve/mptbl.c +++ b/usr.sbin/bhyve/mptbl.c @@ -196,16 +196,21 @@ mpt_build_ioapic_entries(io_apic_entry_ptr mpei, int id) static int mpt_count_ioint_entries(void) { + int bus, count; + + count = 0; + for (bus = 0; bus <= PCI_BUSMAX; bus++) + count += pci_count_lintr(bus); /* * Always include entries for the first 16 pins along with a entry * for each active PCI INTx pin. */ - return (16 + pci_count_lintr()); + return (16 + count); } static void -mpt_generate_pci_int(int slot, int pin, int ioapic_irq, void *arg) +mpt_generate_pci_int(int bus, int slot, int pin, int ioapic_irq, void *arg) { int_entry_ptr *mpiep, mpie; @@ -219,7 +224,7 @@ mpt_generate_pci_int(int slot, int pin, int ioapic_irq, void *arg) */ mpie->type = MPCT_ENTRY_INT; mpie->int_type = INTENTRY_TYPE_INT; - mpie->src_bus_id = 0; + mpie->src_bus_id = bus; mpie->src_bus_irq = slot << 2 | (pin - 1); mpie->dst_apic_id = mpie[-1].dst_apic_id; mpie->dst_apic_int = ioapic_irq; @@ -230,7 +235,7 @@ mpt_generate_pci_int(int slot, int pin, int ioapic_irq, void *arg) static void mpt_build_ioint_entries(int_entry_ptr mpie, int id) { - int pin; + int pin, bus; /* * The following config is taken from kernel mptable.c @@ -277,7 +282,8 @@ mpt_build_ioint_entries(int_entry_ptr mpie, int id) } /* Next, generate entries for any PCI INTx interrupts. */ - pci_walk_lintr(mpt_generate_pci_int, &mpie); + for (bus = 0; bus <= PCI_BUSMAX; bus++) + pci_walk_lintr(bus, mpt_generate_pci_int, &mpie); } void @@ -297,16 +303,31 @@ mptable_build(struct vmctx *ctx, int ncpu) proc_entry_ptr mpep; mpfps_t mpfp; int_entry_ptr mpie; - int ioints; + int ioints, bus; char *curraddr; char *startaddr; startaddr = paddr_guest2host(ctx, MPTABLE_BASE, MPTABLE_MAX_LENGTH); if (startaddr == NULL) { - printf("mptable requires mapped mem\n"); + fprintf(stderr, "mptable requires mapped mem\n"); return (ENOMEM); } + /* + * There is no way to advertise multiple PCI hierarchies via MPtable + * so require that there is no PCI hierarchy with a non-zero bus + * number. + */ + for (bus = 1; bus <= PCI_BUSMAX; bus++) { + if (pci_bus_configured(bus)) { + fprintf(stderr, "MPtable is incompatible with " + "multiple PCI hierarchies.\r\n"); + fprintf(stderr, "MPtable generation can be disabled " + "by passing the -Y option to bhyve(8).\r\n"); + return (EINVAL); + } + } + curraddr = startaddr; mpfp = (mpfps_t)curraddr; mpt_build_mpfp(mpfp, MPTABLE_BASE); diff --git a/usr.sbin/bhyve/pci_emul.c b/usr.sbin/bhyve/pci_emul.c index 0b50135..8fd19ac 100644 --- a/usr.sbin/bhyve/pci_emul.c +++ b/usr.sbin/bhyve/pci_emul.c @@ -69,6 +69,7 @@ do { \ } \ } while (0) +#define MAXBUSES (PCI_BUSMAX + 1) #define MAXSLOTS (PCI_SLOTMAX + 1) #define MAXFUNCS (PCI_FUNCMAX + 1) @@ -86,7 +87,16 @@ struct intxinfo { struct slotinfo { struct intxinfo si_intpins[4]; struct funcinfo si_funcs[MAXFUNCS]; -} pci_slotinfo[MAXSLOTS]; +}; + +struct businfo { + uint16_t iobase, iolimit; /* I/O window */ + uint32_t membase32, memlimit32; /* mmio window below 4GB */ + uint64_t membase64, memlimit64; /* mmio window above 4GB */ + struct slotinfo slotinfo[MAXSLOTS]; +}; + +static struct businfo *pci_businfo[MAXBUSES]; SET_DECLARE(pci_devemu_set, struct pci_devemu); @@ -97,7 +107,7 @@ static uint64_t pci_emul_membase64; #define PCI_EMUL_IOBASE 0x2000 #define PCI_EMUL_IOLIMIT 0x10000 -#define PCI_EMUL_MEMLIMIT32 0xE0000000 /* 3.5GB */ +#define PCI_EMUL_MEMLIMIT32 0xE0000000 /* 3.5GB */ #define PCI_EMUL_MEMBASE64 0xD000000000UL #define PCI_EMUL_MEMLIMIT64 0xFD00000000UL @@ -105,7 +115,6 @@ static uint64_t pci_emul_membase64; static struct pci_devemu *pci_emul_finddev(char *name); static void pci_lintr_update(struct pci_devinst *pi); -static int pci_emul_devices; static struct mem_range pci_mem_hole; /* @@ -115,6 +124,7 @@ static struct mem_range pci_mem_hole; /* * Slot options are in the form: * + * <bus>:<slot>:<func>,<emul>[,<config>] * <slot>[:<func>],<emul>[,<config>] * * slot is 0..31 @@ -136,37 +146,53 @@ pci_parse_slot_usage(char *aopt) int pci_parse_slot(char *opt) { - char *slot, *func, *emul, *config; - char *str, *cpy; - int error, snum, fnum; + struct businfo *bi; + struct slotinfo *si; + char *emul, *config, *str, *cp; + int error, bnum, snum, fnum; error = -1; - str = cpy = strdup(opt); - - slot = strsep(&str, ","); - func = NULL; - if (strchr(slot, ':') != NULL) { - func = cpy; - (void) strsep(&func, ":"); - } - - emul = strsep(&str, ","); - config = str; - - if (emul == NULL) { + str = strdup(opt); + + emul = config = NULL; + if ((cp = strchr(str, ',')) != NULL) { + *cp = '\0'; + emul = cp + 1; + if ((cp = strchr(emul, ',')) != NULL) { + *cp = '\0'; + config = cp + 1; + } + } else { pci_parse_slot_usage(opt); goto done; } - snum = atoi(slot); - fnum = func ? atoi(func) : 0; + /* <bus>:<slot>:<func> */ + if (sscanf(str, "%d:%d:%d", &bnum, &snum, &fnum) != 3) { + bnum = 0; + /* <slot>:<func> */ + if (sscanf(str, "%d:%d", &snum, &fnum) != 2) { + fnum = 0; + /* <slot> */ + if (sscanf(str, "%d", &snum) != 1) { + snum = -1; + } + } + } - if (snum < 0 || snum >= MAXSLOTS || fnum < 0 || fnum >= MAXFUNCS) { + if (bnum < 0 || bnum >= MAXBUSES || snum < 0 || snum >= MAXSLOTS || + fnum < 0 || fnum >= MAXFUNCS) { pci_parse_slot_usage(opt); goto done; } - if (pci_slotinfo[snum].si_funcs[fnum].fi_name != NULL) { + if (pci_businfo[bnum] == NULL) + pci_businfo[bnum] = calloc(1, sizeof(struct businfo)); + + bi = pci_businfo[bnum]; + si = &bi->slotinfo[snum]; + + if (si->si_funcs[fnum].fi_name != NULL) { fprintf(stderr, "pci slot %d:%d already occupied!\n", snum, fnum); goto done; @@ -179,12 +205,12 @@ pci_parse_slot(char *opt) } error = 0; - pci_slotinfo[snum].si_funcs[fnum].fi_name = emul; - pci_slotinfo[snum].si_funcs[fnum].fi_param = config; + si->si_funcs[fnum].fi_name = emul; + si->si_funcs[fnum].fi_param = config; done: if (error) - free(cpy); + free(str); return (error); } @@ -346,10 +372,27 @@ pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, offset = addr - pdi->pi_bar[bidx].addr; - if (dir == MEM_F_WRITE) - (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, size, *val); - else - *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, offset, size); + if (dir == MEM_F_WRITE) { + if (size == 8) { + (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, + 4, *val & 0xffffffff); + (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset + 4, + 4, *val >> 32); + } else { + (*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, + size, *val); + } + } else { + if (size == 8) { + *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, + offset, 4); + *val |= (*pe->pe_barread)(ctx, vcpu, pdi, bidx, + offset + 4, 4) << 32; + } else { + *val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, + offset, size); + } + } return (0); } @@ -604,48 +647,39 @@ pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase, static int pci_emul_add_capability(struct pci_devinst *pi, u_char *capdata, int caplen) { - int i, capoff, capid, reallen; + int i, capoff, reallen; uint16_t sts; - static u_char endofcap[4] = { - PCIY_RESERVED, 0, 0, 0 - }; - - assert(caplen > 0 && capdata[0] != PCIY_RESERVED); + assert(caplen > 0); reallen = roundup2(caplen, 4); /* dword aligned */ sts = pci_get_cfgdata16(pi, PCIR_STATUS); - if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { + if ((sts & PCIM_STATUS_CAPPRESENT) == 0) capoff = CAP_START_OFFSET; - pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); - pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); - } else { - capoff = pci_get_cfgdata8(pi, PCIR_CAP_PTR); - while (1) { - assert((capoff & 0x3) == 0); - capid = pci_get_cfgdata8(pi, capoff); - if (capid == PCIY_RESERVED) - break; - capoff = pci_get_cfgdata8(pi, capoff + 1); - } - } + else + capoff = pi->pi_capend + 1; /* Check if we have enough space */ - if (capoff + reallen + sizeof(endofcap) > PCI_REGMAX + 1) + if (capoff + reallen > PCI_REGMAX + 1) return (-1); + /* Set the previous capability pointer */ + if ((sts & PCIM_STATUS_CAPPRESENT) == 0) { + pci_set_cfgdata8(pi, PCIR_CAP_PTR, capoff); + pci_set_cfgdata16(pi, PCIR_STATUS, sts|PCIM_STATUS_CAPPRESENT); + } else + pci_set_cfgdata8(pi, pi->pi_prevcap + 1, capoff); + /* Copy the capability */ for (i = 0; i < caplen; i++) pci_set_cfgdata8(pi, capoff + i, capdata[i]); /* Set the next capability pointer */ - pci_set_cfgdata8(pi, capoff + 1, capoff + reallen); - - /* Copy of the reserved capability which serves as the end marker */ - for (i = 0; i < sizeof(endofcap); i++) - pci_set_cfgdata8(pi, capoff + reallen + i, endofcap[i]); + pci_set_cfgdata8(pi, capoff + 1, 0); + pi->pi_prevcap = capoff; + pi->pi_capend = capoff + reallen - 1; return (0); } @@ -665,8 +699,8 @@ pci_emul_finddev(char *name) } static int -pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int slot, int func, - char *params) +pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int bus, int slot, + int func, struct funcinfo *fi) { struct pci_devinst *pdi; int err; @@ -675,7 +709,7 @@ pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int slot, int func, bzero(pdi, sizeof(*pdi)); pdi->pi_vmctx = ctx; - pdi->pi_bus = 0; + pdi->pi_bus = bus; pdi->pi_slot = slot; pdi->pi_func = func; pthread_mutex_init(&pdi->pi_lintr.lock, NULL); @@ -692,13 +726,11 @@ pci_emul_init(struct vmctx *ctx, struct pci_devemu *pde, int slot, int func, pci_set_cfgdata8(pdi, PCIR_COMMAND, PCIM_CMD_PORTEN | PCIM_CMD_MEMEN | PCIM_CMD_BUSMASTEREN); - err = (*pde->pe_init)(ctx, pdi, params); - if (err != 0) { + err = (*pde->pe_init)(ctx, pdi, fi->fi_param); + if (err == 0) + fi->fi_devi = pdi; + else free(pdi); - } else { - pci_emul_devices++; - pci_slotinfo[slot].si_funcs[func].fi_devi = pdi; - } return (err); } @@ -732,7 +764,7 @@ pci_emul_add_msicap(struct pci_devinst *pi, int msgnum) static void pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, - uint32_t msix_tab_size, int nextptr) + uint32_t msix_tab_size) { CTASSERT(sizeof(struct msixcap) == 12); @@ -740,7 +772,6 @@ pci_populate_msixcap(struct msixcap *msixcap, int msgnum, int barnum, bzero(msixcap, sizeof(struct msixcap)); msixcap->capid = PCIY_MSIX; - msixcap->nextptr = nextptr; /* * Message Control Register, all fields set to @@ -778,7 +809,6 @@ pci_msix_table_init(struct pci_devinst *pi, int table_entries) int pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) { - uint16_t pba_index; uint32_t tab_size; struct msixcap msixcap; @@ -795,14 +825,11 @@ pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum) pi->pi_msix.table_offset = 0; pi->pi_msix.table_count = msgnum; pi->pi_msix.pba_offset = tab_size; - - /* calculate the MMIO size required for MSI-X PBA */ - pba_index = (msgnum - 1) / (PBA_TABLE_ENTRY_SIZE * 8); - pi->pi_msix.pba_size = (pba_index + 1) * PBA_TABLE_ENTRY_SIZE; + pi->pi_msix.pba_size = PBA_SIZE(msgnum); pci_msix_table_init(pi, msgnum); - pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size, 0); + pci_populate_msixcap(&msixcap, msgnum, barnum, tab_size); /* allocate memory for MSI-X Table and PBA */ pci_emul_alloc_bar(pi, barnum, PCIBAR_MEM32, @@ -925,11 +952,9 @@ pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val) /* Find the capability that we want to update */ capoff = CAP_START_OFFSET; while (1) { - capid = pci_get_cfgdata8(pi, capoff); - if (capid == PCIY_RESERVED) - break; - nextoff = pci_get_cfgdata8(pi, capoff + 1); + if (nextoff == 0) + break; if (offset >= capoff && offset < nextoff) break; @@ -952,6 +977,7 @@ pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val) return; } + capid = pci_get_cfgdata8(pi, capoff); switch (capid) { case PCIY_MSI: msicap_cfgwrite(pi, capoff, offset, bytes, val); @@ -970,25 +996,14 @@ pci_emul_capwrite(struct pci_devinst *pi, int offset, int bytes, uint32_t val) static int pci_emul_iscap(struct pci_devinst *pi, int offset) { - int found; uint16_t sts; - uint8_t capid, lastoff; - found = 0; sts = pci_get_cfgdata16(pi, PCIR_STATUS); if ((sts & PCIM_STATUS_CAPPRESENT) != 0) { - lastoff = pci_get_cfgdata8(pi, PCIR_CAP_PTR); - while (1) { - assert((lastoff & 0x3) == 0); - capid = pci_get_cfgdata8(pi, lastoff); - if (capid == PCIY_RESERVED) - break; - lastoff = pci_get_cfgdata8(pi, lastoff + 1); - } - if (offset >= CAP_START_OFFSET && offset <= lastoff) - found = 1; + if (offset >= CAP_START_OFFSET && offset <= pi->pi_capend) + return (1); } - return (found); + return (0); } static int @@ -1006,31 +1021,68 @@ pci_emul_fallback_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr, return (0); } +#define BUSIO_ROUNDUP 32 +#define BUSMEM_ROUNDUP (1024 * 1024) + int init_pci(struct vmctx *ctx) { struct pci_devemu *pde; + struct businfo *bi; + struct slotinfo *si; struct funcinfo *fi; size_t lowmem; - int slot, func; + int bus, slot, func; int error; pci_emul_iobase = PCI_EMUL_IOBASE; pci_emul_membase32 = vm_get_lowmem_limit(ctx); pci_emul_membase64 = PCI_EMUL_MEMBASE64; - for (slot = 0; slot < MAXSLOTS; slot++) { - for (func = 0; func < MAXFUNCS; func++) { - fi = &pci_slotinfo[slot].si_funcs[func]; - if (fi->fi_name != NULL) { + for (bus = 0; bus < MAXBUSES; bus++) { + if ((bi = pci_businfo[bus]) == NULL) + continue; + /* + * Keep track of the i/o and memory resources allocated to + * this bus. + */ + bi->iobase = pci_emul_iobase; + bi->membase32 = pci_emul_membase32; + bi->membase64 = pci_emul_membase64; + + for (slot = 0; slot < MAXSLOTS; slot++) { + si = &bi->slotinfo[slot]; + for (func = 0; func < MAXFUNCS; func++) { + fi = &si->si_funcs[func]; + if (fi->fi_name == NULL) + continue; pde = pci_emul_finddev(fi->fi_name); assert(pde != NULL); - error = pci_emul_init(ctx, pde, slot, func, - fi->fi_param); + error = pci_emul_init(ctx, pde, bus, slot, + func, fi); if (error) return (error); } } + + /* + * Add some slop to the I/O and memory resources decoded by + * this bus to give a guest some flexibility if it wants to + * reprogram the BARs. + */ + pci_emul_iobase += BUSIO_ROUNDUP; + pci_emul_iobase = roundup2(pci_emul_iobase, BUSIO_ROUNDUP); + bi->iolimit = pci_emul_iobase; + + pci_emul_membase32 += BUSMEM_ROUNDUP; + pci_emul_membase32 = roundup2(pci_emul_membase32, + BUSMEM_ROUNDUP); + bi->memlimit32 = pci_emul_membase32; + + pci_emul_membase64 += BUSMEM_ROUNDUP; + pci_emul_membase64 = roundup2(pci_emul_membase64, + BUSMEM_ROUNDUP); + bi->memlimit64 = pci_emul_membase64; } /* @@ -1060,7 +1112,7 @@ init_pci(struct vmctx *ctx) } static void -pci_prt_entry(int slot, int pin, int ioapic_irq, void *arg) +pci_prt_entry(int bus, int slot, int pin, int ioapic_irq, void *arg) { int *count; @@ -1075,96 +1127,165 @@ pci_prt_entry(int slot, int pin, int ioapic_irq, void *arg) (*count)--; } -void -pci_write_dsdt(void) +/* + * A bhyve virtual machine has a flat PCI hierarchy with a root port + * corresponding to each PCI bus. + */ +static void +pci_bus_write_dsdt(int bus) { + struct businfo *bi; + struct slotinfo *si; struct pci_devinst *pi; int count, slot, func; + /* + * If there are no devices on this 'bus' then just return. + */ + if ((bi = pci_businfo[bus]) == NULL) { + /* + * Bus 0 is special because it decodes the I/O ports used + * for PCI config space access even if there are no devices + * on it. + */ + if (bus != 0) + return; + } + dsdt_indent(1); dsdt_line("Scope (_SB)"); dsdt_line("{"); - dsdt_line(" Device (PCI0)"); + dsdt_line(" Device (PC%02X)", bus); dsdt_line(" {"); dsdt_line(" Name (_HID, EisaId (\"PNP0A03\"))"); dsdt_line(" Name (_ADR, Zero)"); + + dsdt_line(" Method (_BBN, 0, NotSerialized)"); + dsdt_line(" {"); + dsdt_line(" Return (0x%08X)", bus); + dsdt_line(" }"); dsdt_line(" Name (_CRS, ResourceTemplate ()"); dsdt_line(" {"); dsdt_line(" WordBusNumber (ResourceProducer, MinFixed, " "MaxFixed, PosDecode,"); dsdt_line(" 0x0000, // Granularity"); - dsdt_line(" 0x0000, // Range Minimum"); - dsdt_line(" 0x00FF, // Range Maximum"); + dsdt_line(" 0x%04X, // Range Minimum", bus); + dsdt_line(" 0x%04X, // Range Maximum", bus); dsdt_line(" 0x0000, // Translation Offset"); - dsdt_line(" 0x0100, // Length"); + dsdt_line(" 0x0001, // Length"); dsdt_line(" ,, )"); - dsdt_indent(3); - dsdt_fixed_ioport(0xCF8, 8); - dsdt_unindent(3); - dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " - "PosDecode, EntireRange,"); - dsdt_line(" 0x0000, // Granularity"); - dsdt_line(" 0x0000, // Range Minimum"); - dsdt_line(" 0x0CF7, // Range Maximum"); - dsdt_line(" 0x0000, // Translation Offset"); - dsdt_line(" 0x0CF8, // Length"); - dsdt_line(" ,, , TypeStatic)"); + + if (bus == 0) { + dsdt_indent(3); + dsdt_fixed_ioport(0xCF8, 8); + dsdt_unindent(3); + + dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " + "PosDecode, EntireRange,"); + dsdt_line(" 0x0000, // Granularity"); + dsdt_line(" 0x0000, // Range Minimum"); + dsdt_line(" 0x0CF7, // Range Maximum"); + dsdt_line(" 0x0000, // Translation Offset"); + dsdt_line(" 0x0CF8, // Length"); + dsdt_line(" ,, , TypeStatic)"); + + dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " + "PosDecode, EntireRange,"); + dsdt_line(" 0x0000, // Granularity"); + dsdt_line(" 0x0D00, // Range Minimum"); + dsdt_line(" 0x%04X, // Range Maximum", + PCI_EMUL_IOBASE - 1); + dsdt_line(" 0x0000, // Translation Offset"); + dsdt_line(" 0x%04X, // Length", + PCI_EMUL_IOBASE - 0x0D00); + dsdt_line(" ,, , TypeStatic)"); + + if (bi == NULL) { + dsdt_line(" })"); + goto done; + } + } + assert(bi != NULL); + + /* i/o window */ dsdt_line(" WordIO (ResourceProducer, MinFixed, MaxFixed, " "PosDecode, EntireRange,"); dsdt_line(" 0x0000, // Granularity"); - dsdt_line(" 0x0D00, // Range Minimum"); - dsdt_line(" 0xFFFF, // Range Maximum"); + dsdt_line(" 0x%04X, // Range Minimum", bi->iobase); + dsdt_line(" 0x%04X, // Range Maximum", + bi->iolimit - 1); dsdt_line(" 0x0000, // Translation Offset"); - dsdt_line(" 0xF300, // Length"); + dsdt_line(" 0x%04X, // Length", + bi->iolimit - bi->iobase); dsdt_line(" ,, , TypeStatic)"); + + /* mmio window (32-bit) */ dsdt_line(" DWordMemory (ResourceProducer, PosDecode, " "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); dsdt_line(" 0x00000000, // Granularity"); - dsdt_line(" 0x%08lX, // Range Minimum\n", - pci_mem_hole.base); + dsdt_line(" 0x%08X, // Range Minimum\n", bi->membase32); dsdt_line(" 0x%08X, // Range Maximum\n", - PCI_EMUL_MEMLIMIT32 - 1); + bi->memlimit32 - 1); dsdt_line(" 0x00000000, // Translation Offset"); - dsdt_line(" 0x%08lX, // Length\n", - PCI_EMUL_MEMLIMIT32 - pci_mem_hole.base); + dsdt_line(" 0x%08X, // Length\n", + bi->memlimit32 - bi->membase32); dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); + + /* mmio window (64-bit) */ dsdt_line(" QWordMemory (ResourceProducer, PosDecode, " "MinFixed, MaxFixed, NonCacheable, ReadWrite,"); dsdt_line(" 0x0000000000000000, // Granularity"); - dsdt_line(" 0x%016lX, // Range Minimum\n", - PCI_EMUL_MEMBASE64); + dsdt_line(" 0x%016lX, // Range Minimum\n", bi->membase64); dsdt_line(" 0x%016lX, // Range Maximum\n", - PCI_EMUL_MEMLIMIT64 - 1); + bi->memlimit64 - 1); dsdt_line(" 0x0000000000000000, // Translation Offset"); dsdt_line(" 0x%016lX, // Length\n", - PCI_EMUL_MEMLIMIT64 - PCI_EMUL_MEMBASE64); + bi->memlimit64 - bi->membase64); dsdt_line(" ,, , AddressRangeMemory, TypeStatic)"); dsdt_line(" })"); - count = pci_count_lintr(); + + count = pci_count_lintr(bus); if (count != 0) { dsdt_indent(2); dsdt_line("Name (_PRT, Package (0x%02X)", count); dsdt_line("{"); - pci_walk_lintr(pci_prt_entry, &count); + pci_walk_lintr(bus, pci_prt_entry, &count); dsdt_line("})"); dsdt_unindent(2); } dsdt_indent(2); for (slot = 0; slot < MAXSLOTS; slot++) { + si = &bi->slotinfo[slot]; for (func = 0; func < MAXFUNCS; func++) { - pi = pci_slotinfo[slot].si_funcs[func].fi_devi; + pi = si->si_funcs[func].fi_devi; if (pi != NULL && pi->pi_d->pe_write_dsdt != NULL) pi->pi_d->pe_write_dsdt(pi); } } dsdt_unindent(2); - +done: dsdt_line(" }"); dsdt_line("}"); dsdt_unindent(1); } +void +pci_write_dsdt(void) +{ + int bus; + + for (bus = 0; bus < MAXBUSES; bus++) + pci_bus_write_dsdt(bus); +} + +int +pci_bus_configured(int bus) +{ + assert(bus >= 0 && bus < MAXBUSES); + return (pci_businfo[bus] != NULL); +} + int pci_msi_enabled(struct pci_devinst *pi) { @@ -1231,13 +1352,17 @@ pci_lintr_permitted(struct pci_devinst *pi) int pci_lintr_request(struct pci_devinst *pi) { + struct businfo *bi; struct slotinfo *si; int bestpin, bestcount, irq, pin; + bi = pci_businfo[pi->pi_bus]; + assert(bi != NULL); + /* * First, allocate a pin from our slot. */ - si = &pci_slotinfo[pi->pi_slot]; + si = &bi->slotinfo[pi->pi_slot]; bestpin = 0; bestcount = si->si_intpins[0].ii_count; for (pin = 1; pin < 4; pin++) { @@ -1318,31 +1443,41 @@ pci_lintr_update(struct pci_devinst *pi) } int -pci_count_lintr(void) +pci_count_lintr(int bus) { int count, slot, pin; + struct slotinfo *slotinfo; count = 0; - for (slot = 0; slot < MAXSLOTS; slot++) { - for (pin = 0; pin < 4; pin++) { - if (pci_slotinfo[slot].si_intpins[pin].ii_count != 0) - count++; + if (pci_businfo[bus] != NULL) { + for (slot = 0; slot < MAXSLOTS; slot++) { + slotinfo = &pci_businfo[bus]->slotinfo[slot]; + for (pin = 0; pin < 4; pin++) { + if (slotinfo->si_intpins[pin].ii_count != 0) + count++; + } } } return (count); } void -pci_walk_lintr(pci_lintr_cb cb, void *arg) +pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg) { + struct businfo *bi; + struct slotinfo *si; struct intxinfo *ii; int slot, pin; + if ((bi = pci_businfo[bus]) == NULL) + return; + for (slot = 0; slot < MAXSLOTS; slot++) { + si = &bi->slotinfo[slot]; for (pin = 0; pin < 4; pin++) { - ii = &pci_slotinfo[slot].si_intpins[pin]; + ii = &si->si_intpins[pin]; if (ii->ii_count != 0) - cb(slot, pin + 1, ii->ii_ioapic_irq, arg); + cb(bus, slot, pin + 1, ii->ii_ioapic_irq, arg); } } } @@ -1352,14 +1487,19 @@ pci_walk_lintr(pci_lintr_cb cb, void *arg) * Return 0 otherwise. */ static int -pci_emul_is_mfdev(int slot) +pci_emul_is_mfdev(int bus, int slot) { + struct businfo *bi; + struct slotinfo *si; int f, numfuncs; numfuncs = 0; - for (f = 0; f < MAXFUNCS; f++) { - if (pci_slotinfo[slot].si_funcs[f].fi_devi != NULL) { - numfuncs++; + if ((bi = pci_businfo[bus]) != NULL) { + si = &bi->slotinfo[slot]; + for (f = 0; f < MAXFUNCS; f++) { + if (si->si_funcs[f].fi_devi != NULL) { + numfuncs++; + } } } return (numfuncs > 1); @@ -1370,12 +1510,12 @@ pci_emul_is_mfdev(int slot) * whether or not is a multi-function being emulated in the pci 'slot'. */ static void -pci_emul_hdrtype_fixup(int slot, int off, int bytes, uint32_t *rv) +pci_emul_hdrtype_fixup(int bus, int slot, int off, int bytes, uint32_t *rv) { int mfdev; if (off <= PCIR_HDRTYPE && off + bytes > PCIR_HDRTYPE) { - mfdev = pci_emul_is_mfdev(slot); + mfdev = pci_emul_is_mfdev(bus, slot); switch (bytes) { case 1: case 2: @@ -1394,7 +1534,7 @@ pci_emul_hdrtype_fixup(int slot, int off, int bytes, uint32_t *rv) } } -static int cfgbus, cfgslot, cfgfunc, cfgoff; +static int cfgenable, cfgbus, cfgslot, cfgfunc, cfgoff; static int pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, @@ -1413,9 +1553,12 @@ pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes, (cfgslot << 11) | (cfgfunc << 8) | cfgoff; - *eax = x | CONF1_ENABLE; + if (cfgenable) + x |= CONF1_ENABLE; + *eax = x; } else { x = *eax; + cfgenable = (x & CONF1_ENABLE) == CONF1_ENABLE; cfgoff = x & PCI_REGMAX; cfgfunc = (x >> 8) & PCI_FUNCMAX; cfgslot = (x >> 11) & PCI_SLOTMAX; @@ -1492,16 +1635,19 @@ static int pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, uint32_t *eax, void *arg) { + struct businfo *bi; + struct slotinfo *si; struct pci_devinst *pi; struct pci_devemu *pe; int coff, idx, needcfg; uint64_t addr, bar, mask; assert(bytes == 1 || bytes == 2 || bytes == 4); - - if (cfgbus == 0) - pi = pci_slotinfo[cfgslot].si_funcs[cfgfunc].fi_devi; - else + + if ((bi = pci_businfo[cfgbus]) != NULL) { + si = &bi->slotinfo[cfgslot]; + pi = si->si_funcs[cfgfunc].fi_devi; + } else pi = NULL; coff = cfgoff + (port - CONF1_DATA_PORT); @@ -1512,10 +1658,11 @@ pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, #endif /* - * Just return if there is no device at this cfgslot:cfgfunc or - * if the guest is doing an un-aligned access + * Just return if there is no device at this cfgslot:cfgfunc, + * if the guest is doing an un-aligned access, or if the config + * address word isn't enabled. */ - if (pi == NULL || (coff & (bytes - 1)) != 0) { + if (!cfgenable || pi == NULL || (coff & (bytes - 1)) != 0) { if (in) *eax = 0xffffffff; return (0); @@ -1544,7 +1691,7 @@ pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes, *eax = pci_get_cfgdata32(pi, coff); } - pci_emul_hdrtype_fixup(cfgslot, coff, bytes, eax); + pci_emul_hdrtype_fixup(cfgbus, cfgslot, coff, bytes, eax); } else { /* Let the device emulation override the default handler */ if (pe->pe_cfgwrite != NULL && @@ -1646,7 +1793,7 @@ SYSRES_IO(0xC00, 2); /* * Define a dummy test device */ -#define DIOSZ 20 +#define DIOSZ 8 #define DMEMSZ 4096 struct pci_emul_dsoftc { uint8_t ioregs[DIOSZ]; diff --git a/usr.sbin/bhyve/pci_emul.h b/usr.sbin/bhyve/pci_emul.h index 480b631..e1040a8 100644 --- a/usr.sbin/bhyve/pci_emul.h +++ b/usr.sbin/bhyve/pci_emul.h @@ -39,7 +39,6 @@ #include <assert.h> #define PCI_BARMAX PCIR_MAX_BAR_0 /* BAR registers in a Type 0 header */ -#define PCIY_RESERVED 0x00 struct vmctx; struct pci_devinst; @@ -101,7 +100,7 @@ struct msix_table_entry { */ #define MSIX_TABLE_ENTRY_SIZE 16 #define MAX_MSIX_TABLE_ENTRIES 2048 -#define PBA_TABLE_ENTRY_SIZE 8 +#define PBA_SIZE(msgnum) (roundup2((msgnum), 64) / 8) enum lintr_stat { IDLE, @@ -115,6 +114,8 @@ struct pci_devinst { uint8_t pi_bus, pi_slot, pi_func; char pi_name[PI_NAMESZ]; int pi_bar_getsize; + int pi_prevcap; + int pi_capend; struct { int8_t pin; @@ -134,10 +135,10 @@ struct pci_devinst { int enabled; int table_bar; int pba_bar; - size_t table_offset; + uint32_t table_offset; int table_count; - size_t pba_offset; - size_t pba_size; + uint32_t pba_offset; + int pba_size; int function_mask; struct msix_table_entry *table; /* allocated at runtime */ } pi_msix; @@ -199,7 +200,7 @@ struct pciecap { uint16_t slot_status2; } __packed; -typedef void (*pci_lintr_cb)(int slot, int pin, int ioapic_irq, void *arg); +typedef void (*pci_lintr_cb)(int b, int s, int pin, int ioapic_irq, void *arg); int init_pci(struct vmctx *ctx); void msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset, @@ -229,9 +230,10 @@ int pci_emul_add_msixcap(struct pci_devinst *pi, int msgnum, int barnum); int pci_emul_msix_twrite(struct pci_devinst *pi, uint64_t offset, int size, uint64_t value); uint64_t pci_emul_msix_tread(struct pci_devinst *pi, uint64_t offset, int size); -int pci_count_lintr(void); -void pci_walk_lintr(pci_lintr_cb cb, void *arg); +int pci_count_lintr(int bus); +void pci_walk_lintr(int bus, pci_lintr_cb cb, void *arg); void pci_write_dsdt(void); +int pci_bus_configured(int bus); static __inline void pci_set_cfgdata8(struct pci_devinst *pi, int offset, uint8_t val) diff --git a/usr.sbin/bhyve/pci_lpc.c b/usr.sbin/bhyve/pci_lpc.c index a1e750d..30b0401 100644 --- a/usr.sbin/bhyve/pci_lpc.c +++ b/usr.sbin/bhyve/pci_lpc.c @@ -277,8 +277,20 @@ pci_lpc_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) /* * Do not allow more than one LPC bridge to be configured. */ - if (lpc_bridge != NULL) + if (lpc_bridge != NULL) { + fprintf(stderr, "Only one LPC bridge is allowed.\n"); return (-1); + } + + /* + * Enforce that the LPC can only be configured on bus 0. This + * simplifies the ACPI DSDT because it can provide a decode for + * all legacy i/o ports behind bus 0. + */ + if (pi->pi_bus != 0) { + fprintf(stderr, "LPC bridge can be present only on bus 0.\n"); + return (-1); + } if (lpc_init() != 0) return (-1); diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c index dab5ffc..562d532 100644 --- a/usr.sbin/bhyve/pci_passthru.c +++ b/usr.sbin/bhyve/pci_passthru.c @@ -228,6 +228,7 @@ cfginitmsi(struct passthru_softc *sc) pi->pi_msix.table_offset = msixcap.table_info & ~PCIM_MSIX_BIR_MASK; pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl); + pi->pi_msix.pba_size = PBA_SIZE(pi->pi_msix.table_count); /* Allocate the emulated MSI-X table array */ table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; @@ -279,8 +280,10 @@ msix_table_read(struct passthru_softc *sc, uint64_t offset, int size) int index; pi = sc->psc_pi; - offset -= pi->pi_msix.table_offset; + if (offset < pi->pi_msix.table_offset) + return (-1); + offset -= pi->pi_msix.table_offset; index = offset / MSIX_TABLE_ENTRY_SIZE; if (index >= pi->pi_msix.table_count) return (-1); @@ -324,8 +327,10 @@ msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc, int error, index; pi = sc->psc_pi; - offset -= pi->pi_msix.table_offset; + if (offset < pi->pi_msix.table_offset) + return; + offset -= pi->pi_msix.table_offset; index = offset / MSIX_TABLE_ENTRY_SIZE; if (index >= pi->pi_msix.table_count) return; @@ -358,7 +363,9 @@ init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base) { int b, s, f; int error, idx; - size_t len, remaining, table_size; + size_t len, remaining; + uint32_t table_size, table_offset; + uint32_t pba_size, pba_offset; vm_paddr_t start; struct pci_devinst *pi = sc->psc_pi; @@ -374,24 +381,37 @@ init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base) * either resides in its own page within the region, * or it resides in a page shared with only the PBA. */ - if (pi->pi_msix.pba_bar == pi->pi_msix.table_bar && - ((pi->pi_msix.pba_offset - pi->pi_msix.table_offset) < 4096)) { - /* Need to also emulate the PBA, not supported yet */ - printf("Unsupported MSI-X configuration: %d/%d/%d\n", b, s, f); - return (-1); - } + table_offset = rounddown2(pi->pi_msix.table_offset, 4096); - /* Compute the MSI-X table size */ - table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; + table_size = pi->pi_msix.table_offset - table_offset; + table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; table_size = roundup2(table_size, 4096); + if (pi->pi_msix.pba_bar == pi->pi_msix.table_bar) { + pba_offset = pi->pi_msix.pba_offset; + pba_size = pi->pi_msix.pba_size; + if (pba_offset >= table_offset + table_size || + table_offset >= pba_offset + pba_size) { + /* + * The PBA can reside in the same BAR as the MSI-x + * tables as long as it does not overlap with any + * naturally aligned page occupied by the tables. + */ + } else { + /* Need to also emulate the PBA, not supported yet */ + printf("Unsupported MSI-X configuration: %d/%d/%d\n", + b, s, f); + return (-1); + } + } + idx = pi->pi_msix.table_bar; start = pi->pi_bar[idx].addr; remaining = pi->pi_bar[idx].size; /* Map everything before the MSI-X table */ - if (pi->pi_msix.table_offset > 0) { - len = pi->pi_msix.table_offset; + if (table_offset > 0) { + len = table_offset; error = vm_map_pptdev_mmio(ctx, b, s, f, start, len, base); if (error) return (error); @@ -424,7 +444,7 @@ cfginitbar(struct vmctx *ctx, struct passthru_softc *sc) struct pci_devinst *pi; struct pci_bar_io bar; enum pcibar_type bartype; - uint64_t base; + uint64_t base, size; pi = sc->psc_pi; @@ -453,15 +473,25 @@ cfginitbar(struct vmctx *ctx, struct passthru_softc *sc) } base = bar.pbi_base & PCIM_BAR_MEM_BASE; } + size = bar.pbi_length; + + if (bartype != PCIBAR_IO) { + if (((base | size) & PAGE_MASK) != 0) { + printf("passthru device %d/%d/%d BAR %d: " + "base %#lx or size %#lx not page aligned\n", + sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, + sc->psc_sel.pc_func, i, base, size); + return (-1); + } + } /* Cache information about the "real" BAR */ sc->psc_bar[i].type = bartype; - sc->psc_bar[i].size = bar.pbi_length; + sc->psc_bar[i].size = size; sc->psc_bar[i].addr = base; /* Allocate the BAR in the guest I/O or MMIO space */ - error = pci_emul_alloc_pbar(pi, i, base, bartype, - bar.pbi_length); + error = pci_emul_alloc_pbar(pi, i, base, bartype, size); if (error) return (-1); @@ -471,7 +501,7 @@ cfginitbar(struct vmctx *ctx, struct passthru_softc *sc) if (error) return (-1); } else if (bartype != PCIBAR_IO) { - /* Map the physical MMIO space in the guest MMIO space */ + /* Map the physical BAR in the guest MMIO space */ error = vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus, sc->psc_sel.pc_dev, sc->psc_sel.pc_func, pi->pi_bar[i].addr, pi->pi_bar[i].size, base); |