diff options
author | jhb <jhb@FreeBSD.org> | 2007-05-02 17:50:36 +0000 |
---|---|---|
committer | jhb <jhb@FreeBSD.org> | 2007-05-02 17:50:36 +0000 |
commit | ef27a042995e8ab71ff21c9b0b9714068ca9762f (patch) | |
tree | 69dac9819e24c8ebcc3298470c47cfe7cc574930 /sys/dev/pci/pci.c | |
parent | 7b4912a3de1bf63af580b26951df4959ff338571 (diff) | |
download | FreeBSD-src-ef27a042995e8ab71ff21c9b0b9714068ca9762f.zip FreeBSD-src-ef27a042995e8ab71ff21c9b0b9714068ca9762f.tar.gz |
Revamp the MSI/MSI-X code a bit to achieve two main goals:
- Simplify the amount of work that has be done for each architecture by
pushing more of the truly MI code down into the PCI bus driver.
- Don't bind MSI-X indicies to IRQs so that we can allow a driver to map
multiple MSI-X messages into a single IRQ when handling a message
shortage.
The changes include:
- Add a new pcib_if method: PCIB_MAP_MSI() which is called by the PCI bus
to calculate the address and data values for a given MSI/MSI-X IRQ.
The x86 nexus drivers map this into a call to a new 'msi_map()' function
in msi.c that does the mapping.
- Retire the pcib_if method PCIB_REMAP_MSIX() and remove the 'index'
parameter from PCIB_ALLOC_MSIX(). MD code no longer has any knowledge
of the MSI-X index for a given MSI-X IRQ.
- The PCI bus driver now stores more MSI-X state in a child's ivars.
Specifically, it now stores an array of IRQs (called "message vectors" in
the code) that have associated address and data values, and a small
virtual version of the MSI-X table that specifies the message vector
that a given MSI-X table entry uses. Sparse mappings are permitted in
the virtual table.
- The PCI bus driver now configures the MSI and MSI-X address/data
registers directly via custom bus_setup_intr() and bus_teardown_intr()
methods. pci_setup_intr() invokes PCIB_MAP_MSI() to determine the
address and data values for a given message as needed. The MD code
no longer has to call back down into the PCI bus code to set these
values from the nexus' bus_setup_intr() handler.
- The PCI bus code provides a callout (pci_remap_msi_irq()) that the MD
code can call to force the PCI bus to re-invoke PCIB_MAP_MSI() to get
new values of the address and data fields for a given IRQ. The x86
MSI code uses this when an MSI IRQ is moved to a different CPU, requiring
a new value of the 'address' field.
- The x86 MSI psuedo-driver loses a lot of code, and in fact the separate
MSI/MSI-X pseudo-PICs are collapsed down into a single MSI PIC driver
since the only remaining diff between the two is a substring in a
bootverbose printf.
- The PCI bus driver will now restore MSI-X state (including programming
entries in the MSI-X table) on device resume.
- The interface for pci_remap_msix() has changed. Instead of accepting
indices for the allocated vectors, it accepts a mini-virtual table
(with a new length parameter). This table is an array of u_ints, where
each value specifies which allocated message vector to use for the
corresponding MSI-X message. A vector of 0 forces a message to not
have an associated IRQ. The device may choose to only use some of the
IRQs assigned, in which case the unused IRQs must be at the "end" and
will be released back to the system. This allows a driver to use the
same remap table for different shortage values. For example, if a driver
wants 4 messages, it can use the same remap table (which only uses the
first two messages) for the cases when it only gets 2 or 3 messages and
in the latter case the PCI bus will release the 3rd IRQ back to the
system.
MFC after: 1 month
Diffstat (limited to 'sys/dev/pci/pci.c')
-rw-r--r-- | sys/dev/pci/pci.c | 527 |
1 files changed, 437 insertions, 90 deletions
diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c index 52c1ff7..52935e7 100644 --- a/sys/dev/pci/pci.c +++ b/sys/dev/pci/pci.c @@ -101,7 +101,16 @@ static void pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data); #endif static void pci_read_vpd(device_t pcib, pcicfgregs *cfg); +static void pci_disable_msi(device_t dev); +static void pci_enable_msi(device_t dev, uint64_t address, + uint16_t data); +static void pci_enable_msix(device_t dev, u_int index, + uint64_t address, uint32_t data); +static void pci_mask_msix(device_t dev, u_int index); +static void pci_unmask_msix(device_t dev, u_int index); static int pci_msi_blacklisted(void); +static void pci_resume_msi(device_t dev); +static void pci_resume_msix(device_t dev); static device_method_t pci_methods[] = { /* Device interface */ @@ -118,8 +127,8 @@ static device_method_t pci_methods[] = { DEVMETHOD(bus_read_ivar, pci_read_ivar), DEVMETHOD(bus_write_ivar, pci_write_ivar), DEVMETHOD(bus_driver_added, pci_driver_added), - DEVMETHOD(bus_setup_intr, bus_generic_setup_intr), - DEVMETHOD(bus_teardown_intr, bus_generic_teardown_intr), + DEVMETHOD(bus_setup_intr, pci_setup_intr), + DEVMETHOD(bus_teardown_intr, pci_teardown_intr), DEVMETHOD(bus_get_resource_list,pci_get_resource_list), DEVMETHOD(bus_set_resource, bus_generic_rl_set_resource), @@ -1016,7 +1025,7 @@ pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data) struct pcicfg_msix *msix = &dinfo->cfg.msix; uint32_t offset; - KASSERT(msix->msix_alloc > index, ("bogus index")); + KASSERT(msix->msix_table_len > index, ("bogus index")); offset = msix->msix_table_offset + index * 16; bus_write_4(msix->msix_table_res, offset, address & 0xffffffff); bus_write_4(msix->msix_table_res, offset + 4, address >> 32); @@ -1046,7 +1055,7 @@ pci_unmask_msix(device_t dev, u_int index) struct pcicfg_msix *msix = &dinfo->cfg.msix; uint32_t offset, val; - KASSERT(msix->msix_alloc > index, ("bogus index")); + KASSERT(msix->msix_table_len > index, ("bogus index")); offset = msix->msix_table_offset + index * 16 + 12; val = bus_read_4(msix->msix_table_res, offset); if (val & PCIM_MSIX_VCTRL_MASK) { @@ -1062,13 +1071,46 @@ pci_pending_msix(device_t dev, u_int index) struct pcicfg_msix *msix = &dinfo->cfg.msix; uint32_t offset, bit; - KASSERT(msix->msix_alloc > index, ("bogus index")); + KASSERT(msix->msix_table_len > index, ("bogus index")); offset = msix->msix_pba_offset + (index / 32) * 4; bit = 1 << index % 32; return (bus_read_4(msix->msix_pba_res, offset) & bit); } /* + * Restore MSI-X registers and table during resume. If MSI-X is + * enabled then walk the virtual table to restore the actual MSI-X + * table. + */ +static void +pci_resume_msix(device_t dev) +{ + struct pci_devinfo *dinfo = device_get_ivars(dev); + struct pcicfg_msix *msix = &dinfo->cfg.msix; + struct msix_table_entry *mte; + struct msix_vector *mv; + int i; + + if (msix->msix_alloc > 0) { + /* First, mask all vectors. */ + for (i = 0; i < msix->msix_msgnum; i++) + pci_mask_msix(dev, i); + + /* Second, program any messages with at least one handler. */ + for (i = 0; i < msix->msix_table_len; i++) { + mte = &msix->msix_table[i]; + if (mte->mte_vector == 0 || mte->mte_handlers == 0) + continue; + mv = &msix->msix_vectors[mte->mte_vector - 1]; + pci_enable_msix(dev, i, mv->mv_address, mv->mv_data); + pci_unmask_msix(dev, i); + } + } + pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL, + msix->msix_ctrl, 2); +} + +/* * Attempt to allocate *count MSI-X messages. The actual number allocated is * returned in *count. After this function returns, each message will be * available to the driver as SYS_RES_IRQ resources starting at rid 1. @@ -1125,8 +1167,7 @@ pci_alloc_msix_method(device_t dev, device_t child, int *count) max = min(*count, cfg->msix.msix_msgnum); for (i = 0; i < max; i++) { /* Allocate a message. */ - error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, i, - &irq); + error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq); if (error) break; resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq, @@ -1183,6 +1224,17 @@ pci_alloc_msix_method(device_t dev, device_t child, int *count) for (i = 0; i < cfg->msix.msix_msgnum; i++) pci_mask_msix(child, i); + /* Allocate and initialize vector data and virtual table. */ + cfg->msix.msix_vectors = malloc(sizeof(struct msix_vector) * actual, + M_DEVBUF, M_WAITOK | M_ZERO); + cfg->msix.msix_table = malloc(sizeof(struct msix_table_entry) * actual, + M_DEVBUF, M_WAITOK | M_ZERO); + for (i = 0; i < actual; i++) { + rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1); + cfg->msix.msix_vectors[i].mv_irq = rle->start; + cfg->msix.msix_table[i].mte_vector = i + 1; + } + /* Update control register to enable MSI-X. */ cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE; pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL, @@ -1190,93 +1242,164 @@ pci_alloc_msix_method(device_t dev, device_t child, int *count) /* Update counts of alloc'd messages. */ cfg->msix.msix_alloc = actual; + cfg->msix.msix_table_len = actual; *count = actual; return (0); } /* - * By default, pci_alloc_msix() will assign the allocated IRQ resources to - * the first N messages in the MSI-X table. However, device drivers may - * want to use different layouts in the case that they do not allocate a - * full table. This method allows the driver to specify what layout it - * wants. It must be called after a successful pci_alloc_msix() but - * before any of the associated SYS_RES_IRQ resources are allocated via - * bus_alloc_resource(). The 'indices' array contains N (where N equals - * the 'count' returned from pci_alloc_msix()) message indices. The - * indices are 1-based (meaning the first message is at index 1). On - * successful return, each of the messages in the 'indices' array will - * have an associated SYS_RES_IRQ whose rid is equal to the index. Thus, - * if indices contains { 2, 4 }, then upon successful return, the 'child' - * device will have two SYS_RES_IRQ resources available at rids 2 and 4. + * By default, pci_alloc_msix() will assign the allocated IRQ + * resources consecutively to the first N messages in the MSI-X table. + * However, device drivers may want to use different layouts if they + * either receive fewer messages than they asked for, or they wish to + * populate the MSI-X table sparsely. This method allows the driver + * to specify what layout it wants. It must be called after a + * successful pci_alloc_msix() but before any of the associated + * SYS_RES_IRQ resources are allocated via bus_alloc_resource(). + * + * The 'vectors' array contains 'count' message vectors. The array + * maps directly to the MSI-X table in that index 0 in the array + * specifies the vector for the first message in the MSI-X table, etc. + * The vector value in each array index can either be 0 to indicate + * that no vector should be assigned to a message slot, or it can be a + * number from 1 to N (where N is the count returned from a + * succcessful call to pci_alloc_msix()) to indicate which message + * vector (IRQ) to be used for the corresponding message. + * + * On successful return, each message with a non-zero vector will have + * an associated SYS_RES_IRQ whose rid is equal to the array index + + * 1. Additionally, if any of the IRQs allocated via the previous + * call to pci_alloc_msix() are not used in the mapping, those IRQs + * will be freed back to the system automatically. + * + * For example, suppose a driver has a MSI-X table with 6 messages and + * asks for 6 messages, but pci_alloc_msix() only returns a count of + * 3. Call the three vectors allocated by pci_alloc_msix() A, B, and + * C. After the call to pci_alloc_msix(), the device will be setup to + * have an MSI-X table of ABC--- (where - means no vector assigned). + * If the driver ten passes a vector array of { 1, 0, 1, 2, 0, 2 }, + * then the MSI-X table will look like A-AB-B, and the 'C' vector will + * be freed back to the system. This device will also have valid + * SYS_RES_IRQ rids of 1, 3, 4, and 6. + * + * In any case, the SYS_RES_IRQ rid X will always map to the message + * at MSI-X table index X - 1 and will only be valid if a vector is + * assigned to that table entry. */ int -pci_remap_msix_method(device_t dev, device_t child, u_int *indices) +pci_remap_msix_method(device_t dev, device_t child, int count, + const u_int *vectors) { struct pci_devinfo *dinfo = device_get_ivars(child); - pcicfgregs *cfg = &dinfo->cfg; + struct pcicfg_msix *msix = &dinfo->cfg.msix; struct resource_list_entry *rle; - int count, error, i, j, *irqs; + int i, irq, j, *used; - /* Sanity check the indices. */ - for (i = 0; i < cfg->msix.msix_alloc; i++) - if (indices[i] == 0 || indices[i] > cfg->msix.msix_msgnum) - return (EINVAL); + /* + * Have to have at least one message in the table but the + * table can't be bigger than the actual MSI-X table in the + * device. + */ + if (count == 0 || count > msix->msix_msgnum) + return (EINVAL); - /* Check for duplicates. */ - for (i = 0; i < cfg->msix.msix_alloc; i++) - for (j = i + 1; j < cfg->msix.msix_alloc; j++) - if (indices[i] == indices[j]) - return (EINVAL); + /* Sanity check the vectors. */ + for (i = 0; i < count; i++) + if (vectors[i] > msix->msix_alloc) + return (EINVAL); + /* + * Make sure there aren't any holes in the vectors to be used. + * It's a big pain to support it, and it doesn't really make + * sense anyway. Also, at least one vector must be used. + */ + used = malloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK | + M_ZERO); + for (i = 0; i < count; i++) + if (vectors[i] != 0) + used[vectors[i] - 1] = 1; + for (i = 0; i < msix->msix_alloc - 1; i++) + if (used[i] == 0 && used[i + 1] == 1) { + free(used, M_DEVBUF); + return (EINVAL); + } + if (used[0] != 1) { + free(used, M_DEVBUF); + return (EINVAL); + } + /* Make sure none of the resources are allocated. */ - for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) { - rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i); - if (rle == NULL) + for (i = 0; i < msix->msix_table_len; i++) { + if (msix->msix_table[i].mte_vector == 0) continue; + if (msix->msix_table[i].mte_handlers > 0) + return (EBUSY); + rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1); + KASSERT(rle != NULL, ("missing resource")); if (rle->res != NULL) return (EBUSY); - count++; } - /* Save the IRQ values and free the existing resources. */ - irqs = malloc(sizeof(int) * cfg->msix.msix_alloc, M_TEMP, M_WAITOK); - for (i = 1, count = 0; count < cfg->msix.msix_alloc; i++) { - rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i); - if (rle == NULL) + /* Free the existing resource list entries. */ + for (i = 0; i < msix->msix_table_len; i++) { + if (msix->msix_table[i].mte_vector == 0) continue; - irqs[count] = rle->start; - resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i); - count++; + resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1); } - /* Map the IRQ values to the new message indices and rids. */ - for (i = 0; i < cfg->msix.msix_alloc; i++) { - resource_list_add(&dinfo->resources, SYS_RES_IRQ, indices[i], - irqs[i], irqs[i], 1); - - /* - * The indices in the backend code (PCIB_* methods and the - * MI helper routines for MD code such as pci_enable_msix()) - * are all zero-based. However, the indices passed to this - * function are 1-based so that the correspond 1:1 with the - * SYS_RES_IRQ resource IDs. - */ - error = PCIB_REMAP_MSIX(device_get_parent(dev), child, - indices[i] - 1, irqs[i]); - KASSERT(error == 0, ("Failed to remap MSI-X message")); + /* + * Build the new virtual table keeping track of which vectors are + * used. + */ + free(msix->msix_table, M_DEVBUF); + msix->msix_table = malloc(sizeof(struct msix_table_entry) * count, + M_DEVBUF, M_WAITOK | M_ZERO); + for (i = 0; i < count; i++) + msix->msix_table[i].mte_vector = vectors[i]; + msix->msix_table_len = count; + + /* Free any unused IRQs and resize the vectors array if necessary. */ + j = msix->msix_alloc - 1; + if (used[j] == 0) { + struct msix_vector *vec; + + while (used[j] == 0) { + PCIB_RELEASE_MSIX(device_get_parent(dev), child, + msix->msix_vectors[j].mv_irq); + j--; + } + vec = malloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF, + M_WAITOK); + bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) * + (j + 1)); + free(msix->msix_vectors, M_DEVBUF); + msix->msix_vectors = vec; + msix->msix_alloc = j + 1; + } + free(used, M_DEVBUF); + + /* Map the IRQs onto the rids. */ + for (i = 0; i < count; i++) { + if (vectors[i] == 0) + continue; + irq = msix->msix_vectors[vectors[i]].mv_irq; + resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq, + irq, 1); } + if (bootverbose) { - if (cfg->msix.msix_alloc == 1) - device_printf(child, - "Remapped MSI-X IRQ to index %d\n", indices[0]); - else { - device_printf(child, "Remapped MSI-X IRQs to indices"); - for (i = 0; i < cfg->msix.msix_alloc - 1; i++) - printf(" %d,", indices[i]); - printf(" %d\n", indices[cfg->msix.msix_alloc - 1]); + device_printf(child, "Remapped MSI-X IRQs as: "); + for (i = 0; i < count; i++) { + if (i != 0) + printf(", "); + if (vectors[i] == 0) + printf("---"); + else + printf("%d", + msix->msix_vectors[vectors[i]].mv_irq); } + printf("\n"); } - free(irqs, M_TEMP); return (0); } @@ -1287,20 +1410,22 @@ pci_release_msix(device_t dev, device_t child) struct pci_devinfo *dinfo = device_get_ivars(child); struct pcicfg_msix *msix = &dinfo->cfg.msix; struct resource_list_entry *rle; - int count, i; + int i; /* Do we have any messages to release? */ if (msix->msix_alloc == 0) return (ENODEV); /* Make sure none of the resources are allocated. */ - for (i = 1, count = 0; count < msix->msix_alloc; i++) { - rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i); - if (rle == NULL) + for (i = 0; i < msix->msix_table_len; i++) { + if (msix->msix_table[i].mte_vector == 0) continue; + if (msix->msix_table[i].mte_handlers > 0) + return (EBUSY); + rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1); + KASSERT(rle != NULL, ("missing resource")); if (rle->res != NULL) return (EBUSY); - count++; } /* Update control register to disable MSI-X. */ @@ -1308,18 +1433,20 @@ pci_release_msix(device_t dev, device_t child) pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL, msix->msix_ctrl, 2); - /* Release the messages. */ - for (i = 1, count = 0; count < msix->msix_alloc; i++) { - rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i); - if (rle == NULL) + /* Free the resource list entries. */ + for (i = 0; i < msix->msix_table_len; i++) { + if (msix->msix_table[i].mte_vector == 0) continue; - PCIB_RELEASE_MSIX(device_get_parent(dev), child, - rle->start); - resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i); - count++; + resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1); } + free(msix->msix_table, M_DEVBUF); + msix->msix_table_len = 0; - /* Update alloc count. */ + /* Release the IRQs. */ + for (i = 0; i < msix->msix_alloc; i++) + PCIB_RELEASE_MSIX(device_get_parent(dev), child, + msix->msix_vectors[i].mv_irq); + free(msix->msix_vectors, M_DEVBUF); msix->msix_alloc = 0; return (0); } @@ -1351,8 +1478,6 @@ pci_enable_msi(device_t dev, uint64_t address, uint16_t data) struct pcicfg_msi *msi = &dinfo->cfg.msi; /* Write data and address values. */ - msi->msi_addr = address; - msi->msi_data = data; pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR, address & 0xffffffff, 4); if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) { @@ -1370,6 +1495,18 @@ pci_enable_msi(device_t dev, uint64_t address, uint16_t data) 2); } +void +pci_disable_msi(device_t dev) +{ + struct pci_devinfo *dinfo = device_get_ivars(dev); + struct pcicfg_msi *msi = &dinfo->cfg.msi; + + /* Disable MSI in the control register. */ + msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE; + pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl, + 2); +} + /* * Restore MSI registers during resume. If MSI is enabled then * restore the data and address registers in addition to the control @@ -1401,6 +1538,82 @@ pci_resume_msi(device_t dev) 2); } +int +pci_remap_msi_irq(device_t dev, u_int irq) +{ + struct pci_devinfo *dinfo = device_get_ivars(dev); + pcicfgregs *cfg = &dinfo->cfg; + struct resource_list_entry *rle; + struct msix_table_entry *mte; + struct msix_vector *mv; + device_t bus; + uint64_t addr; + uint32_t data; + int error, i, j; + + bus = device_get_parent(dev); + + /* + * Handle MSI first. We try to find this IRQ among our list + * of MSI IRQs. If we find it, we request updated address and + * data registers and apply the results. + */ + if (cfg->msi.msi_alloc > 0) { + + /* If we don't have any active handlers, nothing to do. */ + if (cfg->msi.msi_handlers == 0) + return (0); + for (i = 0; i < cfg->msi.msi_alloc; i++) { + rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, + i + 1); + if (rle->start == irq) { + error = PCIB_MAP_MSI(device_get_parent(bus), + dev, irq, &addr, &data); + if (error) + return (error); + pci_disable_msi(dev); + dinfo->cfg.msi.msi_addr = addr; + dinfo->cfg.msi.msi_data = data; + pci_enable_msi(dev, addr, data); + return (0); + } + } + return (ENOENT); + } + + /* + * For MSI-X, we check to see if we have this IRQ. If we do, + * we request the updated mapping info. If that works, we go + * through all the slots that use this IRQ and update them. + */ + if (cfg->msix.msix_alloc > 0) { + for (i = 0; i < cfg->msix.msix_alloc; i++) { + mv = &cfg->msix.msix_vectors[i]; + if (mv->mv_irq == irq) { + error = PCIB_MAP_MSI(device_get_parent(bus), + dev, irq, &addr, &data); + if (error) + return (error); + mv->mv_address = addr; + mv->mv_data = data; + for (j = 0; j < cfg->msix.msix_table_len; j++) { + mte = &cfg->msix.msix_table[j]; + if (mte->mte_vector != i + 1) + continue; + if (mte->mte_handlers == 0) + continue; + pci_mask_msix(dev, j); + pci_enable_msix(dev, j, addr, data); + pci_unmask_msix(dev, j); + } + } + } + return (ENOENT); + } + + return (ENOENT); +} + /* * Returns true if the specified device is blacklisted because MSI * doesn't work. @@ -1565,6 +1778,7 @@ pci_alloc_msi_method(device_t dev, device_t child, int *count) /* Update counts of alloc'd messages. */ cfg->msi.msi_alloc = actual; + cfg->msi.msi_handlers = 0; *count = actual; return (0); } @@ -1589,6 +1803,8 @@ pci_release_msi_method(device_t dev, device_t child) KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages")); /* Make sure none of the resources are allocated. */ + if (msi->msi_handlers > 0) + return (EBUSY); for (i = 0; i < msi->msi_alloc; i++) { rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1); KASSERT(rle != NULL, ("missing MSI resource")); @@ -1597,8 +1813,10 @@ pci_release_msi_method(device_t dev, device_t child) irqs[i] = rle->start; } - /* Update control register with 0 count and disable MSI. */ - msi->msi_ctrl &= ~(PCIM_MSICTRL_MME_MASK | PCIM_MSICTRL_MSI_ENABLE); + /* Update control register with 0 count. */ + KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE), + ("%s: MSI still enabled", __func__)); + msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK; pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl, 2); @@ -1609,6 +1827,8 @@ pci_release_msi_method(device_t dev, device_t child) /* Update alloc count. */ msi->msi_alloc = 0; + msi->msi_addr = 0; + msi->msi_data = 0; return (0); } @@ -2449,6 +2669,134 @@ pci_driver_added(device_t dev, driver_t *driver) } int +pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags, + driver_filter_t *filter, driver_intr_t *intr, void *arg, void **cookiep) +{ + struct pci_devinfo *dinfo; + struct msix_table_entry *mte; + struct msix_vector *mv; + uint64_t addr; + uint32_t data; + void *cookie; + int error, rid; + + error = bus_generic_setup_intr(dev, child, irq, flags, filter, intr, + arg, &cookie); + if (error) + return (error); + + /* + * If this is a direct child, check to see if the interrupt is + * MSI or MSI-X. If so, ask our parent to map the MSI and give + * us the address and data register values. If we fail for some + * reason, teardown the interrupt handler. + */ + rid = rman_get_rid(irq); + if (device_get_parent(child) == dev && rid > 0) { + dinfo = device_get_ivars(child); + if (dinfo->cfg.msi.msi_alloc > 0) { + if (dinfo->cfg.msi.msi_addr == 0) { + KASSERT(dinfo->cfg.msi.msi_handlers == 0, + ("MSI has handlers, but vectors not mapped")); + error = PCIB_MAP_MSI(device_get_parent(dev), + child, rman_get_start(irq), &addr, &data); + if (error) + goto bad; + dinfo->cfg.msi.msi_addr = addr; + dinfo->cfg.msi.msi_data = data; + pci_enable_msi(child, addr, data); + } + dinfo->cfg.msi.msi_handlers++; + } else { + KASSERT(dinfo->cfg.msix.msix_alloc > 0, + ("No MSI or MSI-X interrupts allocated")); + KASSERT(rid <= dinfo->cfg.msix.msix_table_len, + ("MSI-X index too high")); + mte = &dinfo->cfg.msix.msix_table[rid - 1]; + KASSERT(mte->mte_vector != 0, ("no message vector")); + mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1]; + KASSERT(mv->mv_irq == rman_get_start(irq), + ("IRQ mismatch")); + if (mv->mv_address == 0) { + KASSERT(mte->mte_handlers == 0, + ("MSI-X table entry has handlers, but vector not mapped")); + error = PCIB_MAP_MSI(device_get_parent(dev), + child, rman_get_start(irq), &addr, &data); + if (error) + goto bad; + mv->mv_address = addr; + mv->mv_data = data; + } + if (mte->mte_handlers == 0) { + pci_enable_msix(child, rid - 1, mv->mv_address, + mv->mv_data); + pci_unmask_msix(child, rid - 1); + } + mte->mte_handlers++; + } + bad: + if (error) { + (void)bus_generic_teardown_intr(dev, child, irq, + cookie); + return (error); + } + } + *cookiep = cookie; + return (0); +} + +int +pci_teardown_intr(device_t dev, device_t child, struct resource *irq, + void *cookie) +{ + struct msix_table_entry *mte; + struct resource_list_entry *rle; + struct pci_devinfo *dinfo; + int error, rid; + + /* + * If this is a direct child, check to see if the interrupt is + * MSI or MSI-X. If so, decrement the appropriate handlers + * count and mask the MSI-X message, or disable MSI messages + * if the count drops to 0. + */ + if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE)) + return (EINVAL); + rid = rman_get_rid(irq); + if (device_get_parent(child) == dev && rid > 0) { + dinfo = device_get_ivars(child); + rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid); + if (rle->res != irq) + return (EINVAL); + if (dinfo->cfg.msi.msi_alloc > 0) { + KASSERT(rid <= dinfo->cfg.msi.msi_alloc, + ("MSI-X index too high")); + if (dinfo->cfg.msi.msi_handlers == 0) + return (EINVAL); + dinfo->cfg.msi.msi_handlers--; + if (dinfo->cfg.msi.msi_handlers == 0) + pci_disable_msi(child); + } else { + KASSERT(dinfo->cfg.msix.msix_alloc > 0, + ("No MSI or MSI-X interrupts allocated")); + KASSERT(rid <= dinfo->cfg.msix.msix_table_len, + ("MSI-X index too high")); + mte = &dinfo->cfg.msix.msix_table[rid - 1]; + if (mte->mte_handlers == 0) + return (EINVAL); + mte->mte_handlers--; + if (mte->mte_handlers == 0) + pci_mask_msix(child, rid - 1); + } + } + error = bus_generic_teardown_intr(dev, child, irq, cookie); + if (device_get_parent(child) == dev && rid > 0) + KASSERT(error == 0, + ("%s: generic teardown failed for MSI/MSI-X", __func__)); + return (error); +} + +int pci_print_child(device_t dev, device_t child) { struct pci_devinfo *dinfo; @@ -3222,12 +3570,11 @@ pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo) pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1); pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1); - /* - * Restore MSI configuration if it is present. If MSI is enabled, - * then restore the data and addr registers. - */ + /* Restore MSI and MSI-X configurations if they are present. */ if (dinfo->cfg.msi.msi_location != 0) pci_resume_msi(dev); + if (dinfo->cfg.msix.msix_location != 0) + pci_resume_msix(dev); } void |