summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2016-07-12 18:20:18 +0900
committerBjorn Helgaas <bhelgaas@google.com>2016-07-21 15:57:03 -0500
commit4ef33685aa0957d771e068b60a5f3ca6b47ade1c (patch)
tree34e9fee3bd177657e18f56a4d5d0701f6f3c20f5
parentaff171641d181ea573380efc3f559c9de4741fc5 (diff)
downloadop-kernel-dev-4ef33685aa0957d771e068b60a5f3ca6b47ade1c.zip
op-kernel-dev-4ef33685aa0957d771e068b60a5f3ca6b47ade1c.tar.gz
PCI: Spread interrupt vectors in pci_alloc_irq_vectors()
Set the affinity_mask in the PCI device before allocating vectors so that the affinity can be propagated through the MSI descriptor structures to the core IRQ code. To facilitate this, new __pci_enable_msi_range() and __pci_enable_msix_range() helpers are factored out of their not prefixed variants which assigning the new IRQ affinity mask in the PCI device so that the low-level interrupt code can perform the interrupt affinity assignment and do node-local allocations. A new PCI_IRQ_NOAFFINITY flag is added to pci_alloc_irq_vectors() so that this function can also be used by drivers that don't wish to use the automatic affinity assignment. [bhelgaas: omit "else" after "return" consistently] Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Bjorn Helgaas <bhelgaas@google.com> Reviewed-by: Alexander Gordeev <agordeev@redhat.com>
-rw-r--r--Documentation/PCI/MSI-HOWTO.txt4
-rw-r--r--drivers/pci/msi.c134
-rw-r--r--include/linux/pci.h2
3 files changed, 95 insertions, 45 deletions
diff --git a/Documentation/PCI/MSI-HOWTO.txt b/Documentation/PCI/MSI-HOWTO.txt
index 0ac612b..c55df29 100644
--- a/Documentation/PCI/MSI-HOWTO.txt
+++ b/Documentation/PCI/MSI-HOWTO.txt
@@ -99,6 +99,10 @@ PCI_IRQ_NOMSI and PCI_IRQ_NOMSIX flag in case a device claims to support
MSI or MSI-X, but the support is broken, or to pass PCI_IRQ_NOLEGACY in
case the device does not support legacy interrupt lines.
+By default this function will spread the interrupts around the available
+CPUs, but this feature can be disabled by passing the PCI_IRQ_NOAFFINITY
+flag.
+
To get the Linux IRQ numbers passed to request_irq() and free_irq() and the
vectors, use the following function:
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 5e5ab47..a02981e 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -569,6 +569,7 @@ static struct msi_desc *msi_setup_entry(struct pci_dev *dev, int nvec)
entry->msi_attrib.multi_cap = (control & PCI_MSI_FLAGS_QMASK) >> 1;
entry->msi_attrib.multiple = ilog2(__roundup_pow_of_two(nvec));
entry->nvec_used = nvec;
+ entry->affinity = dev->irq_affinity;
if (control & PCI_MSI_FLAGS_64BIT)
entry->mask_pos = dev->msi_cap + PCI_MSI_MASK_64;
@@ -680,10 +681,18 @@ static void __iomem *msix_map_region(struct pci_dev *dev, unsigned nr_entries)
static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
struct msix_entry *entries, int nvec)
{
+ const struct cpumask *mask = NULL;
struct msi_desc *entry;
- int i;
+ int cpu = -1, i;
for (i = 0; i < nvec; i++) {
+ if (dev->irq_affinity) {
+ cpu = cpumask_next(cpu, dev->irq_affinity);
+ if (cpu >= nr_cpu_ids)
+ cpu = cpumask_first(dev->irq_affinity);
+ mask = cpumask_of(cpu);
+ }
+
entry = alloc_msi_entry(&dev->dev);
if (!entry) {
if (!i)
@@ -703,6 +712,7 @@ static int msix_setup_entries(struct pci_dev *dev, void __iomem *base,
entry->msi_attrib.default_irq = dev->irq;
entry->mask_base = base;
entry->nvec_used = 1;
+ entry->affinity = mask;
list_add_tail(&entry->list, dev_to_msi_list(&dev->dev));
}
@@ -1028,19 +1038,8 @@ int pci_msi_enabled(void)
}
EXPORT_SYMBOL(pci_msi_enabled);
-/**
- * pci_enable_msi_range - configure device's MSI capability structure
- * @dev: device to configure
- * @minvec: minimal number of interrupts to configure
- * @maxvec: maximum number of interrupts to configure
- *
- * This function tries to allocate a maximum possible number of interrupts in a
- * range between @minvec and @maxvec. It returns a negative errno if an error
- * occurs. If it succeeds, it returns the actual number of interrupts allocated
- * and updates the @dev's irq member to the lowest new interrupt number;
- * the other interrupt numbers allocated to this device are consecutive.
- **/
-int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
+static int __pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec,
+ unsigned int flags)
{
int nvec;
int rc;
@@ -1063,25 +1062,85 @@ int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
nvec = pci_msi_vec_count(dev);
if (nvec < 0)
return nvec;
- else if (nvec < minvec)
+ if (nvec < minvec)
return -EINVAL;
- else if (nvec > maxvec)
+
+ if (nvec > maxvec)
nvec = maxvec;
- do {
+ for (;;) {
+ if (!(flags & PCI_IRQ_NOAFFINITY)) {
+ dev->irq_affinity = irq_create_affinity_mask(&nvec);
+ if (nvec < minvec)
+ return -ENOSPC;
+ }
+
rc = msi_capability_init(dev, nvec);
- if (rc < 0) {
+ if (rc == 0)
+ return nvec;
+
+ kfree(dev->irq_affinity);
+ dev->irq_affinity = NULL;
+
+ if (rc < 0)
return rc;
- } else if (rc > 0) {
- if (rc < minvec)
+ if (rc < minvec)
+ return -ENOSPC;
+
+ nvec = rc;
+ }
+}
+
+/**
+ * pci_enable_msi_range - configure device's MSI capability structure
+ * @dev: device to configure
+ * @minvec: minimal number of interrupts to configure
+ * @maxvec: maximum number of interrupts to configure
+ *
+ * This function tries to allocate a maximum possible number of interrupts in a
+ * range between @minvec and @maxvec. It returns a negative errno if an error
+ * occurs. If it succeeds, it returns the actual number of interrupts allocated
+ * and updates the @dev's irq member to the lowest new interrupt number;
+ * the other interrupt numbers allocated to this device are consecutive.
+ **/
+int pci_enable_msi_range(struct pci_dev *dev, int minvec, int maxvec)
+{
+ return __pci_enable_msi_range(dev, minvec, maxvec, PCI_IRQ_NOAFFINITY);
+}
+EXPORT_SYMBOL(pci_enable_msi_range);
+
+static int __pci_enable_msix_range(struct pci_dev *dev,
+ struct msix_entry *entries, int minvec, int maxvec,
+ unsigned int flags)
+{
+ int nvec = maxvec;
+ int rc;
+
+ if (maxvec < minvec)
+ return -ERANGE;
+
+ for (;;) {
+ if (!(flags & PCI_IRQ_NOAFFINITY)) {
+ dev->irq_affinity = irq_create_affinity_mask(&nvec);
+ if (nvec < minvec)
return -ENOSPC;
- nvec = rc;
}
- } while (rc);
- return nvec;
+ rc = pci_enable_msix(dev, entries, nvec);
+ if (rc == 0)
+ return nvec;
+
+ kfree(dev->irq_affinity);
+ dev->irq_affinity = NULL;
+
+ if (rc < 0)
+ return rc;
+ if (rc < minvec)
+ return -ENOSPC;
+
+ nvec = rc;
+ }
}
-EXPORT_SYMBOL(pci_enable_msi_range);
/**
* pci_enable_msix_range - configure device's MSI-X capability structure
@@ -1099,26 +1158,10 @@ EXPORT_SYMBOL(pci_enable_msi_range);
* with new allocated MSI-X interrupts.
**/
int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries,
- int minvec, int maxvec)
+ int minvec, int maxvec)
{
- int nvec = maxvec;
- int rc;
-
- if (maxvec < minvec)
- return -ERANGE;
-
- do {
- rc = pci_enable_msix(dev, entries, nvec);
- if (rc < 0) {
- return rc;
- } else if (rc > 0) {
- if (rc < minvec)
- return -ENOSPC;
- nvec = rc;
- }
- } while (rc);
-
- return nvec;
+ return __pci_enable_msix_range(dev, entries, minvec, maxvec,
+ PCI_IRQ_NOAFFINITY);
}
EXPORT_SYMBOL(pci_enable_msix_range);
@@ -1145,13 +1188,14 @@ int pci_alloc_irq_vectors(struct pci_dev *dev, unsigned int min_vecs,
int vecs = -ENOSPC;
if (!(flags & PCI_IRQ_NOMSIX)) {
- vecs = pci_enable_msix_range(dev, NULL, min_vecs, max_vecs);
+ vecs = __pci_enable_msix_range(dev, NULL, min_vecs, max_vecs,
+ flags);
if (vecs > 0)
return vecs;
}
if (!(flags & PCI_IRQ_NOMSI)) {
- vecs = pci_enable_msi_range(dev, min_vecs, max_vecs);
+ vecs = __pci_enable_msi_range(dev, min_vecs, max_vecs, flags);
if (vecs > 0)
return vecs;
}
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 52ecd49..f140661 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -320,6 +320,7 @@ struct pci_dev {
* directly, use the values stored here. They might be different!
*/
unsigned int irq;
+ struct cpumask *irq_affinity;
struct resource resource[DEVICE_COUNT_RESOURCE]; /* I/O and memory regions + expansion ROMs */
bool match_driver; /* Skip attaching driver */
@@ -1240,6 +1241,7 @@ int pci_set_vga_state(struct pci_dev *pdev, bool decode,
#define PCI_IRQ_NOLEGACY (1 << 0) /* don't use legacy interrupts */
#define PCI_IRQ_NOMSI (1 << 1) /* don't use MSI interrupts */
#define PCI_IRQ_NOMSIX (1 << 2) /* don't use MSI-X interrupts */
+#define PCI_IRQ_NOAFFINITY (1 << 3) /* don't auto-assign affinity */
/* kmem_cache style wrapper around pci_alloc_consistent() */
OpenPOWER on IntegriCloud