summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjhb <jhb@FreeBSD.org>2006-11-13 22:23:34 +0000
committerjhb <jhb@FreeBSD.org>2006-11-13 22:23:34 +0000
commitfa70d013974c2fa6347ccc628becfdfcdbddc617 (patch)
tree73f5aafc03e3a710fa43be47c1b42fa68959a2bb
parentd055bdf0ca2fa4ba26cc9a2bf7648eff24fb1cdd (diff)
downloadFreeBSD-src-fa70d013974c2fa6347ccc628becfdfcdbddc617.zip
FreeBSD-src-fa70d013974c2fa6347ccc628becfdfcdbddc617.tar.gz
MD support for PCI Message Signalled Interrupts on amd64 and i386:
- Add a new apic_alloc_vectors() method to the local APIC support code to allocate N contiguous IDT vectors (aligned on an M >= N boundary). This function is used to allocate IDT vectors for a group of MSI messages. - Add MSI and MSI-X PICs. The PIC code here provides methods to manage edge-triggered MSI messages as x86 interrupt sources. In addition to the PIC methods, msi.c also includes methods to allocate and release MSI and MSI-X messages. For x86, we allow for up to 128 different MSI IRQs starting at IRQ 256 (IRQs 0-15 are reserved for ISA IRQs, 16-254 for APIC PCI IRQs, and IRQ 255 is reserved). - Add pcib_(alloc|release)_msi[x]() methods to the MD x86 PCI bridge drivers to bubble the request up to the nexus driver. - Add pcib_(alloc|release)_msi[x]() methods to the x86 nexus drivers that ask the MSI PIC code to allocate resources and IDT vectors. MFC after: 2 months
-rw-r--r--sys/amd64/amd64/local_apic.c62
-rw-r--r--sys/amd64/amd64/mptable_pci.c8
-rw-r--r--sys/amd64/amd64/msi.c511
-rw-r--r--sys/amd64/amd64/nexus.c53
-rw-r--r--sys/amd64/include/apicvar.h1
-rw-r--r--sys/amd64/include/intr_machdep.h21
-rw-r--r--sys/amd64/pci/pci_bus.c4
-rw-r--r--sys/conf/files.amd641
-rw-r--r--sys/conf/files.i3861
-rw-r--r--sys/i386/i386/local_apic.c62
-rw-r--r--sys/i386/i386/mptable_pci.c8
-rw-r--r--sys/i386/i386/msi.c511
-rw-r--r--sys/i386/i386/nexus.c62
-rw-r--r--sys/i386/include/apicvar.h1
-rw-r--r--sys/i386/include/intr_machdep.h21
-rw-r--r--sys/i386/pci/pci_bus.c8
16 files changed, 1327 insertions, 8 deletions
diff --git a/sys/amd64/amd64/local_apic.c b/sys/amd64/amd64/local_apic.c
index a38d43c..14ce4ce 100644
--- a/sys/amd64/amd64/local_apic.c
+++ b/sys/amd64/amd64/local_apic.c
@@ -744,6 +744,65 @@ apic_alloc_vector(u_int irq)
panic("Couldn't find an APIC vector for IRQ %u", irq);
}
+/*
+ * Reserve a block of 'count' consecutive free IDT vectors whose first
+ * slot sits on an 'align' boundary, binding slot N of the block to
+ * irqs[N].  Both 'count' and 'align' are asserted to be powers of two
+ * with 'align' >= 'count'.  The search is a first fit scan of
+ * ioint_irqs[] made with icu_lock held.  Returns the IDT vector of the
+ * first slot, or 0 if no suitable block is free.
+ */
+u_int
+apic_alloc_vectors(u_int *irqs, u_int count, u_int align)
+{
+	u_int base, i, len, slot;
+
+	KASSERT(powerof2(count), ("bad count"));
+	KASSERT(powerof2(align), ("bad align"));
+	KASSERT(align >= count, ("align < count"));
+#ifdef INVARIANTS
+	for (i = 0; i < count; i++)
+		KASSERT(irqs[i] < NUM_IO_INTS, ("Invalid IRQ %u at index %u",
+		    irqs[i], i));
+#endif
+
+	base = 0;
+	len = 0;
+	mtx_lock_spin(&icu_lock);
+	for (slot = 0; slot < APIC_NUM_IOINTS; slot++) {
+
+		/* An occupied slot breaks whatever run was being built. */
+		if (ioint_irqs[slot] != 0) {
+			base = 0;
+			len = 0;
+			continue;
+		}
+
+		/* A run may only begin on an 'align' boundary. */
+		if (len == 0) {
+			if ((slot & (align - 1)) != 0)
+				continue;
+			base = slot;
+		}
+
+		/* Extend the run; keep scanning until it is long enough. */
+		if (++len < count)
+			continue;
+
+		/* Claim every slot in the run and report where it starts. */
+		for (i = 0; i < count; i++)
+			ioint_irqs[base + i] = irqs[i];
+		mtx_unlock_spin(&icu_lock);
+		return (base + APIC_IO_INTS);
+	}
+	mtx_unlock_spin(&icu_lock);
+	printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count);
+	return (0);
+}
+
void
apic_enable_vector(u_int vector)
{
@@ -1002,6 +1061,9 @@ apic_setup_io(void *dummy __unused)
intr_register_pic(&lapic_pic);
if (bootverbose)
lapic_dump("BSP");
+
+ /* Enable the MSI "pic". */
+ msi_init();
}
SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_SECOND, apic_setup_io, NULL)
diff --git a/sys/amd64/amd64/mptable_pci.c b/sys/amd64/amd64/mptable_pci.c
index 43e48b4..16bc6a9 100644
--- a/sys/amd64/amd64/mptable_pci.c
+++ b/sys/amd64/amd64/mptable_pci.c
@@ -96,6 +96,10 @@ static device_method_t mptable_hostb_methods[] = {
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt),
+ DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
+ DEVMETHOD(pcib_release_msi, pcib_release_msi),
+ DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
+ DEVMETHOD(pcib_release_msix, pcib_release_msix),
{ 0, 0 }
};
@@ -148,6 +152,10 @@ static device_method_t mptable_pcib_pci_methods[] = {
DEVMETHOD(pcib_read_config, pcib_read_config),
DEVMETHOD(pcib_write_config, pcib_write_config),
DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt),
+ DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
+ DEVMETHOD(pcib_release_msi, pcib_release_msi),
+ DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
+ DEVMETHOD(pcib_release_msix, pcib_release_msix),
{0, 0}
};
diff --git a/sys/amd64/amd64/msi.c b/sys/amd64/amd64/msi.c
new file mode 100644
index 0000000..9bbdf83
--- /dev/null
+++ b/sys/amd64/amd64/msi.c
@@ -0,0 +1,511 @@
+/*-
+ * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Support for PCI Message Signalled Interrupts (MSI). MSI interrupts on
+ * x86 are basically APIC messages that the northbridge delivers directly
+ * to the local APICs as if they had come from an I/O APIC.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/sx.h>
+#include <sys/systm.h>
+#include <machine/apicreg.h>
+#include <machine/md_var.h>
+#include <machine/frame.h>
+#include <machine/intr_machdep.h>
+#include <machine/apicvar.h>
+#include <dev/pci/pcivar.h>
+
+/*
+ * Fields in address for Intel MSI messages.  MSI_INTEL_ADDR_DEST is the
+ * mask of the destination field (bits 12-19); INTEL_ADDR() fills it by
+ * shifting the target local APIC ID left by 12.  The indented defines
+ * are the values a field may take.
+ * NOTE(review): RH and DM presumably name the redirection hint and
+ * destination mode bits of the Intel message address format -- confirm
+ * against the Intel SDM.
+ */
+#define MSI_INTEL_ADDR_DEST 0x000ff000
+#define MSI_INTEL_ADDR_RH 0x00000008
+# define MSI_INTEL_ADDR_RH_ON 0x00000008
+# define MSI_INTEL_ADDR_RH_OFF 0x00000000
+#define MSI_INTEL_ADDR_DM 0x00000004
+# define MSI_INTEL_ADDR_DM_PHYSICAL 0x00000000
+# define MSI_INTEL_ADDR_DM_LOGICAL 0x00000004
+
+/*
+ * Fields in data for Intel MSI messages.  Apart from the polarity bit,
+ * these reuse the IOART_* definitions rather than defining new values.
+ */
+#define MSI_INTEL_DATA_TRGRMOD IOART_TRGRMOD /* Trigger mode. */
+# define MSI_INTEL_DATA_TRGREDG IOART_TRGREDG
+# define MSI_INTEL_DATA_TRGRLVL IOART_TRGRLVL
+#define MSI_INTEL_DATA_LEVEL 0x00004000 /* Polarity. */
+# define MSI_INTEL_DATA_DEASSERT 0x00000000
+# define MSI_INTEL_DATA_ASSERT 0x00004000
+#define MSI_INTEL_DATA_DELMOD IOART_DELMOD /* Delivery mode. */
+# define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED
+# define MSI_INTEL_DATA_DELLOPRI IOART_DELLOPRI
+# define MSI_INTEL_DATA_DELSMI IOART_DELSMI
+# define MSI_INTEL_DATA_DELNMI IOART_DELNMI
+# define MSI_INTEL_DATA_DELINIT IOART_DELINIT
+# define MSI_INTEL_DATA_DELEXINT IOART_DELEXINT
+#define MSI_INTEL_DATA_INTVEC IOART_INTVEC /* Interrupt vector. */
+
+/*
+ * Build the Intel MSI message address and data values from a source.
+ * AMD64 systems seem to be compatible, so we use the same macros for both.
+ *
+ * 'msi' is a pointer to a struct msi_intsrc.  INTEL_ADDR() combines the
+ * base address with the source's msi_cpu (shifted into the destination
+ * field) and the RH_OFF and DM_PHYSICAL settings.  INTEL_DATA() combines
+ * the edge trigger and fixed delivery settings with the source's
+ * msi_vector.
+ */
+#define INTEL_ADDR(msi) \
+ (MSI_INTEL_ADDR_BASE | (msi)->msi_cpu << 12 | \
+ MSI_INTEL_ADDR_RH_OFF | MSI_INTEL_ADDR_DM_PHYSICAL)
+#define INTEL_DATA(msi) \
+ (MSI_INTEL_DATA_TRGREDG | MSI_INTEL_DATA_DELFIXED | (msi)->msi_vector)
+
+/* Malloc type for the struct msi_intsrc objects allocated in this file. */
+static MALLOC_DEFINE(M_MSI, "msi", "PCI MSI");
+
+/*
+ * MSI sources are bunched into groups. This is because MSI forces
+ * all of the messages to share the address and data registers and
+ * thus certain properties (such as the local APIC ID target on x86).
+ * Each group has a 'first' source that contains information global to
+ * the group. These fields are marked with (g) below.
+ *
+ * Note that local APIC ID is kind of special. Each message will be
+ * assigned an ID by the system; however, a group will use the ID from
+ * the first message.
+ *
+ * For MSI-X, each message is isolated, and msi_index indicates the
+ * index of this message in the device's MSI-X table.
+ *
+ * msi_intsrc must stay the first member: the PIC methods in this file
+ * cast the struct intsrc pointer they are handed directly to a
+ * struct msi_intsrc pointer.  A source whose msi_dev is NULL is treated
+ * as free by msi_alloc() and msix_alloc().
+ */
+struct msi_intsrc {
+ struct intsrc msi_intsrc;
+ device_t msi_dev; /* Owning device. (g) */
+ struct msi_intsrc *msi_first; /* First source in group. */
+ u_int msi_irq; /* IRQ cookie. */
+ u_int msi_index; /* Index of this message. */
+ u_int msi_msix; /* MSI-X message. */
+ u_int msi_vector:8; /* IDT vector. */
+ u_int msi_cpu:8; /* Local APIC ID. (g) */
+ u_int msi_count:8; /* Messages in this group. (g) */
+};
+
+/* PIC methods implemented below for the MSI and MSI-X PICs. */
+static void msi_enable_source(struct intsrc *isrc);
+static void msi_disable_source(struct intsrc *isrc, int eoi);
+static void msi_eoi_source(struct intsrc *isrc);
+static void msi_enable_intr(struct intsrc *isrc);
+static int msi_vector(struct intsrc *isrc);
+static int msi_source_pending(struct intsrc *isrc);
+static int msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
+ enum intr_polarity pol);
+static void msi_assign_cpu(struct intsrc *isrc, u_int apic_id);
+static void msix_enable_intr(struct intsrc *isrc);
+static int msix_source_pending(struct intsrc *isrc);
+static void msix_assign_cpu(struct intsrc *isrc, u_int apic_id);
+
+/*
+ * Both PICs are initialized positionally, so the order of the entries
+ * must match the member order of struct pic.  msix_pic reuses the MSI
+ * methods except in the three slots where msi_pic has msi_enable_intr,
+ * msi_source_pending and msi_assign_cpu; those get msix_* variants.
+ * Two slots are left NULL in both tables.
+ */
+struct pic msi_pic = { msi_enable_source, msi_disable_source, msi_eoi_source,
+ msi_enable_intr, msi_vector, msi_source_pending,
+ NULL, NULL, msi_config_intr, msi_assign_cpu };
+struct pic msix_pic = { msi_enable_source, msi_disable_source, msi_eoi_source,
+ msix_enable_intr, msi_vector, msix_source_pending,
+ NULL, NULL, msi_config_intr, msix_assign_cpu };
+
+/* Set to 1 by msi_init() on a supported CPU; the allocators fail without it. */
+static int msi_enabled;
+/* Held exclusively while MSI and MSI-X sources are allocated or released. */
+static struct sx msi_sx;
+
+/*
+ * First method slot of both msi_pic and msix_pic.  Intentionally a
+ * no-op: nothing is done per source here.
+ */
+static void
+msi_enable_source(struct intsrc *isrc)
+{
+}
+
+/*
+ * Second method slot of both msi_pic and msix_pic.  The source itself
+ * is left untouched; the only work done is the lapic_eoi() call made
+ * when the caller passes PIC_EOI in 'eoi'.
+ */
+static void
+msi_disable_source(struct intsrc *isrc, int eoi)
+{
+
+	if (eoi != PIC_EOI)
+		return;
+	lapic_eoi();
+}
+
+/*
+ * Third method slot of both msi_pic and msix_pic.  Unconditionally calls
+ * lapic_eoi(); 'isrc' is not consulted.
+ */
+static void
+msi_eoi_source(struct intsrc *isrc)
+{
+
+ lapic_eoi();
+}
+
+/*
+ * Enable delivery for an MSI source.  The device is programmed only when
+ * the source is message 0 of its group; the source's IDT vector is
+ * enabled in every case.
+ */
+static void
+msi_enable_intr(struct intsrc *isrc)
+{
+	struct msi_intsrc *src;
+
+	src = (struct msi_intsrc *)isrc;
+
+	/*
+	 * The whole group can only be switched on at once, so the device is
+	 * programmed when message 0 gets its handler.  This relies on every
+	 * driver registering a handler for its first message.
+	 */
+	if (src->msi_index == 0) {
+		mtx_lock_spin(&icu_lock);
+		pci_enable_msi(src->msi_dev, INTEL_ADDR(src), INTEL_DATA(src));
+		mtx_unlock_spin(&icu_lock);
+	}
+	apic_enable_vector(src->msi_vector);
+}
+
+/*
+ * "Vector" method shared by msi_pic and msix_pic.  Despite the name, the
+ * value returned is the source's IRQ cookie (msi_irq), not its IDT
+ * vector.
+ */
+static int
+msi_vector(struct intsrc *isrc)
+{
+
+	return (((struct msi_intsrc *)isrc)->msi_irq);
+}
+
+/*
+ * "Source pending" method for msi_pic.  Always reports 0; no device
+ * state is examined.
+ */
+static int
+msi_source_pending(struct intsrc *isrc)
+{
+
+ return (0);
+}
+
+/*
+ * "Config intr" method shared by msi_pic and msix_pic.  Trigger mode and
+ * polarity cannot be changed through this method: every request is
+ * refused with ENODEV.
+ */
+static int
+msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
+ enum intr_polarity pol)
+{
+
+ return (ENODEV);
+}
+
+/*
+ * Record 'apic_id' as the target local APIC of an MSI source and, if the
+ * source is already enabled, rewrite the device's MSI address and data
+ * under icu_lock so the change takes effect.
+ *
+ * NOTE(review): the address and data are built from this source's own
+ * msi_cpu and msi_vector even when it is not the first message of its
+ * group -- confirm that callers only re-target the first message.
+ */
+static void
+msi_assign_cpu(struct intsrc *isrc, u_int apic_id)
+{
+	struct msi_intsrc *src;
+
+	src = (struct msi_intsrc *)isrc;
+	src->msi_cpu = apic_id;
+	if (bootverbose)
+		printf("msi: Assigning MSI IRQ %d to local APIC %u\n",
+		    src->msi_irq, src->msi_cpu);
+
+	mtx_lock_spin(&icu_lock);
+	if (isrc->is_enabled)
+		pci_enable_msi(src->msi_dev, INTEL_ADDR(src), INTEL_DATA(src));
+	mtx_unlock_spin(&icu_lock);
+}
+
+/*
+ * Enable delivery for an MSI-X source: program the message's table entry
+ * (selected by msi_index) with its address and data, unmask it, and then
+ * enable the source's IDT vector.  The device accesses are made with
+ * icu_lock held.
+ */
+static void
+msix_enable_intr(struct intsrc *isrc)
+{
+	struct msi_intsrc *src;
+
+	src = (struct msi_intsrc *)isrc;
+
+	mtx_lock_spin(&icu_lock);
+	pci_enable_msix(src->msi_dev, src->msi_index, INTEL_ADDR(src),
+	    INTEL_DATA(src));
+	pci_unmask_msix(src->msi_dev, src->msi_index);
+	mtx_unlock_spin(&icu_lock);
+
+	apic_enable_vector(src->msi_vector);
+}
+
+/*
+ * "Source pending" method for msix_pic.  Returns whatever
+ * pci_pending_msix() reports for this message's index on the owning
+ * device.
+ */
+static int
+msix_source_pending(struct intsrc *isrc)
+{
+	struct msi_intsrc *src;
+	int pending;
+
+	src = (struct msi_intsrc *)isrc;
+	pending = pci_pending_msix(src->msi_dev, src->msi_index);
+	return (pending);
+}
+
+/*
+ * Record 'apic_id' as the target local APIC of an MSI-X source and, if
+ * the source is already enabled, reprogram its table entry under
+ * icu_lock so the change takes effect.
+ */
+static void
+msix_assign_cpu(struct intsrc *isrc, u_int apic_id)
+{
+	struct msi_intsrc *src;
+
+	src = (struct msi_intsrc *)isrc;
+	src->msi_cpu = apic_id;
+	if (bootverbose)
+		printf("msi: Assigning MSI-X IRQ %d to local APIC %u\n",
+		    src->msi_irq, src->msi_cpu);
+
+	mtx_lock_spin(&icu_lock);
+	if (isrc->is_enabled)
+		pci_enable_msix(src->msi_dev, src->msi_index, INTEL_ADDR(src),
+		    INTEL_DATA(src));
+	mtx_unlock_spin(&icu_lock);
+}
+
+/*
+ * One-time setup of MSI support.  On CPUs whose vendor string is neither
+ * "GenuineIntel" nor "AuthenticAMD" this returns without doing anything,
+ * which leaves msi_enabled at 0.  Otherwise it sets msi_enabled,
+ * registers both PICs and initializes the msi_sx lock.
+ */
+void
+msi_init(void)
+{
+
+	/* Bail out unless the CPU vendor is one we know how to drive. */
+	if (strcmp(cpu_vendor, "GenuineIntel") != 0 &&
+	    strcmp(cpu_vendor, "AuthenticAMD") != 0)
+		return;
+
+	msi_enabled = 1;
+	intr_register_pic(&msi_pic);
+	intr_register_pic(&msix_pic);
+	sx_init(&msi_sx, "msi");
+}
+
+/*
+ * Try to allocate 'count' interrupt sources with contiguous IDT values. If
+ * we allocate any new sources, then their IRQ values will be at the end of
+ * the irqs[] array, with *newirq being the index of the first new IRQ value
+ * and *newcount being the number of new IRQ values added.
+ *
+ * 'dev' becomes the owner of every message in the group.  'maxcount' is
+ * passed to apic_alloc_vectors() as the alignment of the vector block.
+ * 'irqs' must have room for 'count' entries and receives the IRQ of each
+ * message.
+ *
+ * Returns 0 on success, ENXIO if MSI is not enabled or the MSI IRQ range
+ * cannot supply 'count' sources, and ENOSPC if no block of IDT vectors
+ * is available.  *newirq and *newcount are valid on every return; on
+ * ENOSPC they still describe any sources that were created.
+ */
+int
+msi_alloc(device_t dev, int count, int maxcount, int *irqs, int *newirq,
+    int *newcount)
+{
+	struct msi_intsrc *msi, *fsrc;
+	int cnt, i, j, vector;
+
+	*newirq = 0;
+	*newcount = 0;
+	if (!msi_enabled)
+		return (ENXIO);
+
+	sx_xlock(&msi_sx);
+
+	/* Try to find 'count' free IRQs. */
+	cnt = 0;
+	for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) {
+		msi = (struct msi_intsrc *)intr_lookup_source(i);
+
+		/* End of allocated sources, so break. */
+		if (msi == NULL)
+			break;
+
+		/* If this is a free one, save its IRQ in the array. */
+		if (msi->msi_dev == NULL) {
+			irqs[cnt] = i;
+			cnt++;
+			if (cnt == count)
+				break;
+		}
+	}
+
+	/* Do we need to create some new sources? */
+	if (cnt < count) {
+		/* If we would exceed the max, give up. */
+		if (i + (count - cnt) > FIRST_MSI_INT + NUM_MSI_INTS) {
+			sx_xunlock(&msi_sx);
+			return (ENXIO);
+		}
+
+		/* We need count - cnt more sources starting at index 'cnt'. */
+		*newirq = cnt;
+		*newcount = count - cnt;
+
+		/*
+		 * The loop is bounded by the number of sources to create
+		 * (*newcount), not by the index they start at (*newirq).
+		 * Here 'i' is the first IRQ that has no source yet.
+		 */
+		for (j = 0; j < *newcount; j++) {
+
+			/* Create a new MSI source. */
+			msi = malloc(sizeof(struct msi_intsrc), M_MSI,
+			    M_WAITOK | M_ZERO);
+			msi->msi_intsrc.is_pic = &msi_pic;
+			msi->msi_irq = i + j;
+			intr_register_source(&msi->msi_intsrc);
+
+			/* Add it to our array. */
+			irqs[cnt] = i + j;
+			cnt++;
+		}
+	}
+
+	/* Ok, we now have the IRQs allocated. */
+	KASSERT(cnt == count, ("count mismatch"));
+
+	/* Allocate 'count' IDT vectors. */
+	vector = apic_alloc_vectors(irqs, count, maxcount);
+	if (vector == 0) {
+		sx_xunlock(&msi_sx);
+		return (ENOSPC);
+	}
+
+	/* Assign IDT vectors and make these messages owned by 'dev'. */
+	fsrc = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
+	for (i = 0; i < count; i++) {
+		msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
+
+		/* A reused source may last have been an MSI-X message. */
+		msi->msi_intsrc.is_pic = &msi_pic;
+		msi->msi_dev = dev;
+		msi->msi_vector = vector + i;
+		msi->msi_index = i;
+		msi->msi_first = fsrc;
+
+		/* XXX: Somewhat gross. */
+		msi->msi_intsrc.is_enabled = 0;
+	}
+	fsrc->msi_count = count;
+	sx_xunlock(&msi_sx);
+
+	return (0);
+}
+
+/*
+ * Release an entire MSI group.  irqs[0] must be the first message of the
+ * group and 'count' must equal the group's size; each message has its
+ * IDT vector freed and its ownership fields cleared so the source can be
+ * reused.
+ *
+ * Returns 0 on success, ENOENT if irqs[0] has no source, ENXIO if that
+ * source is not part of a group, and EINVAL if it is an MSI-X message,
+ * is not the first message of its group, or 'count' does not match.
+ */
+int
+msi_release(int *irqs, int count)
+{
+	struct msi_intsrc *head, *member;
+	int error, n;
+
+	sx_xlock(&msi_sx);
+	head = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
+	if (head == NULL) {
+		error = ENOENT;
+		goto out;
+	}
+
+	/* MSI-X messages must go through msix_release() instead. */
+	if (head->msi_msix) {
+		error = EINVAL;
+		goto out;
+	}
+
+	/* A source with no group pointer is not currently allocated. */
+	if (head->msi_first == NULL) {
+		error = ENXIO;
+		goto out;
+	}
+
+	/* Only whole groups, named by their first message, can be freed. */
+	if (head->msi_first != head || head->msi_count != count) {
+		error = EINVAL;
+		goto out;
+	}
+	KASSERT(head->msi_index == 0, ("index mismatch"));
+
+	KASSERT(head->msi_dev != NULL, ("unowned group"));
+
+	/* Tear down every message after the first. */
+	for (n = 1; n < count; n++) {
+		member = (struct msi_intsrc *)intr_lookup_source(irqs[n]);
+		KASSERT(member->msi_first == head, ("message not in group"));
+		KASSERT(member->msi_dev == head->msi_dev, ("owner mismatch"));
+		member->msi_first = NULL;
+		member->msi_dev = NULL;
+		apic_free_vector(member->msi_vector, member->msi_irq);
+		member->msi_vector = 0;
+		member->msi_index = 0;
+	}
+
+	/* Finally tear down the first message and its group-wide state. */
+	head->msi_first = NULL;
+	head->msi_dev = NULL;
+	apic_free_vector(head->msi_vector, head->msi_irq);
+	head->msi_vector = 0;
+	head->msi_count = 0;
+	error = 0;
+out:
+	sx_xunlock(&msi_sx);
+	return (error);
+}
+
+/*
+ * Allocate one MSI-X interrupt source for 'dev'.  'index' is recorded as
+ * the message's index in the device's MSI-X table.  A free existing
+ * source is reused if there is one; otherwise a new source is created
+ * for the next unused MSI IRQ and *new is set to 1.
+ *
+ * On success the IRQ is stored in *irq and 0 is returned.  ENXIO is
+ * returned, with *irq untouched and *new set to 0, if MSI is not enabled
+ * or every MSI IRQ is already in use.
+ */
+int
+msix_alloc(device_t dev, int index, int *irq, int *new)
+{
+	struct msi_intsrc *msi;
+	int i, vector;
+
+	*new = 0;
+	if (!msi_enabled)
+		return (ENXIO);
+
+	sx_xlock(&msi_sx);
+
+	/* Find a free IRQ. */
+	msi = NULL;
+	for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) {
+		msi = (struct msi_intsrc *)intr_lookup_source(i);
+
+		/* End of allocated sources, so break. */
+		if (msi == NULL)
+			break;
+
+		/* Stop at the first free source. */
+		if (msi->msi_dev == NULL)
+			break;
+	}
+
+	/*
+	 * If the scan ran off the end, every MSI IRQ has a source and all
+	 * of them are owned.  'msi' then still points at the last in-use
+	 * source, so give up here rather than take it over.
+	 */
+	if (i == FIRST_MSI_INT + NUM_MSI_INTS) {
+		sx_xunlock(&msi_sx);
+		return (ENXIO);
+	}
+
+	/* Do we need to create a new source? */
+	if (msi == NULL) {
+		/* 'i' is within the MSI range here; see the check above. */
+		*new = 1;
+		msi = malloc(sizeof(struct msi_intsrc), M_MSI,
+		    M_WAITOK | M_ZERO);
+		msi->msi_intsrc.is_pic = &msix_pic;
+		msi->msi_irq = i;
+		intr_register_source(&msi->msi_intsrc);
+	}
+
+	/* Allocate an IDT vector. */
+	vector = apic_alloc_vector(i);
+
+	/* Setup source; a reused source may last have been plain MSI. */
+	msi->msi_intsrc.is_pic = &msix_pic;
+	msi->msi_dev = dev;
+	msi->msi_vector = vector;
+	msi->msi_index = index;
+	msi->msi_msix = 1;
+
+	/* XXX: Somewhat gross. */
+	msi->msi_intsrc.is_enabled = 0;
+	sx_xunlock(&msi_sx);
+
+	*irq = i;
+	return (0);
+}
+
+/*
+ * Release a single MSI-X message: free its IDT vector and clear its
+ * ownership fields so the source can be reused.
+ *
+ * Returns 0 on success, ENOENT if 'irq' has no source, and EINVAL if the
+ * source is not an MSI-X message.
+ */
+int
+msix_release(int irq)
+{
+	struct msi_intsrc *src;
+	int error;
+
+	sx_xlock(&msi_sx);
+	src = (struct msi_intsrc *)intr_lookup_source(irq);
+	if (src == NULL) {
+		error = ENOENT;
+		goto out;
+	}
+
+	/* Plain MSI messages must go through msi_release() instead. */
+	if (!src->msi_msix) {
+		error = EINVAL;
+		goto out;
+	}
+
+	KASSERT(src->msi_dev != NULL, ("unowned message"));
+
+	/* Return the source to the free state. */
+	src->msi_dev = NULL;
+	apic_free_vector(src->msi_vector, src->msi_irq);
+	src->msi_vector = 0;
+	src->msi_index = 0;
+	src->msi_msix = 0;
+	error = 0;
+out:
+	sx_xunlock(&msi_sx);
+	return (error);
+}
diff --git a/sys/amd64/amd64/nexus.c b/sys/amd64/amd64/nexus.c
index 478106c..b897b2b 100644
--- a/sys/amd64/amd64/nexus.c
+++ b/sys/amd64/amd64/nexus.c
@@ -61,6 +61,8 @@ __FBSDID("$FreeBSD$");
#include <machine/resource.h>
+#include "pcib_if.h"
+
#ifdef DEV_ISA
#include <isa/isavar.h>
#include <amd64/isa/isa.h>
@@ -100,6 +102,10 @@ static struct resource_list *nexus_get_reslist(device_t dev, device_t child);
static int nexus_set_resource(device_t, device_t, int, int, u_long, u_long);
static int nexus_get_resource(device_t, device_t, int, int, u_long *, u_long *);
static void nexus_delete_resource(device_t, device_t, int, int);
+static int nexus_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs);
+static int nexus_release_msi(device_t pcib, device_t dev, int count, int *irqs);
+static int nexus_alloc_msix(device_t pcib, device_t dev, int index, int *irq);
+static int nexus_release_msix(device_t pcib, device_t dev, int irq);
static device_method_t nexus_methods[] = {
/* Device interface */
@@ -125,6 +131,12 @@ static device_method_t nexus_methods[] = {
DEVMETHOD(bus_get_resource, nexus_get_resource),
DEVMETHOD(bus_delete_resource, nexus_delete_resource),
+ /* pcib interface */
+ DEVMETHOD(pcib_alloc_msi, nexus_alloc_msi),
+ DEVMETHOD(pcib_release_msi, nexus_release_msi),
+ DEVMETHOD(pcib_alloc_msix, nexus_alloc_msix),
+ DEVMETHOD(pcib_release_msix, nexus_release_msix),
+
{ 0, 0 }
};
@@ -504,6 +516,47 @@ nexus_delete_resource(device_t dev, device_t child, int type, int rid)
resource_list_delete(rl, type, rid);
}
+/*
+ * pcib_alloc_msix method for the nexus: obtain an MSI-X IRQ from
+ * msix_alloc() and, when that created a brand new IRQ, add it to
+ * irq_rman.  The result of msix_alloc() is returned unchanged.
+ */
+static int
+nexus_alloc_msix(device_t pcib, device_t dev, int index, int *irq)
+{
+	int created, rc;
+
+	rc = msix_alloc(dev, index, irq, &created);
+	if (created != 0)
+		rman_manage_region(&irq_rman, *irq, *irq);
+	return (rc);
+}
+
+/*
+ * pcib_release_msix method for the nexus: hand 'irq' to msix_release()
+ * and return its result.  'pcib' and 'dev' are not used.
+ */
+static int
+nexus_release_msix(device_t pcib, device_t dev, int irq)
+{
+
+ return (msix_release(irq));
+}
+
+/*
+ * pcib_alloc_msi method for the nexus: obtain 'count' MSI IRQs from
+ * msi_alloc() and add every IRQ it newly created to irq_rman.  The
+ * result of msi_alloc() is returned unchanged.
+ */
+static int
+nexus_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs)
+{
+	int base, fresh, irq, n, rc;
+
+	/* Get the messages first. */
+	rc = msi_alloc(dev, count, maxcount, irqs, &base, &fresh);
+
+	/*
+	 * New sources persist even when the allocation as a whole failed,
+	 * so their IRQs are always handed to the rman.
+	 */
+	for (n = 0; n < fresh; n++) {
+		irq = irqs[base + n];
+		rman_manage_region(&irq_rman, irq, irq);
+	}
+
+	return (rc);
+}
+
+/*
+ * pcib_release_msi method for the nexus: hand the IRQ group to
+ * msi_release() and return its result.  'pcib' and 'dev' are not used.
+ */
+static int
+nexus_release_msi(device_t pcib, device_t dev, int count, int *irqs)
+{
+
+ return (msi_release(irqs, count));
+}
+
#ifdef DEV_ISA
/*
* Placeholder which claims PnP 'devices' which describe system
diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h
index ebd8f04..6cc37c6 100644
--- a/sys/amd64/include/apicvar.h
+++ b/sys/amd64/include/apicvar.h
@@ -175,6 +175,7 @@ inthand_t
IDTVEC(apic_isr7), IDTVEC(spuriousint), IDTVEC(timerint);
u_int apic_alloc_vector(u_int irq);
+u_int apic_alloc_vectors(u_int *irqs, u_int count, u_int align);
void apic_enable_vector(u_int vector);
void apic_free_vector(u_int vector, u_int irq);
u_int apic_idt_to_irq(u_int vector);
diff --git a/sys/amd64/include/intr_machdep.h b/sys/amd64/include/intr_machdep.h
index 5043774..cddec7b 100644
--- a/sys/amd64/include/intr_machdep.h
+++ b/sys/amd64/include/intr_machdep.h
@@ -43,11 +43,18 @@
* 191 and still be safe since only interrupt sources in actual use will
* allocate IDT vectors.
*
- * For now we stick with 255 as ISA IRQs and PCI intline IRQs only allow
- * for IRQs in the range 0 - 254. When MSI support is added this number
- * will likely increase.
+ * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs.
+ * IRQ values from 256 upward are used by MSI. We leave 255 unused to avoid
+ * confusion since 255 is used in PCI to indicate an invalid IRQ.
*/
-#define NUM_IO_INTS 255
+#define NUM_MSI_INTS 128
+#define FIRST_MSI_INT 256
+#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS)
+
+/*
+ * Default base address for MSI messages on x86 platforms.
+ */
+#define MSI_INTEL_ADDR_BASE 0xfee00000
/*
* - 1 ??? dummy counter.
@@ -140,6 +147,12 @@ int intr_remove_handler(void *cookie);
void intr_resume(void);
void intr_suspend(void);
void intrcnt_add(const char *name, u_long **countp);
+void msi_init(void);
+int msi_alloc(device_t dev, int count, int maxcount, int *irqs, int *newirq,
+ int *newcount);
+int msi_release(int *irqs, int count);
+int msix_alloc(device_t dev, int index, int *irq, int *new);
+int msix_release(int irq);
#endif /* !LOCORE */
#endif /* _KERNEL */
diff --git a/sys/amd64/pci/pci_bus.c b/sys/amd64/pci/pci_bus.c
index 716b050..6efde3a 100644
--- a/sys/amd64/pci/pci_bus.c
+++ b/sys/amd64/pci/pci_bus.c
@@ -322,6 +322,10 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt),
+ DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
+ DEVMETHOD(pcib_release_msi, pcib_release_msi),
+ DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
+ DEVMETHOD(pcib_release_msix, pcib_release_msix),
{ 0, 0 }
};
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index 3d3bc8c..b961ab7 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -114,6 +114,7 @@ amd64/amd64/mp_watchdog.c optional mp_watchdog smp
amd64/amd64/mpboot.S optional smp
amd64/amd64/mptable.c optional mptable
amd64/amd64/mptable_pci.c optional mptable pci
+amd64/amd64/msi.c optional pci
amd64/amd64/nexus.c standard
amd64/amd64/pmap.c standard
amd64/amd64/prof_machdep.c optional profiling-routine
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index 09fc1b1..3de50fe 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -294,6 +294,7 @@ i386/i386/mp_watchdog.c optional mp_watchdog smp
i386/i386/mpboot.s optional smp
i386/i386/mptable.c optional apic
i386/i386/mptable_pci.c optional apic pci
+i386/i386/msi.c optional apic pci
i386/i386/nexus.c standard
i386/i386/perfmon.c optional perfmon
i386/i386/pmap.c standard
diff --git a/sys/i386/i386/local_apic.c b/sys/i386/i386/local_apic.c
index 596f236..040b8b1 100644
--- a/sys/i386/i386/local_apic.c
+++ b/sys/i386/i386/local_apic.c
@@ -746,6 +746,65 @@ apic_alloc_vector(u_int irq)
panic("Couldn't find an APIC vector for IRQ %u", irq);
}
+/*
+ * Reserve a block of 'count' consecutive free IDT vectors whose first
+ * slot sits on an 'align' boundary, binding slot N of the block to
+ * irqs[N].  Both 'count' and 'align' are asserted to be powers of two
+ * with 'align' >= 'count'.  The search is a first fit scan of
+ * ioint_irqs[] made with icu_lock held.  Returns the IDT vector of the
+ * first slot, or 0 if no suitable block is free.
+ */
+u_int
+apic_alloc_vectors(u_int *irqs, u_int count, u_int align)
+{
+	u_int base, i, len, slot;
+
+	KASSERT(powerof2(count), ("bad count"));
+	KASSERT(powerof2(align), ("bad align"));
+	KASSERT(align >= count, ("align < count"));
+#ifdef INVARIANTS
+	for (i = 0; i < count; i++)
+		KASSERT(irqs[i] < NUM_IO_INTS, ("Invalid IRQ %u at index %u",
+		    irqs[i], i));
+#endif
+
+	base = 0;
+	len = 0;
+	mtx_lock_spin(&icu_lock);
+	for (slot = 0; slot < APIC_NUM_IOINTS; slot++) {
+
+		/* An occupied slot breaks whatever run was being built. */
+		if (ioint_irqs[slot] != 0) {
+			base = 0;
+			len = 0;
+			continue;
+		}
+
+		/* A run may only begin on an 'align' boundary. */
+		if (len == 0) {
+			if ((slot & (align - 1)) != 0)
+				continue;
+			base = slot;
+		}
+
+		/* Extend the run; keep scanning until it is long enough. */
+		if (++len < count)
+			continue;
+
+		/* Claim every slot in the run and report where it starts. */
+		for (i = 0; i < count; i++)
+			ioint_irqs[base + i] = irqs[i];
+		mtx_unlock_spin(&icu_lock);
+		return (base + APIC_IO_INTS);
+	}
+	mtx_unlock_spin(&icu_lock);
+	printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count);
+	return (0);
+}
+
void
apic_enable_vector(u_int vector)
{
@@ -1005,6 +1064,9 @@ apic_setup_io(void *dummy __unused)
intr_register_pic(&lapic_pic);
if (bootverbose)
lapic_dump("BSP");
+
+ /* Enable the MSI "pic". */
+ msi_init();
}
SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_SECOND, apic_setup_io, NULL)
diff --git a/sys/i386/i386/mptable_pci.c b/sys/i386/i386/mptable_pci.c
index 43e48b4..16bc6a9 100644
--- a/sys/i386/i386/mptable_pci.c
+++ b/sys/i386/i386/mptable_pci.c
@@ -96,6 +96,10 @@ static device_method_t mptable_hostb_methods[] = {
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt),
+ DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
+ DEVMETHOD(pcib_release_msi, pcib_release_msi),
+ DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
+ DEVMETHOD(pcib_release_msix, pcib_release_msix),
{ 0, 0 }
};
@@ -148,6 +152,10 @@ static device_method_t mptable_pcib_pci_methods[] = {
DEVMETHOD(pcib_read_config, pcib_read_config),
DEVMETHOD(pcib_write_config, pcib_write_config),
DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt),
+ DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
+ DEVMETHOD(pcib_release_msi, pcib_release_msi),
+ DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
+ DEVMETHOD(pcib_release_msix, pcib_release_msix),
{0, 0}
};
diff --git a/sys/i386/i386/msi.c b/sys/i386/i386/msi.c
new file mode 100644
index 0000000..9bbdf83
--- /dev/null
+++ b/sys/i386/i386/msi.c
@@ -0,0 +1,511 @@
+/*-
+ * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of any co-contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Support for PCI Message Signalled Interrupts (MSI). MSI interrupts on
+ * x86 are basically APIC messages that the northbridge delivers directly
+ * to the local APICs as if they had come from an I/O APIC.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/bus.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/sx.h>
+#include <sys/systm.h>
+#include <machine/apicreg.h>
+#include <machine/md_var.h>
+#include <machine/frame.h>
+#include <machine/intr_machdep.h>
+#include <machine/apicvar.h>
+#include <dev/pci/pcivar.h>
+
+/* Fields in address for Intel MSI messages. */
+#define MSI_INTEL_ADDR_DEST 0x000ff000
+#define MSI_INTEL_ADDR_RH 0x00000008
+# define MSI_INTEL_ADDR_RH_ON 0x00000008
+# define MSI_INTEL_ADDR_RH_OFF 0x00000000
+#define MSI_INTEL_ADDR_DM 0x00000004
+# define MSI_INTEL_ADDR_DM_PHYSICAL 0x00000000
+# define MSI_INTEL_ADDR_DM_LOGICAL 0x00000004
+
+/* Fields in data for Intel MSI messages. */
+#define MSI_INTEL_DATA_TRGRMOD IOART_TRGRMOD /* Trigger mode. */
+# define MSI_INTEL_DATA_TRGREDG IOART_TRGREDG
+# define MSI_INTEL_DATA_TRGRLVL IOART_TRGRLVL
+#define MSI_INTEL_DATA_LEVEL 0x00004000 /* Polarity. */
+# define MSI_INTEL_DATA_DEASSERT 0x00000000
+# define MSI_INTEL_DATA_ASSERT 0x00004000
+#define MSI_INTEL_DATA_DELMOD IOART_DELMOD /* Delivery mode. */
+# define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED
+# define MSI_INTEL_DATA_DELLOPRI IOART_DELLOPRI
+# define MSI_INTEL_DATA_DELSMI IOART_DELSMI
+# define MSI_INTEL_DATA_DELNMI IOART_DELNMI
+# define MSI_INTEL_DATA_DELINIT IOART_DELINIT
+# define MSI_INTEL_DATA_DELEXINT IOART_DELEXINT
+#define MSI_INTEL_DATA_INTVEC IOART_INTVEC /* Interrupt vector. */
+
+/*
+ * Build Intel MSI message address and data values from a source. AMD64
+ * systems seem to be compatible, so we use the same macros for both.
+ *
+ * INTEL_ADDR() starts from MSI_INTEL_ADDR_BASE, ORs in the source's local
+ * APIC ID (msi_cpu) shifted left by 12 so that it lands in the
+ * MSI_INTEL_ADDR_DEST field, and selects MSI_INTEL_ADDR_RH_OFF and
+ * MSI_INTEL_ADDR_DM_PHYSICAL.
+ *
+ * INTEL_DATA() selects edge trigger and fixed delivery mode and ORs in the
+ * source's IDT vector (msi_vector).
+ */
+#define INTEL_ADDR(msi) \
+ (MSI_INTEL_ADDR_BASE | (msi)->msi_cpu << 12 | \
+ MSI_INTEL_ADDR_RH_OFF | MSI_INTEL_ADDR_DM_PHYSICAL)
+#define INTEL_DATA(msi) \
+ (MSI_INTEL_DATA_TRGREDG | MSI_INTEL_DATA_DELFIXED | (msi)->msi_vector)
+
+static MALLOC_DEFINE(M_MSI, "msi", "PCI MSI");
+
+/*
+ * MSI sources are bunched into groups. This is because MSI forces
+ * all of the messages to share the address and data registers and
+ * thus certain properties (such as the local APIC ID target on x86).
+ * Each group has a 'first' source that contains information global to
+ * the group. These fields are marked with (g) below.
+ *
+ * Note that local APIC ID is kind of special. Each message will be
+ * assigned an ID by the system; however, a group will use the ID from
+ * the first message.
+ *
+ * For MSI-X, each message is isolated, and msi_index indicates the
+ * index of this message in the device's MSI-X table.
+ *
+ * A source whose msi_dev is NULL is treated as free by msi_alloc() and
+ * msix_alloc() and may be handed out again.
+ *
+ * The embedded struct intsrc must remain the first member: the PIC methods
+ * in this file cast the struct intsrc pointer they are given directly to a
+ * struct msi_intsrc pointer.
+ */
+struct msi_intsrc {
+ struct intsrc msi_intsrc;
+ device_t msi_dev; /* Owning device, NULL when free. (g) */
+ struct msi_intsrc *msi_first; /* First source in group (MSI only). */
+ u_int msi_irq; /* IRQ cookie. */
+ u_int msi_index; /* Index of this message. */
+ u_int msi_msix; /* Non-zero for an MSI-X message. */
+ u_int msi_vector:8; /* IDT vector. */
+ u_int msi_cpu:8; /* Local APIC ID. (g) */
+ u_int msi_count:8; /* Messages in this group. (g) */
+};
+
+static void msi_enable_source(struct intsrc *isrc);
+static void msi_disable_source(struct intsrc *isrc, int eoi);
+static void msi_eoi_source(struct intsrc *isrc);
+static void msi_enable_intr(struct intsrc *isrc);
+static int msi_vector(struct intsrc *isrc);
+static int msi_source_pending(struct intsrc *isrc);
+static int msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
+ enum intr_polarity pol);
+static void msi_assign_cpu(struct intsrc *isrc, u_int apic_id);
+static void msix_enable_intr(struct intsrc *isrc);
+static int msix_source_pending(struct intsrc *isrc);
+static void msix_assign_cpu(struct intsrc *isrc, u_int apic_id);
+
+/*
+ * PIC method tables for MSI and MSI-X sources. The initializers are
+ * positional, so their order has to match the member order of struct pic,
+ * which is declared outside this file. The two tables share
+ * msi_enable_source, msi_disable_source, msi_eoi_source, msi_vector and
+ * msi_config_intr; MSI-X substitutes its own enable_intr, source_pending
+ * and assign_cpu routines. Two slots are left NULL in both tables.
+ */
+struct pic msi_pic = { msi_enable_source, msi_disable_source, msi_eoi_source,
+ msi_enable_intr, msi_vector, msi_source_pending,
+ NULL, NULL, msi_config_intr, msi_assign_cpu };
+struct pic msix_pic = { msi_enable_source, msi_disable_source, msi_eoi_source,
+ msix_enable_intr, msi_vector, msix_source_pending,
+ NULL, NULL, msi_config_intr, msix_assign_cpu };
+
+static int msi_enabled;
+static struct sx msi_sx;
+
+/*
+ * First method in both msi_pic and msix_pic. The body is empty: nothing is
+ * done to the device or the local APIC when a source is enabled.
+ */
+static void
+msi_enable_source(struct intsrc *isrc)
+{
+}
+
+/*
+ * Disable-source method shared by msi_pic and msix_pic. Nothing is masked
+ * at the device; the only work done is to send an EOI to the local APIC
+ * when the caller requests it by passing PIC_EOI.
+ */
+static void
+msi_disable_source(struct intsrc *isrc, int eoi)
+{
+
+	if (eoi != PIC_EOI)
+		return;
+	lapic_eoi();
+}
+
+/*
+ * EOI method shared by msi_pic and msix_pic: acknowledges the interrupt at
+ * the local APIC. The source argument is not used.
+ */
+static void
+msi_eoi_source(struct intsrc *isrc)
+{
+
+ lapic_eoi();
+}
+
+/*
+ * enable_intr method for MSI sources. Programs the device's MSI address
+ * and data registers when called for the first message of a group, then
+ * enables the IDT vector of whichever message was passed in.
+ */
+static void
+msi_enable_intr(struct intsrc *isrc)
+{
+	struct msi_intsrc *src;
+
+	src = (struct msi_intsrc *)isrc;
+
+	/*
+	 * The whole group can only be enabled at once, so the messages are
+	 * turned on when the first message (index 0) is given a handler.
+	 * This assumes every device registers a handler for its first
+	 * message.
+	 */
+	if (src->msi_index == 0) {
+		mtx_lock_spin(&icu_lock);
+		pci_enable_msi(src->msi_dev, INTEL_ADDR(src), INTEL_DATA(src));
+		mtx_unlock_spin(&icu_lock);
+	}
+
+	apic_enable_vector(src->msi_vector);
+}
+
+/*
+ * Vector method shared by msi_pic and msix_pic. Despite the name this
+ * returns the source's IRQ cookie (msi_irq), not its IDT vector.
+ */
+static int
+msi_vector(struct intsrc *isrc)
+{
+
+	return (((struct msi_intsrc *)isrc)->msi_irq);
+}
+
+/*
+ * source_pending method for plain MSI sources: always returns 0, i.e. never
+ * reports a pending interrupt. The source argument is not examined.
+ */
+static int
+msi_source_pending(struct intsrc *isrc)
+{
+
+ return (0);
+}
+
+/*
+ * config_intr method shared by msi_pic and msix_pic. Trigger mode and
+ * polarity cannot be changed through this method: the arguments are ignored
+ * and ENODEV is always returned.
+ */
+static int
+msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
+ enum intr_polarity pol)
+{
+
+ return (ENODEV);
+}
+
+/*
+ * assign_cpu method for MSI sources. Records 'apic_id' as the target local
+ * APIC for this message and, if the message is the first one of its group
+ * and is currently enabled, rewrites the device's MSI address and data
+ * registers so the new target takes effect.
+ *
+ * All messages of an MSI group share one address/data register pair, and
+ * the group uses the APIC ID and base vector of its first message. Only
+ * the first message may therefore reprogram the device; doing so from any
+ * other member would replace the group's base vector and destination with
+ * that member's values. Sources that are not currently part of a group
+ * (msi_first is NULL) never touch the device either.
+ */
+static void
+msi_assign_cpu(struct intsrc *isrc, u_int apic_id)
+{
+	struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
+
+	msi->msi_cpu = apic_id;
+	if (bootverbose)
+		printf("msi: Assigning MSI IRQ %d to local APIC %u\n",
+		    msi->msi_irq, msi->msi_cpu);
+
+	/* Only the first message of a group owns the shared registers. */
+	if (msi->msi_first != msi)
+		return;
+
+	mtx_lock_spin(&icu_lock);
+	if (isrc->is_enabled)
+		pci_enable_msi(msi->msi_dev, INTEL_ADDR(msi), INTEL_DATA(msi));
+	mtx_unlock_spin(&icu_lock);
+}
+
+/*
+ * enable_intr method for MSI-X sources. Each MSI-X message has its own
+ * table entry, so the entry selected by msi_index is programmed and
+ * unmasked under icu_lock, and then the message's IDT vector is enabled.
+ */
+static void
+msix_enable_intr(struct intsrc *isrc)
+{
+	struct msi_intsrc *src;
+
+	src = (struct msi_intsrc *)isrc;
+
+	mtx_lock_spin(&icu_lock);
+	pci_enable_msix(src->msi_dev, src->msi_index, INTEL_ADDR(src),
+	    INTEL_DATA(src));
+	pci_unmask_msix(src->msi_dev, src->msi_index);
+	mtx_unlock_spin(&icu_lock);
+
+	apic_enable_vector(src->msi_vector);
+}
+
+/*
+ * source_pending method for MSI-X sources: returns whatever
+ * pci_pending_msix() reports for this message's table index on the owning
+ * device.
+ */
+static int
+msix_source_pending(struct intsrc *isrc)
+{
+	struct msi_intsrc *src;
+	int pending;
+
+	src = (struct msi_intsrc *)isrc;
+	pending = pci_pending_msix(src->msi_dev, src->msi_index);
+	return (pending);
+}
+
+/*
+ * assign_cpu method for MSI-X sources. Records 'apic_id' as the target
+ * local APIC and, if the source is currently enabled, rewrites this
+ * message's MSI-X table entry so the new target takes effect.
+ */
+static void
+msix_assign_cpu(struct intsrc *isrc, u_int apic_id)
+{
+	struct msi_intsrc *src;
+
+	src = (struct msi_intsrc *)isrc;
+	src->msi_cpu = apic_id;
+
+	if (bootverbose)
+		printf("msi: Assigning MSI-X IRQ %d to local APIC %u\n",
+		    src->msi_irq, src->msi_cpu);
+
+	/* Reprogram the table entry only for an enabled source. */
+	mtx_lock_spin(&icu_lock);
+	if (isrc->is_enabled)
+		pci_enable_msix(src->msi_dev, src->msi_index, INTEL_ADDR(src),
+		    INTEL_DATA(src));
+	mtx_unlock_spin(&icu_lock);
+}
+
+/*
+ * Enable MSI support. Does nothing unless the CPU vendor string is
+ * "GenuineIntel" or "AuthenticAMD"; otherwise sets msi_enabled, registers
+ * the MSI and MSI-X PICs and initializes the lock used by the allocation
+ * and release routines.
+ */
+void
+msi_init(void)
+{
+
+	/* Bail out on any CPU vendor other than Intel or AMD. */
+	if (strcmp(cpu_vendor, "GenuineIntel") != 0 &&
+	    strcmp(cpu_vendor, "AuthenticAMD") != 0)
+		return;
+
+	msi_enabled = 1;
+	intr_register_pic(&msi_pic);
+	intr_register_pic(&msix_pic);
+	sx_init(&msi_sx, "msi");
+}
+
+/*
+ * Try to allocate 'count' MSI interrupt sources for 'dev' with contiguous
+ * IDT vectors.
+ *
+ * Free existing sources are reused first and their IRQs are stored at the
+ * start of irqs[]. If that is not enough, new sources are created and
+ * their IRQs are appended, with *newirq being the index in irqs[] of the
+ * first new IRQ value and *newcount being the number of new IRQ values
+ * added. Both are zero when no source was created. 'maxcount' is passed
+ * to apic_alloc_vectors() as the alignment of the vector run.
+ *
+ * Returns 0 on success, ENXIO if MSI is disabled or there are not enough
+ * MSI IRQ numbers left, and ENOSPC if no run of IDT vectors could be found.
+ * In the ENOSPC case any newly created sources stay registered (and free),
+ * and *newirq / *newcount still describe them so the caller can account
+ * for the new IRQs.
+ */
+int
+msi_alloc(device_t dev, int count, int maxcount, int *irqs, int *newirq,
+    int *newcount)
+{
+	struct msi_intsrc *msi, *fsrc;
+	int cnt, i, j, vector;
+
+	*newirq = 0;
+	*newcount = 0;
+	if (!msi_enabled)
+		return (ENXIO);
+
+	sx_xlock(&msi_sx);
+
+	/* Try to find 'count' free IRQs among the existing sources. */
+	cnt = 0;
+	for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) {
+		msi = (struct msi_intsrc *)intr_lookup_source(i);
+
+		/* End of allocated sources, so break. */
+		if (msi == NULL)
+			break;
+
+		/* If this is a free one, save its IRQ in the array. */
+		if (msi->msi_dev == NULL) {
+			irqs[cnt] = i;
+			cnt++;
+			if (cnt == count)
+				break;
+		}
+	}
+
+	/* Do we need to create some new sources? */
+	if (cnt < count) {
+		/* If we would exceed the max, give up. */
+		if (i + (count - cnt) > FIRST_MSI_INT + NUM_MSI_INTS) {
+			sx_xunlock(&msi_sx);
+			return (ENXIO);
+		}
+
+		/* We need count - cnt more sources starting at index 'cnt'. */
+		*newirq = cnt;
+		*newcount = count - cnt;
+
+		/*
+		 * Iterate over the number of missing sources.  The bound
+		 * must be *newcount, not *newirq: *newirq is the index of
+		 * the first new slot and is 0 when nothing could be reused.
+		 */
+		for (j = 0; j < *newcount; j++) {
+
+			/* Create a new MSI source. */
+			msi = malloc(sizeof(struct msi_intsrc), M_MSI,
+			    M_WAITOK | M_ZERO);
+			msi->msi_intsrc.is_pic = &msi_pic;
+			msi->msi_irq = i + j;
+			intr_register_source(&msi->msi_intsrc);
+
+			/* Add it to our array. */
+			irqs[cnt] = i + j;
+			cnt++;
+		}
+	}
+
+	/* Ok, we now have the IRQs allocated. */
+	KASSERT(cnt == count, ("count mismatch"));
+
+	/* Allocate 'count' IDT vectors. */
+	vector = apic_alloc_vectors(irqs, count, maxcount);
+	if (vector == 0) {
+		sx_xunlock(&msi_sx);
+		return (ENOSPC);
+	}
+
+	/* Assign IDT vectors and make these messages owned by 'dev'. */
+	fsrc = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
+	for (i = 0; i < count; i++) {
+		msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
+
+		/* A reused source may previously have been MSI-X. */
+		msi->msi_intsrc.is_pic = &msi_pic;
+		msi->msi_dev = dev;
+		msi->msi_vector = vector + i;
+		msi->msi_index = i;
+		msi->msi_first = fsrc;
+
+		/* XXX: Somewhat gross. */
+		msi->msi_intsrc.is_enabled = 0;
+	}
+	fsrc->msi_count = count;
+	sx_xunlock(&msi_sx);
+
+	return (0);
+}
+
+/*
+ * Release a whole MSI group. irqs[0] must be the first message of the
+ * group and 'count' must equal the size of the group.
+ *
+ * Returns 0 on success, ENOENT if irqs[0] has no interrupt source, EINVAL
+ * if it is an MSI-X message, is not the first message of its group, or
+ * 'count' does not match the group size, and ENXIO if the message is not
+ * allocated to any group.
+ */
+int
+msi_release(int *irqs, int count)
+{
+	struct msi_intsrc *head, *member;
+	int error, n;
+
+	sx_xlock(&msi_sx);
+
+	head = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
+	if (head == NULL) {
+		error = ENOENT;
+		goto out;
+	}
+
+	/* MSI-X messages have to go through msix_release(). */
+	if (head->msi_msix) {
+		error = EINVAL;
+		goto out;
+	}
+
+	/* The message has to belong to a group. */
+	if (head->msi_first == NULL) {
+		error = ENXIO;
+		goto out;
+	}
+
+	/* Only the complete group, named by its first message, can go. */
+	if (head->msi_first != head || head->msi_count != count) {
+		error = EINVAL;
+		goto out;
+	}
+	KASSERT(head->msi_index == 0, ("index mismatch"));
+	KASSERT(head->msi_dev != NULL, ("unowned group"));
+
+	/* Release every message after the first one. */
+	for (n = 1; n < count; n++) {
+		member = (struct msi_intsrc *)intr_lookup_source(irqs[n]);
+		KASSERT(member->msi_first == head, ("message not in group"));
+		KASSERT(member->msi_dev == head->msi_dev, ("owner mismatch"));
+		member->msi_first = NULL;
+		member->msi_dev = NULL;
+		apic_free_vector(member->msi_vector, member->msi_irq);
+		member->msi_vector = 0;
+		member->msi_index = 0;
+	}
+
+	/* Finally release the first message and its group-wide state. */
+	head->msi_first = NULL;
+	head->msi_dev = NULL;
+	apic_free_vector(head->msi_vector, head->msi_irq);
+	head->msi_vector = 0;
+	head->msi_count = 0;
+	error = 0;
+
+out:
+	sx_xunlock(&msi_sx);
+	return (error);
+}
+
+/*
+ * Allocate one MSI-X interrupt source for entry 'index' of the MSI-X table
+ * of 'dev'.
+ *
+ * A free existing source is reused if there is one; otherwise a new source
+ * is created and *new is set to 1 so the caller can account for the new
+ * IRQ. On success the IRQ is stored in *irq and 0 is returned. ENXIO is
+ * returned if MSI is disabled or every MSI IRQ is already in use.
+ */
+int
+msix_alloc(device_t dev, int index, int *irq, int *new)
+{
+	struct msi_intsrc *msi;
+	int i, vector;
+
+	*new = 0;
+	if (!msi_enabled)
+		return (ENXIO);
+
+	sx_xlock(&msi_sx);
+
+	/* Find a free IRQ. */
+	msi = NULL;
+	for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) {
+		msi = (struct msi_intsrc *)intr_lookup_source(i);
+
+		/* End of allocated sources, so break. */
+		if (msi == NULL)
+			break;
+
+		/* If this is a free one, reuse it. */
+		if (msi->msi_dev == NULL)
+			break;
+	}
+
+	/*
+	 * If the scan ran off the end, every MSI IRQ already has a source
+	 * and all of them are owned.  'msi' then still points at the last
+	 * owned source and 'i' is out of range, so give up here rather
+	 * than take a source away from its owner.
+	 */
+	if (i >= FIRST_MSI_INT + NUM_MSI_INTS) {
+		sx_xunlock(&msi_sx);
+		return (ENXIO);
+	}
+
+	/* Do we need to create a new source? */
+	if (msi == NULL) {
+		*new = 1;
+		msi = malloc(sizeof(struct msi_intsrc), M_MSI,
+		    M_WAITOK | M_ZERO);
+		msi->msi_intsrc.is_pic = &msix_pic;
+		msi->msi_irq = i;
+		intr_register_source(&msi->msi_intsrc);
+	}
+
+	/*
+	 * Allocate an IDT vector.
+	 * NOTE(review): the result is not checked here; confirm that
+	 * apic_alloc_vector() cannot return a failure value.
+	 */
+	vector = apic_alloc_vector(i);
+
+	/* Setup source; a reused source may previously have been MSI. */
+	msi->msi_intsrc.is_pic = &msix_pic;
+	msi->msi_dev = dev;
+	msi->msi_vector = vector;
+	msi->msi_index = index;
+	msi->msi_msix = 1;
+
+	/* XXX: Somewhat gross. */
+	msi->msi_intsrc.is_enabled = 0;
+	sx_xunlock(&msi_sx);
+
+	*irq = i;
+	return (0);
+}
+
+/*
+ * Release a single MSI-X message identified by its IRQ.
+ *
+ * Returns 0 on success, ENOENT if the IRQ has no interrupt source and
+ * EINVAL if the source is not an MSI-X message.
+ */
+int
+msix_release(int irq)
+{
+	struct msi_intsrc *src;
+	int error;
+
+	sx_xlock(&msi_sx);
+
+	src = (struct msi_intsrc *)intr_lookup_source(irq);
+	if (src == NULL) {
+		error = ENOENT;
+		goto out;
+	}
+
+	/* Plain MSI messages have to go through msi_release(). */
+	if (!src->msi_msix) {
+		error = EINVAL;
+		goto out;
+	}
+
+	KASSERT(src->msi_dev != NULL, ("unowned message"));
+
+	/* Give back the vector and mark the source as free. */
+	src->msi_dev = NULL;
+	apic_free_vector(src->msi_vector, src->msi_irq);
+	src->msi_vector = 0;
+	src->msi_index = 0;
+	src->msi_msix = 0;
+	error = 0;
+
+out:
+	sx_xunlock(&msi_sx);
+	return (error);
+}
diff --git a/sys/i386/i386/nexus.c b/sys/i386/i386/nexus.c
index 6e1e281..5b62f73 100644
--- a/sys/i386/i386/nexus.c
+++ b/sys/i386/i386/nexus.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
* and I/O memory address space.
*/
+#include "opt_apic.h"
#include "opt_isa.h"
#include <sys/param.h>
@@ -61,6 +62,10 @@ __FBSDID("$FreeBSD$");
#include <machine/resource.h>
+#ifdef DEV_APIC
+#include "pcib_if.h"
+#endif
+
#ifdef DEV_ISA
#include <isa/isavar.h>
#ifdef PC98
@@ -104,6 +109,12 @@ static struct resource_list *nexus_get_reslist(device_t dev, device_t child);
static int nexus_set_resource(device_t, device_t, int, int, u_long, u_long);
static int nexus_get_resource(device_t, device_t, int, int, u_long *, u_long *);
static void nexus_delete_resource(device_t, device_t, int, int);
+#ifdef DEV_APIC
+static int nexus_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs);
+static int nexus_release_msi(device_t pcib, device_t dev, int count, int *irqs);
+static int nexus_alloc_msix(device_t pcib, device_t dev, int index, int *irq);
+static int nexus_release_msix(device_t pcib, device_t dev, int irq);
+#endif
static device_method_t nexus_methods[] = {
/* Device interface */
@@ -129,6 +140,14 @@ static device_method_t nexus_methods[] = {
DEVMETHOD(bus_get_resource, nexus_get_resource),
DEVMETHOD(bus_delete_resource, nexus_delete_resource),
+ /* pcib interface */
+#ifdef DEV_APIC
+ DEVMETHOD(pcib_alloc_msi, nexus_alloc_msi),
+ DEVMETHOD(pcib_release_msi, nexus_release_msi),
+ DEVMETHOD(pcib_alloc_msix, nexus_alloc_msix),
+ DEVMETHOD(pcib_release_msix, nexus_release_msix),
+#endif
+
{ 0, 0 }
};
@@ -552,6 +571,49 @@ nexus_delete_resource(device_t dev, device_t child, int type, int rid)
resource_list_delete(rl, type, rid);
}
+#ifdef DEV_APIC
+/*
+ * pcib_alloc_msix method for the nexus: asks the MSI code for one MSI-X
+ * IRQ and, if a brand new IRQ was created for it, adds that IRQ to the
+ * IRQ resource manager. Returns the result of msix_alloc().
+ */
+static int
+nexus_alloc_msix(device_t pcib, device_t dev, int index, int *irq)
+{
+	int created, rv;
+
+	rv = msix_alloc(dev, index, irq, &created);
+	if (created != 0)
+		rman_manage_region(&irq_rman, *irq, *irq);
+	return (rv);
+}
+
+/*
+ * pcib_release_msix method for the nexus: hands the IRQ back to the MSI
+ * code and returns its result. 'pcib' and 'dev' are not used.
+ */
+static int
+nexus_release_msix(device_t pcib, device_t dev, int irq)
+{
+	int rv;
+
+	rv = msix_release(irq);
+	return (rv);
+}
+
+/*
+ * pcib_alloc_msi method for the nexus: asks the MSI code for 'count'
+ * messages and adds every IRQ that was newly created in the process to the
+ * IRQ resource manager. Returns the result of msi_alloc().
+ */
+static int
+nexus_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs)
+{
+	int first_new, k, num_new, rv;
+
+	/* Allocate the messages first. */
+	rv = msi_alloc(dev, count, maxcount, irqs, &first_new, &num_new);
+
+	/*
+	 * New IRQs are registered with the rman regardless of 'rv', since
+	 * msi_alloc() can create sources and still fail afterwards.
+	 */
+	for (k = first_new; k < first_new + num_new; k++)
+		rman_manage_region(&irq_rman, irqs[k], irqs[k]);
+
+	return (rv);
+}
+
+/*
+ * pcib_release_msi method for the nexus: hands the group of IRQs back to
+ * the MSI code and returns its result. 'pcib' and 'dev' are not used.
+ */
+static int
+nexus_release_msi(device_t pcib, device_t dev, int count, int *irqs)
+{
+	int rv;
+
+	rv = msi_release(irqs, count);
+	return (rv);
+}
+#endif
+
#ifdef DEV_ISA
/*
* Placeholder which claims PnP 'devices' which describe system
diff --git a/sys/i386/include/apicvar.h b/sys/i386/include/apicvar.h
index 99c3028..5116162 100644
--- a/sys/i386/include/apicvar.h
+++ b/sys/i386/include/apicvar.h
@@ -174,6 +174,7 @@ inthand_t
IDTVEC(apic_isr7), IDTVEC(spuriousint), IDTVEC(timerint);
u_int apic_alloc_vector(u_int irq);
+u_int apic_alloc_vectors(u_int *irqs, u_int count, u_int align);
void apic_enable_vector(u_int vector);
void apic_free_vector(u_int vector, u_int irq);
u_int apic_idt_to_irq(u_int vector);
diff --git a/sys/i386/include/intr_machdep.h b/sys/i386/include/intr_machdep.h
index 4884fd5..bfeba5e 100644
--- a/sys/i386/include/intr_machdep.h
+++ b/sys/i386/include/intr_machdep.h
@@ -43,11 +43,18 @@
* 191 and still be safe since only interrupt sources in actual use will
* allocate IDT vectors.
*
- * For now we stick with 255 as ISA IRQs and PCI intline IRQs only allow
- * for IRQs in the range 0 - 254. When MSI support is added this number
- * will likely increase.
+ * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs.
+ * IRQ values from 256 upwards are used by MSI. We leave 255 unused to avoid
+ * confusion since 255 is used in PCI to indicate an invalid IRQ.
*/
-#define NUM_IO_INTS 255
+#define NUM_MSI_INTS 128
+#define FIRST_MSI_INT 256
+#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS)
+
+/*
+ * Default base address for MSI messages on x86 platforms.
+ */
+#define MSI_INTEL_ADDR_BASE 0xfee00000
/*
* - 1 ??? dummy counter.
@@ -137,6 +144,12 @@ int intr_remove_handler(void *cookie);
void intr_resume(void);
void intr_suspend(void);
void intrcnt_add(const char *name, u_long **countp);
+void msi_init(void);
+int msi_alloc(device_t dev, int count, int maxcount, int *irqs, int *newirq,
+ int *newcount);
+int msi_release(int* irqs, int count);
+int msix_alloc(device_t dev, int index, int *irq, int *new);
+int msix_release(int irq);
#endif /* !LOCORE */
#endif /* _KERNEL */
diff --git a/sys/i386/pci/pci_bus.c b/sys/i386/pci/pci_bus.c
index 6413d0e..a91d214 100644
--- a/sys/i386/pci/pci_bus.c
+++ b/sys/i386/pci/pci_bus.c
@@ -534,6 +534,10 @@ static device_method_t legacy_pcib_methods[] = {
DEVMETHOD(pcib_read_config, legacy_pcib_read_config),
DEVMETHOD(pcib_write_config, legacy_pcib_write_config),
DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
+ DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
+ DEVMETHOD(pcib_release_msi, pcib_release_msi),
+ DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
+ DEVMETHOD(pcib_release_msix, pcib_release_msix),
{ 0, 0 }
};
@@ -623,6 +627,10 @@ static device_method_t pcibios_pcib_pci_methods[] = {
DEVMETHOD(pcib_read_config, pcib_read_config),
DEVMETHOD(pcib_write_config, pcib_write_config),
DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt),
+ DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi),
+ DEVMETHOD(pcib_release_msi, pcib_release_msi),
+ DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix),
+ DEVMETHOD(pcib_release_msix, pcib_release_msix),
{0, 0}
};
OpenPOWER on IntegriCloud