diff options
author | jhb <jhb@FreeBSD.org> | 2006-11-13 22:23:34 +0000 |
---|---|---|
committer | jhb <jhb@FreeBSD.org> | 2006-11-13 22:23:34 +0000 |
commit | fa70d013974c2fa6347ccc628becfdfcdbddc617 (patch) | |
tree | 73f5aafc03e3a710fa43be47c1b42fa68959a2bb | |
parent | d055bdf0ca2fa4ba26cc9a2bf7648eff24fb1cdd (diff) | |
download | FreeBSD-src-fa70d013974c2fa6347ccc628becfdfcdbddc617.zip FreeBSD-src-fa70d013974c2fa6347ccc628becfdfcdbddc617.tar.gz |
Machine-dependent (MD) support for PCI Message Signalled Interrupts on amd64 and i386:
- Add a new apic_alloc_vectors() method to the local APIC support code
to allocate N contiguous IDT vectors (aligned on an M-vector boundary,
where M >= N). This function is used to allocate IDT vectors for a group
of MSI messages.
- Add MSI and MSI-X PICs. The PIC code here provides methods to manage
edge-triggered MSI messages as x86 interrupt sources. In addition to
the PIC methods, msi.c also includes methods to allocate and release
MSI and MSI-X messages. For x86, we allow for up to 128 different
MSI IRQs starting at IRQ 256 (IRQs 0-15 are reserved for ISA IRQs,
16-254 for APIC PCI IRQs, and IRQ 255 is reserved).
- Add pcib_(alloc|release)_msi[x]() methods to the MD x86 PCI bridge
drivers to bubble the request up to the nexus driver.
- Add pcib_(alloc|release)_msi[x]() methods to the x86 nexus drivers that
ask the MSI PIC code to allocate resources and IDT vectors.
MFC after: 2 months
-rw-r--r-- | sys/amd64/amd64/local_apic.c | 62 | ||||
-rw-r--r-- | sys/amd64/amd64/mptable_pci.c | 8 | ||||
-rw-r--r-- | sys/amd64/amd64/msi.c | 511 | ||||
-rw-r--r-- | sys/amd64/amd64/nexus.c | 53 | ||||
-rw-r--r-- | sys/amd64/include/apicvar.h | 1 | ||||
-rw-r--r-- | sys/amd64/include/intr_machdep.h | 21 | ||||
-rw-r--r-- | sys/amd64/pci/pci_bus.c | 4 | ||||
-rw-r--r-- | sys/conf/files.amd64 | 1 | ||||
-rw-r--r-- | sys/conf/files.i386 | 1 | ||||
-rw-r--r-- | sys/i386/i386/local_apic.c | 62 | ||||
-rw-r--r-- | sys/i386/i386/mptable_pci.c | 8 | ||||
-rw-r--r-- | sys/i386/i386/msi.c | 511 | ||||
-rw-r--r-- | sys/i386/i386/nexus.c | 62 | ||||
-rw-r--r-- | sys/i386/include/apicvar.h | 1 | ||||
-rw-r--r-- | sys/i386/include/intr_machdep.h | 21 | ||||
-rw-r--r-- | sys/i386/pci/pci_bus.c | 8 |
16 files changed, 1327 insertions, 8 deletions
diff --git a/sys/amd64/amd64/local_apic.c b/sys/amd64/amd64/local_apic.c index a38d43c..14ce4ce 100644 --- a/sys/amd64/amd64/local_apic.c +++ b/sys/amd64/amd64/local_apic.c @@ -744,6 +744,65 @@ apic_alloc_vector(u_int irq) panic("Couldn't find an APIC vector for IRQ %u", irq); } +/* + * Request 'count' free contiguous IDT vectors to be used by 'count' + * IRQs. 'count' must be a power of two and the vectors will be + * aligned on a boundary of 'align'. If the request cannot be + * satisfied, 0 is returned. + */ +u_int +apic_alloc_vectors(u_int *irqs, u_int count, u_int align) +{ + u_int first, run, vector; + + KASSERT(powerof2(count), ("bad count")); + KASSERT(powerof2(align), ("bad align")); + KASSERT(align >= count, ("align < count")); +#ifdef INVARIANTS + for (run = 0; run < count; run++) + KASSERT(irqs[run] < NUM_IO_INTS, ("Invalid IRQ %u at index %u", + irqs[run], run)); +#endif + + /* + * Search for 'count' free vectors. As with apic_alloc_vector(), + * this just uses a simple first fit algorithm. + */ + run = 0; + first = 0; + mtx_lock_spin(&icu_lock); + for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { + + /* Vector is in use, end run. */ + if (ioint_irqs[vector] != 0) { + run = 0; + first = 0; + continue; + } + + /* Start a new run if run == 0 and vector is aligned. */ + if (run == 0) { + if ((vector & (align - 1)) != 0) + continue; + first = vector; + } + run++; + + /* Keep looping if the run isn't long enough yet. */ + if (run < count) + continue; + + /* Found a run, assign IRQs and return the first vector. 
*/ + for (vector = 0; vector < count; vector++) + ioint_irqs[first + vector] = irqs[vector]; + mtx_unlock_spin(&icu_lock); + return (first + APIC_IO_INTS); + } + mtx_unlock_spin(&icu_lock); + printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count); + return (0); +} + void apic_enable_vector(u_int vector) { @@ -1002,6 +1061,9 @@ apic_setup_io(void *dummy __unused) intr_register_pic(&lapic_pic); if (bootverbose) lapic_dump("BSP"); + + /* Enable the MSI "pic". */ + msi_init(); } SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_SECOND, apic_setup_io, NULL) diff --git a/sys/amd64/amd64/mptable_pci.c b/sys/amd64/amd64/mptable_pci.c index 43e48b4..16bc6a9 100644 --- a/sys/amd64/amd64/mptable_pci.c +++ b/sys/amd64/amd64/mptable_pci.c @@ -96,6 +96,10 @@ static device_method_t mptable_hostb_methods[] = { DEVMETHOD(pcib_read_config, legacy_pcib_read_config), DEVMETHOD(pcib_write_config, legacy_pcib_write_config), DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt), + DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), + DEVMETHOD(pcib_release_msi, pcib_release_msi), + DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), + DEVMETHOD(pcib_release_msix, pcib_release_msix), { 0, 0 } }; @@ -148,6 +152,10 @@ static device_method_t mptable_pcib_pci_methods[] = { DEVMETHOD(pcib_read_config, pcib_read_config), DEVMETHOD(pcib_write_config, pcib_write_config), DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt), + DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), + DEVMETHOD(pcib_release_msi, pcib_release_msi), + DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), + DEVMETHOD(pcib_release_msix, pcib_release_msix), {0, 0} }; diff --git a/sys/amd64/amd64/msi.c b/sys/amd64/amd64/msi.c new file mode 100644 index 0000000..9bbdf83 --- /dev/null +++ b/sys/amd64/amd64/msi.c @@ -0,0 +1,511 @@ +/*- + * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Support for PCI Message Signalled Interrupts (MSI). MSI interrupts on + * x86 are basically APIC messages that the northbridge delivers directly + * to the local APICs as if they had come from an I/O APIC. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/sx.h> +#include <sys/systm.h> +#include <machine/apicreg.h> +#include <machine/md_var.h> +#include <machine/frame.h> +#include <machine/intr_machdep.h> +#include <machine/apicvar.h> +#include <dev/pci/pcivar.h> + +/* Fields in address for Intel MSI messages. */ +#define MSI_INTEL_ADDR_DEST 0x000ff000 +#define MSI_INTEL_ADDR_RH 0x00000008 +# define MSI_INTEL_ADDR_RH_ON 0x00000008 +# define MSI_INTEL_ADDR_RH_OFF 0x00000000 +#define MSI_INTEL_ADDR_DM 0x00000004 +# define MSI_INTEL_ADDR_DM_PHYSICAL 0x00000000 +# define MSI_INTEL_ADDR_DM_LOGICAL 0x00000004 + +/* Fields in data for Intel MSI messages. */ +#define MSI_INTEL_DATA_TRGRMOD IOART_TRGRMOD /* Trigger mode. */ +# define MSI_INTEL_DATA_TRGREDG IOART_TRGREDG +# define MSI_INTEL_DATA_TRGRLVL IOART_TRGRLVL +#define MSI_INTEL_DATA_LEVEL 0x00004000 /* Polarity. */ +# define MSI_INTEL_DATA_DEASSERT 0x00000000 +# define MSI_INTEL_DATA_ASSERT 0x00004000 +#define MSI_INTEL_DATA_DELMOD IOART_DELMOD /* Delivery mode. */ +# define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED +# define MSI_INTEL_DATA_DELLOPRI IOART_DELLOPRI +# define MSI_INTEL_DATA_DELSMI IOART_DELSMI +# define MSI_INTEL_DATA_DELNMI IOART_DELNMI +# define MSI_INTEL_DATA_DELINIT IOART_DELINIT +# define MSI_INTEL_DATA_DELEXINT IOART_DELEXINT +#define MSI_INTEL_DATA_INTVEC IOART_INTVEC /* Interrupt vector. */ + +/* + * Build Intel MSI message and data values from a source. AMD64 systems + * seem to be compatible, so we use the same function for both. 
+ */ +#define INTEL_ADDR(msi) \ + (MSI_INTEL_ADDR_BASE | (msi)->msi_cpu << 12 | \ + MSI_INTEL_ADDR_RH_OFF | MSI_INTEL_ADDR_DM_PHYSICAL) +#define INTEL_DATA(msi) \ + (MSI_INTEL_DATA_TRGREDG | MSI_INTEL_DATA_DELFIXED | (msi)->msi_vector) + +static MALLOC_DEFINE(M_MSI, "msi", "PCI MSI"); + +/* + * MSI sources are bunched into groups. This is because MSI forces + * all of the messages to share the address and data registers and + * thus certain properties (such as the local APIC ID target on x86). + * Each group has a 'first' source that contains information global to + * the group. These fields are marked with (g) below. + * + * Note that local APIC ID is kind of special. Each message will be + * assigned an ID by the system; however, a group will use the ID from + * the first message. + * + * For MSI-X, each message is isolated, and msi_index indicates the + * index of this message in the device's MSI-X table. + */ +struct msi_intsrc { + struct intsrc msi_intsrc; + device_t msi_dev; /* Owning device. (g) */ + struct msi_intsrc *msi_first; /* First source in group. */ + u_int msi_irq; /* IRQ cookie. */ + u_int msi_index; /* Index of this message. */ + u_int msi_msix; /* MSI-X message. */ + u_int msi_vector:8; /* IDT vector. */ + u_int msi_cpu:8; /* Local APIC ID. (g) */ + u_int msi_count:8; /* Messages in this group. 
(g) */ +}; + +static void msi_enable_source(struct intsrc *isrc); +static void msi_disable_source(struct intsrc *isrc, int eoi); +static void msi_eoi_source(struct intsrc *isrc); +static void msi_enable_intr(struct intsrc *isrc); +static int msi_vector(struct intsrc *isrc); +static int msi_source_pending(struct intsrc *isrc); +static int msi_config_intr(struct intsrc *isrc, enum intr_trigger trig, + enum intr_polarity pol); +static void msi_assign_cpu(struct intsrc *isrc, u_int apic_id); +static void msix_enable_intr(struct intsrc *isrc); +static int msix_source_pending(struct intsrc *isrc); +static void msix_assign_cpu(struct intsrc *isrc, u_int apic_id); + +struct pic msi_pic = { msi_enable_source, msi_disable_source, msi_eoi_source, + msi_enable_intr, msi_vector, msi_source_pending, + NULL, NULL, msi_config_intr, msi_assign_cpu }; +struct pic msix_pic = { msi_enable_source, msi_disable_source, msi_eoi_source, + msix_enable_intr, msi_vector, msix_source_pending, + NULL, NULL, msi_config_intr, msix_assign_cpu }; + +static int msi_enabled; +static struct sx msi_sx; + +static void +msi_enable_source(struct intsrc *isrc) +{ +} + +static void +msi_disable_source(struct intsrc *isrc, int eoi) +{ + + if (eoi == PIC_EOI) + lapic_eoi(); +} + +static void +msi_eoi_source(struct intsrc *isrc) +{ + + lapic_eoi(); +} + +static void +msi_enable_intr(struct intsrc *isrc) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + /* + * Since we can only enable the entire group at once, go ahead and + * enable the messages when the first message is given a handler. + * Note that we assume all devices will register a handler for the + * first message. 
+ */ + if (msi->msi_index == 0) { + mtx_lock_spin(&icu_lock); + pci_enable_msi(msi->msi_dev, INTEL_ADDR(msi), INTEL_DATA(msi)); + mtx_unlock_spin(&icu_lock); + } + apic_enable_vector(msi->msi_vector); +} + +static int +msi_vector(struct intsrc *isrc) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + return (msi->msi_irq); +} + +static int +msi_source_pending(struct intsrc *isrc) +{ + + return (0); +} + +static int +msi_config_intr(struct intsrc *isrc, enum intr_trigger trig, + enum intr_polarity pol) +{ + + return (ENODEV); +} + +static void +msi_assign_cpu(struct intsrc *isrc, u_int apic_id) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + msi->msi_cpu = apic_id; + if (bootverbose) + printf("msi: Assigning MSI IRQ %d to local APIC %u\n", + msi->msi_irq, msi->msi_cpu); + mtx_lock_spin(&icu_lock); + if (isrc->is_enabled) + pci_enable_msi(msi->msi_dev, INTEL_ADDR(msi), INTEL_DATA(msi)); + mtx_unlock_spin(&icu_lock); +} + +static void +msix_enable_intr(struct intsrc *isrc) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + mtx_lock_spin(&icu_lock); + pci_enable_msix(msi->msi_dev, msi->msi_index, INTEL_ADDR(msi), + INTEL_DATA(msi)); + pci_unmask_msix(msi->msi_dev, msi->msi_index); + mtx_unlock_spin(&icu_lock); + apic_enable_vector(msi->msi_vector); +} + +static int +msix_source_pending(struct intsrc *isrc) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + return (pci_pending_msix(msi->msi_dev, msi->msi_index)); +} + +static void +msix_assign_cpu(struct intsrc *isrc, u_int apic_id) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + msi->msi_cpu = apic_id; + if (bootverbose) + printf("msi: Assigning MSI-X IRQ %d to local APIC %u\n", + msi->msi_irq, msi->msi_cpu); + mtx_lock_spin(&icu_lock); + if (isrc->is_enabled) + pci_enable_msix(msi->msi_dev, msi->msi_index, INTEL_ADDR(msi), + INTEL_DATA(msi)); + mtx_unlock_spin(&icu_lock); +} + +void +msi_init(void) +{ + + /* Check if we have a supported CPU. 
*/ + if (!(strcmp(cpu_vendor, "GenuineIntel") == 0 || + strcmp(cpu_vendor, "AuthenticAMD") == 0)) + return; + + msi_enabled = 1; + intr_register_pic(&msi_pic); + intr_register_pic(&msix_pic); + sx_init(&msi_sx, "msi"); +} + +/* + * Try to allocate 'count' interrupt sources with contiguous IDT values. If + * we allocate any new sources, then their IRQ values will be at the end of + * the irqs[] array, with *newirq being the index of the first new IRQ value + * and *newcount being the number of new IRQ values added. + */ +int +msi_alloc(device_t dev, int count, int maxcount, int *irqs, int *newirq, + int *newcount) +{ + struct msi_intsrc *msi, *fsrc; + int cnt, i, j, vector; + + *newirq = 0; + *newcount = 0; + if (!msi_enabled) + return (ENXIO); + + sx_xlock(&msi_sx); + + /* Try to find 'count' free IRQs. */ + cnt = 0; + for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) { + msi = (struct msi_intsrc *)intr_lookup_source(i); + + /* End of allocated sources, so break. */ + if (msi == NULL) + break; + + /* If this is a free one, save its IRQ in the array. */ + if (msi->msi_dev == NULL) { + irqs[cnt] = i; + cnt++; + if (cnt == count) + break; + } + } + + /* Do we need to create some new sources? */ + if (cnt < count) { + /* If we would exceed the max, give up. */ + if (i + (count - cnt) > FIRST_MSI_INT + NUM_MSI_INTS) { + sx_xunlock(&msi_sx); + return (ENXIO); + } + + /* We need count - cnt more sources starting at index 'cnt'. */ + *newirq = cnt; + *newcount = count - cnt; + for (j = 0; j < *newirq; j++) { + + /* Create a new MSI source. */ + msi = malloc(sizeof(struct msi_intsrc), M_MSI, + M_WAITOK | M_ZERO); + msi->msi_intsrc.is_pic = &msi_pic; + msi->msi_irq = i + j; + intr_register_source(&msi->msi_intsrc); + + /* Add it to our array. */ + irqs[cnt] = i + j; + cnt++; + } + } + + /* Ok, we now have the IRQs allocated. */ + KASSERT(cnt == count, ("count mismatch")); + + /* Allocate 'count' IDT vectors. 
*/ + vector = apic_alloc_vectors(irqs, count, maxcount); + if (vector == 0) { + sx_xunlock(&msi_sx); + return (ENOSPC); + } + + /* Assign IDT vectors and make these messages owned by 'dev'. */ + fsrc = (struct msi_intsrc *)intr_lookup_source(irqs[0]); + for (i = 0; i < count; i++) { + msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]); + msi->msi_intsrc.is_pic = &msi_pic; + msi->msi_dev = dev; + msi->msi_vector = vector + i; + msi->msi_index = i; + msi->msi_first = fsrc; + + /* XXX: Somewhat gross. */ + msi->msi_intsrc.is_enabled = 0; + } + fsrc->msi_count = count; + sx_xunlock(&msi_sx); + + return (0); +} + +int +msi_release(int *irqs, int count) +{ + struct msi_intsrc *msi, *first; + int i; + + sx_xlock(&msi_sx); + first = (struct msi_intsrc *)intr_lookup_source(irqs[0]); + if (first == NULL) { + sx_xunlock(&msi_sx); + return (ENOENT); + } + + /* Make sure this isn't an MSI-X message. */ + if (first->msi_msix) { + sx_xunlock(&msi_sx); + return (EINVAL); + } + + /* Make sure this message is allocated to a group. */ + if (first->msi_first == NULL) { + sx_xunlock(&msi_sx); + return (ENXIO); + } + + /* + * Make sure this is the start of a group and that we are releasing + * the entire group. + */ + if (first->msi_first != first || first->msi_count != count) { + sx_xunlock(&msi_sx); + return (EINVAL); + } + KASSERT(first->msi_index == 0, ("index mismatch")); + + KASSERT(first->msi_dev != NULL, ("unowned group")); + + /* Clear all the extra messages in the group. */ + for (i = 1; i < count; i++) { + msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]); + KASSERT(msi->msi_first == first, ("message not in group")); + KASSERT(msi->msi_dev == first->msi_dev, ("owner mismatch")); + msi->msi_first = NULL; + msi->msi_dev = NULL; + apic_free_vector(msi->msi_vector, msi->msi_irq); + msi->msi_vector = 0; + msi->msi_index = 0; + } + + /* Clear out the first message. 
*/ + first->msi_first = NULL; + first->msi_dev = NULL; + apic_free_vector(first->msi_vector, first->msi_irq); + first->msi_vector = 0; + first->msi_count = 0; + + sx_xunlock(&msi_sx); + return (0); +} + +int +msix_alloc(device_t dev, int index, int *irq, int *new) +{ + struct msi_intsrc *msi; + int i, vector; + + *new = 0; + if (!msi_enabled) + return (ENXIO); + + sx_xlock(&msi_sx); + + /* Find a free IRQ. */ + for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) { + msi = (struct msi_intsrc *)intr_lookup_source(i); + + /* End of allocated sources, so break. */ + if (msi == NULL) + break; + + /* If this is a free one, start or continue a run. */ + if (msi->msi_dev == NULL) + break; + } + + /* Do we need to create a new source? */ + if (msi == NULL) { + /* If we would exceed the max, give up. */ + if (i + 1 > FIRST_MSI_INT + NUM_MSI_INTS) { + sx_xunlock(&msi_sx); + return (ENXIO); + } + + /* Create a new source. */ + *new = 1; + msi = malloc(sizeof(struct msi_intsrc), M_MSI, + M_WAITOK | M_ZERO); + msi->msi_intsrc.is_pic = &msix_pic; + msi->msi_irq = i; + intr_register_source(&msi->msi_intsrc); + } + + /* Allocate an IDT vector. */ + vector = apic_alloc_vector(i); + + /* Setup source. */ + msi->msi_intsrc.is_pic = &msix_pic; + msi->msi_dev = dev; + msi->msi_vector = vector; + msi->msi_index = index; + msi->msi_msix = 1; + + /* XXX: Somewhat gross. */ + msi->msi_intsrc.is_enabled = 0; + sx_xunlock(&msi_sx); + + *irq = i; + return (0); +} + +int +msix_release(int irq) +{ + struct msi_intsrc *msi; + + sx_xlock(&msi_sx); + msi = (struct msi_intsrc *)intr_lookup_source(irq); + if (msi == NULL) { + sx_xunlock(&msi_sx); + return (ENOENT); + } + + /* Make sure this is an MSI-X message. */ + if (!msi->msi_msix) { + sx_xunlock(&msi_sx); + return (EINVAL); + } + + KASSERT(msi->msi_dev != NULL, ("unowned message")); + + /* Clear out the message. 
*/ + msi->msi_dev = NULL; + apic_free_vector(msi->msi_vector, msi->msi_irq); + msi->msi_vector = 0; + msi->msi_index = 0; + msi->msi_msix = 0; + + sx_xunlock(&msi_sx); + return (0); +} diff --git a/sys/amd64/amd64/nexus.c b/sys/amd64/amd64/nexus.c index 478106c..b897b2b 100644 --- a/sys/amd64/amd64/nexus.c +++ b/sys/amd64/amd64/nexus.c @@ -61,6 +61,8 @@ __FBSDID("$FreeBSD$"); #include <machine/resource.h> +#include "pcib_if.h" + #ifdef DEV_ISA #include <isa/isavar.h> #include <amd64/isa/isa.h> @@ -100,6 +102,10 @@ static struct resource_list *nexus_get_reslist(device_t dev, device_t child); static int nexus_set_resource(device_t, device_t, int, int, u_long, u_long); static int nexus_get_resource(device_t, device_t, int, int, u_long *, u_long *); static void nexus_delete_resource(device_t, device_t, int, int); +static int nexus_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs); +static int nexus_release_msi(device_t pcib, device_t dev, int count, int *irqs); +static int nexus_alloc_msix(device_t pcib, device_t dev, int index, int *irq); +static int nexus_release_msix(device_t pcib, device_t dev, int irq); static device_method_t nexus_methods[] = { /* Device interface */ @@ -125,6 +131,12 @@ static device_method_t nexus_methods[] = { DEVMETHOD(bus_get_resource, nexus_get_resource), DEVMETHOD(bus_delete_resource, nexus_delete_resource), + /* pcib interface */ + DEVMETHOD(pcib_alloc_msi, nexus_alloc_msi), + DEVMETHOD(pcib_release_msi, nexus_release_msi), + DEVMETHOD(pcib_alloc_msix, nexus_alloc_msix), + DEVMETHOD(pcib_release_msix, nexus_release_msix), + { 0, 0 } }; @@ -504,6 +516,47 @@ nexus_delete_resource(device_t dev, device_t child, int type, int rid) resource_list_delete(rl, type, rid); } +static int +nexus_alloc_msix(device_t pcib, device_t dev, int index, int *irq) +{ + int error, new; + + error = msix_alloc(dev, index, irq, &new); + if (new) + rman_manage_region(&irq_rman, *irq, *irq); + return (error); +} + +static int 
+nexus_release_msix(device_t pcib, device_t dev, int irq) +{ + + return (msix_release(irq)); +} + +static int +nexus_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs) +{ + int error, i, newirq, newcount; + + /* First alloc the messages. */ + error = msi_alloc(dev, count, maxcount, irqs, &newirq, &newcount); + + /* Always add any new IRQs to the rman, even on failure. */ + for (i = 0; i < newcount; i++) + rman_manage_region(&irq_rman, irqs[newirq + i], + irqs[newirq + i]); + + return (error); +} + +static int +nexus_release_msi(device_t pcib, device_t dev, int count, int *irqs) +{ + + return (msi_release(irqs, count)); +} + #ifdef DEV_ISA /* * Placeholder which claims PnP 'devices' which describe system diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h index ebd8f04..6cc37c6 100644 --- a/sys/amd64/include/apicvar.h +++ b/sys/amd64/include/apicvar.h @@ -175,6 +175,7 @@ inthand_t IDTVEC(apic_isr7), IDTVEC(spuriousint), IDTVEC(timerint); u_int apic_alloc_vector(u_int irq); +u_int apic_alloc_vectors(u_int *irqs, u_int count, u_int align); void apic_enable_vector(u_int vector); void apic_free_vector(u_int vector, u_int irq); u_int apic_idt_to_irq(u_int vector); diff --git a/sys/amd64/include/intr_machdep.h b/sys/amd64/include/intr_machdep.h index 5043774..cddec7b 100644 --- a/sys/amd64/include/intr_machdep.h +++ b/sys/amd64/include/intr_machdep.h @@ -43,11 +43,18 @@ * 191 and still be safe since only interrupt sources in actual use will * allocate IDT vectors. * - * For now we stick with 255 as ISA IRQs and PCI intline IRQs only allow - * for IRQs in the range 0 - 254. When MSI support is added this number - * will likely increase. + * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. + * IRQ values beyond 256 are used by MSI. We leave 255 unused to avoid + * confusion since 255 is used in PCI to indicate an invalid IRQ. 
*/ -#define NUM_IO_INTS 255 +#define NUM_MSI_INTS 128 +#define FIRST_MSI_INT 256 +#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS) + +/* + * Default base address for MSI messages on x86 platforms. + */ +#define MSI_INTEL_ADDR_BASE 0xfee00000 /* * - 1 ??? dummy counter. @@ -140,6 +147,12 @@ int intr_remove_handler(void *cookie); void intr_resume(void); void intr_suspend(void); void intrcnt_add(const char *name, u_long **countp); +void msi_init(void); +int msi_alloc(device_t dev, int count, int maxcount, int *irqs, int *newirq, + int *newcount); +int msi_release(int *irqs, int count); +int msix_alloc(device_t dev, int index, int *irq, int *new); +int msix_release(int irq); #endif /* !LOCORE */ #endif /* _KERNEL */ diff --git a/sys/amd64/pci/pci_bus.c b/sys/amd64/pci/pci_bus.c index 716b050..6efde3a 100644 --- a/sys/amd64/pci/pci_bus.c +++ b/sys/amd64/pci/pci_bus.c @@ -322,6 +322,10 @@ static device_method_t legacy_pcib_methods[] = { DEVMETHOD(pcib_read_config, legacy_pcib_read_config), DEVMETHOD(pcib_write_config, legacy_pcib_write_config), DEVMETHOD(pcib_route_interrupt, legacy_pcib_route_interrupt), + DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), + DEVMETHOD(pcib_release_msi, pcib_release_msi), + DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), + DEVMETHOD(pcib_release_msix, pcib_release_msix), { 0, 0 } }; diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 3d3bc8c..b961ab7 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -114,6 +114,7 @@ amd64/amd64/mp_watchdog.c optional mp_watchdog smp amd64/amd64/mpboot.S optional smp amd64/amd64/mptable.c optional mptable amd64/amd64/mptable_pci.c optional mptable pci +amd64/amd64/msi.c optional pci amd64/amd64/nexus.c standard amd64/amd64/pmap.c standard amd64/amd64/prof_machdep.c optional profiling-routine diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 09fc1b1..3de50fe 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -294,6 +294,7 @@ i386/i386/mp_watchdog.c optional mp_watchdog 
smp i386/i386/mpboot.s optional smp i386/i386/mptable.c optional apic i386/i386/mptable_pci.c optional apic pci +i386/i386/msi.c optional apic pci i386/i386/nexus.c standard i386/i386/perfmon.c optional perfmon i386/i386/pmap.c standard diff --git a/sys/i386/i386/local_apic.c b/sys/i386/i386/local_apic.c index 596f236..040b8b1 100644 --- a/sys/i386/i386/local_apic.c +++ b/sys/i386/i386/local_apic.c @@ -746,6 +746,65 @@ apic_alloc_vector(u_int irq) panic("Couldn't find an APIC vector for IRQ %u", irq); } +/* + * Request 'count' free contiguous IDT vectors to be used by 'count' + * IRQs. 'count' must be a power of two and the vectors will be + * aligned on a boundary of 'align'. If the request cannot be + * satisfied, 0 is returned. + */ +u_int +apic_alloc_vectors(u_int *irqs, u_int count, u_int align) +{ + u_int first, run, vector; + + KASSERT(powerof2(count), ("bad count")); + KASSERT(powerof2(align), ("bad align")); + KASSERT(align >= count, ("align < count")); +#ifdef INVARIANTS + for (run = 0; run < count; run++) + KASSERT(irqs[run] < NUM_IO_INTS, ("Invalid IRQ %u at index %u", + irqs[run], run)); +#endif + + /* + * Search for 'count' free vectors. As with apic_alloc_vector(), + * this just uses a simple first fit algorithm. + */ + run = 0; + first = 0; + mtx_lock_spin(&icu_lock); + for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { + + /* Vector is in use, end run. */ + if (ioint_irqs[vector] != 0) { + run = 0; + first = 0; + continue; + } + + /* Start a new run if run == 0 and vector is aligned. */ + if (run == 0) { + if ((vector & (align - 1)) != 0) + continue; + first = vector; + } + run++; + + /* Keep looping if the run isn't long enough yet. */ + if (run < count) + continue; + + /* Found a run, assign IRQs and return the first vector. 
*/ + for (vector = 0; vector < count; vector++) + ioint_irqs[first + vector] = irqs[vector]; + mtx_unlock_spin(&icu_lock); + return (first + APIC_IO_INTS); + } + mtx_unlock_spin(&icu_lock); + printf("APIC: Couldn't find APIC vectors for %u IRQs\n", count); + return (0); +} + void apic_enable_vector(u_int vector) { @@ -1005,6 +1064,9 @@ apic_setup_io(void *dummy __unused) intr_register_pic(&lapic_pic); if (bootverbose) lapic_dump("BSP"); + + /* Enable the MSI "pic". */ + msi_init(); } SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_SECOND, apic_setup_io, NULL) diff --git a/sys/i386/i386/mptable_pci.c b/sys/i386/i386/mptable_pci.c index 43e48b4..16bc6a9 100644 --- a/sys/i386/i386/mptable_pci.c +++ b/sys/i386/i386/mptable_pci.c @@ -96,6 +96,10 @@ static device_method_t mptable_hostb_methods[] = { DEVMETHOD(pcib_read_config, legacy_pcib_read_config), DEVMETHOD(pcib_write_config, legacy_pcib_write_config), DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt), + DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), + DEVMETHOD(pcib_release_msi, pcib_release_msi), + DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), + DEVMETHOD(pcib_release_msix, pcib_release_msix), { 0, 0 } }; @@ -148,6 +152,10 @@ static device_method_t mptable_pcib_pci_methods[] = { DEVMETHOD(pcib_read_config, pcib_read_config), DEVMETHOD(pcib_write_config, pcib_write_config), DEVMETHOD(pcib_route_interrupt, mptable_pci_route_interrupt), + DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), + DEVMETHOD(pcib_release_msi, pcib_release_msi), + DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), + DEVMETHOD(pcib_release_msix, pcib_release_msix), {0, 0} }; diff --git a/sys/i386/i386/msi.c b/sys/i386/i386/msi.c new file mode 100644 index 0000000..9bbdf83 --- /dev/null +++ b/sys/i386/i386/msi.c @@ -0,0 +1,511 @@ +/*- + * Copyright (c) 2006 John Baldwin <jhb@FreeBSD.org> + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the author nor the names of any co-contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * Support for PCI Message Signalled Interrupts (MSI). MSI interrupts on + * x86 are basically APIC messages that the northbridge delivers directly + * to the local APICs as if they had come from an I/O APIC. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/bus.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/sx.h> +#include <sys/systm.h> +#include <machine/apicreg.h> +#include <machine/md_var.h> +#include <machine/frame.h> +#include <machine/intr_machdep.h> +#include <machine/apicvar.h> +#include <dev/pci/pcivar.h> + +/* Fields in address for Intel MSI messages. */ +#define MSI_INTEL_ADDR_DEST 0x000ff000 +#define MSI_INTEL_ADDR_RH 0x00000008 +# define MSI_INTEL_ADDR_RH_ON 0x00000008 +# define MSI_INTEL_ADDR_RH_OFF 0x00000000 +#define MSI_INTEL_ADDR_DM 0x00000004 +# define MSI_INTEL_ADDR_DM_PHYSICAL 0x00000000 +# define MSI_INTEL_ADDR_DM_LOGICAL 0x00000004 + +/* Fields in data for Intel MSI messages. */ +#define MSI_INTEL_DATA_TRGRMOD IOART_TRGRMOD /* Trigger mode. */ +# define MSI_INTEL_DATA_TRGREDG IOART_TRGREDG +# define MSI_INTEL_DATA_TRGRLVL IOART_TRGRLVL +#define MSI_INTEL_DATA_LEVEL 0x00004000 /* Polarity. */ +# define MSI_INTEL_DATA_DEASSERT 0x00000000 +# define MSI_INTEL_DATA_ASSERT 0x00004000 +#define MSI_INTEL_DATA_DELMOD IOART_DELMOD /* Delivery mode. */ +# define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED +# define MSI_INTEL_DATA_DELLOPRI IOART_DELLOPRI +# define MSI_INTEL_DATA_DELSMI IOART_DELSMI +# define MSI_INTEL_DATA_DELNMI IOART_DELNMI +# define MSI_INTEL_DATA_DELINIT IOART_DELINIT +# define MSI_INTEL_DATA_DELEXINT IOART_DELEXINT +#define MSI_INTEL_DATA_INTVEC IOART_INTVEC /* Interrupt vector. */ + +/* + * Build Intel MSI message and data values from a source. AMD64 systems + * seem to be compatible, so we use the same function for both. 
+ */ +#define INTEL_ADDR(msi) \ + (MSI_INTEL_ADDR_BASE | (msi)->msi_cpu << 12 | \ + MSI_INTEL_ADDR_RH_OFF | MSI_INTEL_ADDR_DM_PHYSICAL) +#define INTEL_DATA(msi) \ + (MSI_INTEL_DATA_TRGREDG | MSI_INTEL_DATA_DELFIXED | (msi)->msi_vector) + +static MALLOC_DEFINE(M_MSI, "msi", "PCI MSI"); + +/* + * MSI sources are bunched into groups. This is because MSI forces + * all of the messages to share the address and data registers and + * thus certain properties (such as the local APIC ID target on x86). + * Each group has a 'first' source that contains information global to + * the group. These fields are marked with (g) below. + * + * Note that local APIC ID is kind of special. Each message will be + * assigned an ID by the system; however, a group will use the ID from + * the first message. + * + * For MSI-X, each message is isolated, and msi_index indicates the + * index of this message in the device's MSI-X table. + */ +struct msi_intsrc { + struct intsrc msi_intsrc; + device_t msi_dev; /* Owning device. (g) */ + struct msi_intsrc *msi_first; /* First source in group. */ + u_int msi_irq; /* IRQ cookie. */ + u_int msi_index; /* Index of this message. */ + u_int msi_msix; /* MSI-X message. */ + u_int msi_vector:8; /* IDT vector. */ + u_int msi_cpu:8; /* Local APIC ID. (g) */ + u_int msi_count:8; /* Messages in this group. 
(g) */ +}; + +static void msi_enable_source(struct intsrc *isrc); +static void msi_disable_source(struct intsrc *isrc, int eoi); +static void msi_eoi_source(struct intsrc *isrc); +static void msi_enable_intr(struct intsrc *isrc); +static int msi_vector(struct intsrc *isrc); +static int msi_source_pending(struct intsrc *isrc); +static int msi_config_intr(struct intsrc *isrc, enum intr_trigger trig, + enum intr_polarity pol); +static void msi_assign_cpu(struct intsrc *isrc, u_int apic_id); +static void msix_enable_intr(struct intsrc *isrc); +static int msix_source_pending(struct intsrc *isrc); +static void msix_assign_cpu(struct intsrc *isrc, u_int apic_id); + +struct pic msi_pic = { msi_enable_source, msi_disable_source, msi_eoi_source, + msi_enable_intr, msi_vector, msi_source_pending, + NULL, NULL, msi_config_intr, msi_assign_cpu }; +struct pic msix_pic = { msi_enable_source, msi_disable_source, msi_eoi_source, + msix_enable_intr, msi_vector, msix_source_pending, + NULL, NULL, msi_config_intr, msix_assign_cpu }; + +static int msi_enabled; +static struct sx msi_sx; + +static void +msi_enable_source(struct intsrc *isrc) +{ +} + +static void +msi_disable_source(struct intsrc *isrc, int eoi) +{ + + if (eoi == PIC_EOI) + lapic_eoi(); +} + +static void +msi_eoi_source(struct intsrc *isrc) +{ + + lapic_eoi(); +} + +static void +msi_enable_intr(struct intsrc *isrc) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + /* + * Since we can only enable the entire group at once, go ahead and + * enable the messages when the first message is given a handler. + * Note that we assume all devices will register a handler for the + * first message. 
+ */ + if (msi->msi_index == 0) { + mtx_lock_spin(&icu_lock); + pci_enable_msi(msi->msi_dev, INTEL_ADDR(msi), INTEL_DATA(msi)); + mtx_unlock_spin(&icu_lock); + } + apic_enable_vector(msi->msi_vector); +} + +static int +msi_vector(struct intsrc *isrc) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + return (msi->msi_irq); +} + +static int +msi_source_pending(struct intsrc *isrc) +{ + + return (0); +} + +static int +msi_config_intr(struct intsrc *isrc, enum intr_trigger trig, + enum intr_polarity pol) +{ + + return (ENODEV); +} + +static void +msi_assign_cpu(struct intsrc *isrc, u_int apic_id) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + msi->msi_cpu = apic_id; + if (bootverbose) + printf("msi: Assigning MSI IRQ %d to local APIC %u\n", + msi->msi_irq, msi->msi_cpu); + mtx_lock_spin(&icu_lock); + if (isrc->is_enabled) + pci_enable_msi(msi->msi_dev, INTEL_ADDR(msi), INTEL_DATA(msi)); + mtx_unlock_spin(&icu_lock); +} + +static void +msix_enable_intr(struct intsrc *isrc) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + mtx_lock_spin(&icu_lock); + pci_enable_msix(msi->msi_dev, msi->msi_index, INTEL_ADDR(msi), + INTEL_DATA(msi)); + pci_unmask_msix(msi->msi_dev, msi->msi_index); + mtx_unlock_spin(&icu_lock); + apic_enable_vector(msi->msi_vector); +} + +static int +msix_source_pending(struct intsrc *isrc) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + return (pci_pending_msix(msi->msi_dev, msi->msi_index)); +} + +static void +msix_assign_cpu(struct intsrc *isrc, u_int apic_id) +{ + struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + + msi->msi_cpu = apic_id; + if (bootverbose) + printf("msi: Assigning MSI-X IRQ %d to local APIC %u\n", + msi->msi_irq, msi->msi_cpu); + mtx_lock_spin(&icu_lock); + if (isrc->is_enabled) + pci_enable_msix(msi->msi_dev, msi->msi_index, INTEL_ADDR(msi), + INTEL_DATA(msi)); + mtx_unlock_spin(&icu_lock); +} + +void +msi_init(void) +{ + + /* Check if we have a supported CPU. 
*/ + if (!(strcmp(cpu_vendor, "GenuineIntel") == 0 || + strcmp(cpu_vendor, "AuthenticAMD") == 0)) + return; + + msi_enabled = 1; + intr_register_pic(&msi_pic); + intr_register_pic(&msix_pic); + sx_init(&msi_sx, "msi"); +} + +/* + * Try to allocate 'count' interrupt sources with contiguous IDT values. If + * we allocate any new sources, then their IRQ values will be at the end of + * the irqs[] array, with *newirq being the index of the first new IRQ value + * and *newcount being the number of new IRQ values added. + */ +int +msi_alloc(device_t dev, int count, int maxcount, int *irqs, int *newirq, + int *newcount) +{ + struct msi_intsrc *msi, *fsrc; + int cnt, i, j, vector; + + *newirq = 0; + *newcount = 0; + if (!msi_enabled) + return (ENXIO); + + sx_xlock(&msi_sx); + + /* Try to find 'count' free IRQs. */ + cnt = 0; + for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) { + msi = (struct msi_intsrc *)intr_lookup_source(i); + + /* End of allocated sources, so break. */ + if (msi == NULL) + break; + + /* If this is a free one, save its IRQ in the array. */ + if (msi->msi_dev == NULL) { + irqs[cnt] = i; + cnt++; + if (cnt == count) + break; + } + } + + /* Do we need to create some new sources? */ + if (cnt < count) { + /* If we would exceed the max, give up. */ + if (i + (count - cnt) > FIRST_MSI_INT + NUM_MSI_INTS) { + sx_xunlock(&msi_sx); + return (ENXIO); + } + + /* We need count - cnt more sources starting at index 'cnt'. */ + *newirq = cnt; + *newcount = count - cnt; + for (j = 0; j < *newirq; j++) { + + /* Create a new MSI source. */ + msi = malloc(sizeof(struct msi_intsrc), M_MSI, + M_WAITOK | M_ZERO); + msi->msi_intsrc.is_pic = &msi_pic; + msi->msi_irq = i + j; + intr_register_source(&msi->msi_intsrc); + + /* Add it to our array. */ + irqs[cnt] = i + j; + cnt++; + } + } + + /* Ok, we now have the IRQs allocated. */ + KASSERT(cnt == count, ("count mismatch")); + + /* Allocate 'count' IDT vectors. 
*/ + vector = apic_alloc_vectors(irqs, count, maxcount); + if (vector == 0) { + sx_xunlock(&msi_sx); + return (ENOSPC); + } + + /* Assign IDT vectors and make these messages owned by 'dev'. */ + fsrc = (struct msi_intsrc *)intr_lookup_source(irqs[0]); + for (i = 0; i < count; i++) { + msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]); + msi->msi_intsrc.is_pic = &msi_pic; + msi->msi_dev = dev; + msi->msi_vector = vector + i; + msi->msi_index = i; + msi->msi_first = fsrc; + + /* XXX: Somewhat gross. */ + msi->msi_intsrc.is_enabled = 0; + } + fsrc->msi_count = count; + sx_xunlock(&msi_sx); + + return (0); +} + +int +msi_release(int *irqs, int count) +{ + struct msi_intsrc *msi, *first; + int i; + + sx_xlock(&msi_sx); + first = (struct msi_intsrc *)intr_lookup_source(irqs[0]); + if (first == NULL) { + sx_xunlock(&msi_sx); + return (ENOENT); + } + + /* Make sure this isn't an MSI-X message. */ + if (first->msi_msix) { + sx_xunlock(&msi_sx); + return (EINVAL); + } + + /* Make sure this message is allocated to a group. */ + if (first->msi_first == NULL) { + sx_xunlock(&msi_sx); + return (ENXIO); + } + + /* + * Make sure this is the start of a group and that we are releasing + * the entire group. + */ + if (first->msi_first != first || first->msi_count != count) { + sx_xunlock(&msi_sx); + return (EINVAL); + } + KASSERT(first->msi_index == 0, ("index mismatch")); + + KASSERT(first->msi_dev != NULL, ("unowned group")); + + /* Clear all the extra messages in the group. */ + for (i = 1; i < count; i++) { + msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]); + KASSERT(msi->msi_first == first, ("message not in group")); + KASSERT(msi->msi_dev == first->msi_dev, ("owner mismatch")); + msi->msi_first = NULL; + msi->msi_dev = NULL; + apic_free_vector(msi->msi_vector, msi->msi_irq); + msi->msi_vector = 0; + msi->msi_index = 0; + } + + /* Clear out the first message. 
*/ + first->msi_first = NULL; + first->msi_dev = NULL; + apic_free_vector(first->msi_vector, first->msi_irq); + first->msi_vector = 0; + first->msi_count = 0; + + sx_xunlock(&msi_sx); + return (0); +} + +int +msix_alloc(device_t dev, int index, int *irq, int *new) +{ + struct msi_intsrc *msi; + int i, vector; + + *new = 0; + if (!msi_enabled) + return (ENXIO); + + sx_xlock(&msi_sx); + + /* Find a free IRQ. */ + for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) { + msi = (struct msi_intsrc *)intr_lookup_source(i); + + /* End of allocated sources, so break. */ + if (msi == NULL) + break; + + /* If this is a free one, start or continue a run. */ + if (msi->msi_dev == NULL) + break; + } + + /* Do we need to create a new source? */ + if (msi == NULL) { + /* If we would exceed the max, give up. */ + if (i + 1 > FIRST_MSI_INT + NUM_MSI_INTS) { + sx_xunlock(&msi_sx); + return (ENXIO); + } + + /* Create a new source. */ + *new = 1; + msi = malloc(sizeof(struct msi_intsrc), M_MSI, + M_WAITOK | M_ZERO); + msi->msi_intsrc.is_pic = &msix_pic; + msi->msi_irq = i; + intr_register_source(&msi->msi_intsrc); + } + + /* Allocate an IDT vector. */ + vector = apic_alloc_vector(i); + + /* Setup source. */ + msi->msi_intsrc.is_pic = &msix_pic; + msi->msi_dev = dev; + msi->msi_vector = vector; + msi->msi_index = index; + msi->msi_msix = 1; + + /* XXX: Somewhat gross. */ + msi->msi_intsrc.is_enabled = 0; + sx_xunlock(&msi_sx); + + *irq = i; + return (0); +} + +int +msix_release(int irq) +{ + struct msi_intsrc *msi; + + sx_xlock(&msi_sx); + msi = (struct msi_intsrc *)intr_lookup_source(irq); + if (msi == NULL) { + sx_xunlock(&msi_sx); + return (ENOENT); + } + + /* Make sure this is an MSI-X message. */ + if (!msi->msi_msix) { + sx_xunlock(&msi_sx); + return (EINVAL); + } + + KASSERT(msi->msi_dev != NULL, ("unowned message")); + + /* Clear out the message. 
*/ + msi->msi_dev = NULL; + apic_free_vector(msi->msi_vector, msi->msi_irq); + msi->msi_vector = 0; + msi->msi_index = 0; + msi->msi_msix = 0; + + sx_xunlock(&msi_sx); + return (0); +} diff --git a/sys/i386/i386/nexus.c b/sys/i386/i386/nexus.c index 6e1e281..5b62f73 100644 --- a/sys/i386/i386/nexus.c +++ b/sys/i386/i386/nexus.c @@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$"); * and I/O memory address space. */ +#include "opt_apic.h" #include "opt_isa.h" #include <sys/param.h> @@ -61,6 +62,10 @@ __FBSDID("$FreeBSD$"); #include <machine/resource.h> +#ifdef DEV_APIC +#include "pcib_if.h" +#endif + #ifdef DEV_ISA #include <isa/isavar.h> #ifdef PC98 @@ -104,6 +109,12 @@ static struct resource_list *nexus_get_reslist(device_t dev, device_t child); static int nexus_set_resource(device_t, device_t, int, int, u_long, u_long); static int nexus_get_resource(device_t, device_t, int, int, u_long *, u_long *); static void nexus_delete_resource(device_t, device_t, int, int); +#ifdef DEV_APIC +static int nexus_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs); +static int nexus_release_msi(device_t pcib, device_t dev, int count, int *irqs); +static int nexus_alloc_msix(device_t pcib, device_t dev, int index, int *irq); +static int nexus_release_msix(device_t pcib, device_t dev, int irq); +#endif static device_method_t nexus_methods[] = { /* Device interface */ @@ -129,6 +140,14 @@ static device_method_t nexus_methods[] = { DEVMETHOD(bus_get_resource, nexus_get_resource), DEVMETHOD(bus_delete_resource, nexus_delete_resource), + /* pcib interface */ +#ifdef DEV_APIC + DEVMETHOD(pcib_alloc_msi, nexus_alloc_msi), + DEVMETHOD(pcib_release_msi, nexus_release_msi), + DEVMETHOD(pcib_alloc_msix, nexus_alloc_msix), + DEVMETHOD(pcib_release_msix, nexus_release_msix), +#endif + { 0, 0 } }; @@ -552,6 +571,49 @@ nexus_delete_resource(device_t dev, device_t child, int type, int rid) resource_list_delete(rl, type, rid); } +#ifdef DEV_APIC +static int +nexus_alloc_msix(device_t 
pcib, device_t dev, int index, int *irq) +{ + int error, new; + + error = msix_alloc(dev, index, irq, &new); + if (new) + rman_manage_region(&irq_rman, *irq, *irq); + return (error); +} + +static int +nexus_release_msix(device_t pcib, device_t dev, int irq) +{ + + return (msix_release(irq)); +} + +static int +nexus_alloc_msi(device_t pcib, device_t dev, int count, int maxcount, int *irqs) +{ + int error, i, newirq, newcount; + + /* First alloc the messages. */ + error = msi_alloc(dev, count, maxcount, irqs, &newirq, &newcount); + + /* Always add any new IRQs to the rman, even on failure. */ + for (i = 0; i < newcount; i++) + rman_manage_region(&irq_rman, irqs[newirq + i], + irqs[newirq + i]); + + return (error); +} + +static int +nexus_release_msi(device_t pcib, device_t dev, int count, int *irqs) +{ + + return (msi_release(irqs, count)); +} +#endif + #ifdef DEV_ISA /* * Placeholder which claims PnP 'devices' which describe system diff --git a/sys/i386/include/apicvar.h b/sys/i386/include/apicvar.h index 99c3028..5116162 100644 --- a/sys/i386/include/apicvar.h +++ b/sys/i386/include/apicvar.h @@ -174,6 +174,7 @@ inthand_t IDTVEC(apic_isr7), IDTVEC(spuriousint), IDTVEC(timerint); u_int apic_alloc_vector(u_int irq); +u_int apic_alloc_vectors(u_int *irqs, u_int count, u_int align); void apic_enable_vector(u_int vector); void apic_free_vector(u_int vector, u_int irq); u_int apic_idt_to_irq(u_int vector); diff --git a/sys/i386/include/intr_machdep.h b/sys/i386/include/intr_machdep.h index 4884fd5..bfeba5e 100644 --- a/sys/i386/include/intr_machdep.h +++ b/sys/i386/include/intr_machdep.h @@ -43,11 +43,18 @@ * 191 and still be safe since only interrupt sources in actual use will * allocate IDT vectors. * - * For now we stick with 255 as ISA IRQs and PCI intline IRQs only allow - * for IRQs in the range 0 - 254. When MSI support is added this number - * will likely increase. + * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. 
+ * IRQ values beyond 256 are used by MSI. We leave 255 unused to avoid + * confusion since 255 is used in PCI to indicate an invalid IRQ. */ -#define NUM_IO_INTS 255 +#define NUM_MSI_INTS 128 +#define FIRST_MSI_INT 256 +#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS) + +/* + * Default base address for MSI messages on x86 platforms. + */ +#define MSI_INTEL_ADDR_BASE 0xfee00000 /* * - 1 ??? dummy counter. @@ -137,6 +144,12 @@ int intr_remove_handler(void *cookie); void intr_resume(void); void intr_suspend(void); void intrcnt_add(const char *name, u_long **countp); +void msi_init(void); +int msi_alloc(device_t dev, int count, int maxcount, int *irqs, int *newirq, + int *newcount); +int msi_release(int* irqs, int count); +int msix_alloc(device_t dev, int index, int *irq, int *new); +int msix_release(int irq); #endif /* !LOCORE */ #endif /* _KERNEL */ diff --git a/sys/i386/pci/pci_bus.c b/sys/i386/pci/pci_bus.c index 6413d0e..a91d214 100644 --- a/sys/i386/pci/pci_bus.c +++ b/sys/i386/pci/pci_bus.c @@ -534,6 +534,10 @@ static device_method_t legacy_pcib_methods[] = { DEVMETHOD(pcib_read_config, legacy_pcib_read_config), DEVMETHOD(pcib_write_config, legacy_pcib_write_config), DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt), + DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), + DEVMETHOD(pcib_release_msi, pcib_release_msi), + DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), + DEVMETHOD(pcib_release_msix, pcib_release_msix), { 0, 0 } }; @@ -623,6 +627,10 @@ static device_method_t pcibios_pcib_pci_methods[] = { DEVMETHOD(pcib_read_config, pcib_read_config), DEVMETHOD(pcib_write_config, pcib_write_config), DEVMETHOD(pcib_route_interrupt, pcibios_pcib_route_interrupt), + DEVMETHOD(pcib_alloc_msi, pcib_alloc_msi), + DEVMETHOD(pcib_release_msi, pcib_release_msi), + DEVMETHOD(pcib_alloc_msix, pcib_alloc_msix), + DEVMETHOD(pcib_release_msix, pcib_release_msix), {0, 0} }; |