From 76256698a1223f7daae4d1340f06dfdda9147185 Mon Sep 17 00:00:00 2001 From: jhb Date: Wed, 1 Jul 2009 17:20:07 +0000 Subject: Improve the handling of cpuset with interrupts. - For x86, change the interrupt source method to assign an interrupt source to a specific CPU to return an error value instead of void, thus allowing it to fail. - If moving an interrupt to a CPU fails due to a lack of IDT vectors in the destination CPU, fail the request with ENOSPC rather than panicing. - For MSI interrupts on x86 (but not MSI-X), only allow cpuset to be used on the first interrupt in a group. Moving the first interrupt in a group moves the entire group. - Use the icu_lock to protect intr_next_cpu() on x86 instead of the intr_table_lock to fix a LOR introduced in the last set of MSI changes. - Add a new privilege PRIV_SCHED_CPUSET_INTR for using cpuset with interrupts. Previously, binding an interrupt to a CPU only performed a privilege check if the interrupt had an interrupt thread. Interrupts without a thread could be bound by non-root users as a result. - If an interrupt event's assign_cpu method fails, then restore the original cpuset mask for the associated interrupt thread. Approved by: re (kib) --- sys/i386/i386/intr_machdep.c | 35 ++++++++++-------- sys/i386/i386/io_apic.c | 16 +++++--- sys/i386/i386/local_apic.c | 6 +-- sys/i386/i386/msi.c | 82 ++++++++++++++++++++++++++++++++--------- sys/i386/include/intr_machdep.h | 2 +- sys/i386/isa/atpic.c | 4 +- 6 files changed, 101 insertions(+), 44 deletions(-) (limited to 'sys/i386') diff --git a/sys/i386/i386/intr_machdep.c b/sys/i386/i386/intr_machdep.c index a5c7d00..3d1b0c4 100644 --- a/sys/i386/i386/intr_machdep.c +++ b/sys/i386/i386/intr_machdep.c @@ -290,7 +290,8 @@ static int intr_assign_cpu(void *arg, u_char cpu) { #ifdef SMP - struct intsrc *isrc; + struct intsrc *isrc; + int error; /* * Don't do anything during early boot. We will pick up the @@ -299,10 +300,11 @@ intr_assign_cpu(void *arg, u_char cpu) if (assign_cpu && cpu != NOCPU) { isrc = arg; mtx_lock(&intr_table_lock); - isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]); + error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]); mtx_unlock(&intr_table_lock); - } - return (0); + } else + error = 0; + return (error); #else return (EOPNOTSUPP); #endif @@ -359,7 +361,7 @@ intr_init(void *dummy __unused) intrcnt_setname("???", 0); intrcnt_index = 1; STAILQ_INIT(&pics); - mtx_init(&intr_table_lock, "intr sources", NULL, MTX_DEF | MTX_RECURSE); + mtx_init(&intr_table_lock, "intr sources", NULL, MTX_DEF); mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN); } SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL); @@ -407,14 +409,14 @@ intr_next_cpu(void) if (!assign_cpu) return (cpu_apic_ids[0]); - mtx_lock(&intr_table_lock); + mtx_lock_spin(&icu_lock); apic_id = cpu_apic_ids[current_cpu]; do { current_cpu++; if (current_cpu > mp_maxid) current_cpu = 0; } while (!(intr_cpus & (1 << current_cpu))); - mtx_unlock(&intr_table_lock); + mtx_unlock_spin(&icu_lock); return (apic_id); } @@ -455,7 +457,6 @@ static void intr_shuffle_irqs(void *arg __unused) { struct intsrc *isrc; - u_int apic_id; int i; #ifdef XEN @@ -463,8 +464,8 @@ intr_shuffle_irqs(void *arg __unused) * Doesn't work yet */ return; -#endif - +#endif + /* Don't bother on UP. */ if (mp_ncpus == 1) return; @@ -478,13 +479,17 @@ intr_shuffle_irqs(void *arg __unused) /* * If this event is already bound to a CPU, * then assign the source to that CPU instead - * of picking one via round-robin. + * of picking one via round-robin. Note that + * this is careful to only advance the + * round-robin if the CPU assignment succeeds. */ if (isrc->is_event->ie_cpu != NOCPU) - apic_id = isrc->is_event->ie_cpu; - else - apic_id = intr_next_cpu(); - isrc->is_pic->pic_assign_cpu(isrc, apic_id); + (void)isrc->is_pic->pic_assign_cpu(isrc, + isrc->is_event->ie_cpu); + else if (isrc->is_pic->pic_assign_cpu(isrc, + cpu_apic_ids[current_cpu]) == 0) + (void)intr_next_cpu(); + } } mtx_unlock(&intr_table_lock); diff --git a/sys/i386/i386/io_apic.c b/sys/i386/i386/io_apic.c index d3bdad0..87d5481 100644 --- a/sys/i386/i386/io_apic.c +++ b/sys/i386/i386/io_apic.c @@ -120,7 +120,7 @@ static int ioapic_source_pending(struct intsrc *isrc); static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); static void ioapic_resume(struct pic *pic); -static void ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id); +static int ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id); static void ioapic_program_intpin(struct ioapic_intsrc *intpin); static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list); @@ -322,7 +322,7 @@ ioapic_program_intpin(struct ioapic_intsrc *intpin) mtx_unlock_spin(&icu_lock); } -static void +static int ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id) { struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; @@ -342,7 +342,7 @@ ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id) old_vector = intpin->io_vector; old_id = intpin->io_cpu; if (old_vector && apic_id == old_id) - return; + return (0); /* * Allocate an APIC vector for this interrupt pin. Once @@ -350,6 +350,9 @@ ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id) */ intpin->io_cpu = apic_id; intpin->io_vector = apic_alloc_vector(apic_id, intpin->io_irq); + if (intpin->io_vector == 0) + return (ENOSPC); + if (bootverbose) { printf("ioapic%u: routing intpin %u (", io->io_id, intpin->io_intpin); @@ -364,6 +367,7 @@ ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id) */ if (old_vector) apic_free_vector(old_id, old_vector, intpin->io_irq); + return (0); } static void @@ -372,7 +376,9 @@ ioapic_enable_intr(struct intsrc *isrc) struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc; if (intpin->io_vector == 0) - ioapic_assign_cpu(isrc, intr_next_cpu()); + if (ioapic_assign_cpu(isrc, intr_next_cpu()) != 0) + panic("Couldn't find an APIC vector for IRQ %d", + intpin->io_irq); apic_enable_vector(intpin->io_cpu, intpin->io_vector); } @@ -496,7 +502,7 @@ ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase) io->io_pic = ioapic_template; mtx_lock_spin(&icu_lock); io->io_id = next_id++; - io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT; + io->io_apic_id = ioapic_read(apic, IOAPIC_ID) >> APIC_ID_SHIFT; if (apic_id != -1 && io->io_apic_id != apic_id) { ioapic_write(apic, IOAPIC_ID, apic_id << APIC_ID_SHIFT); mtx_unlock_spin(&icu_lock); diff --git a/sys/i386/i386/local_apic.c b/sys/i386/i386/local_apic.c index 1e136f3..6b350e2 100644 --- a/sys/i386/i386/local_apic.c +++ b/sys/i386/i386/local_apic.c @@ -139,7 +139,7 @@ static inthand_t *ioint_handlers[] = { }; -static u_int32_t lapic_timer_divisors[] = { +static u_int32_t lapic_timer_divisors[] = { APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128 }; @@ -799,7 +799,7 @@ apic_alloc_vector(u_int apic_id, u_int irq) return (vector + APIC_IO_INTS); } mtx_unlock_spin(&icu_lock); - panic("Couldn't find an APIC vector for IRQ %u", irq); + return (0); } /* @@ -1062,7 +1062,7 @@ DB_SHOW_COMMAND(lapic, db_show_lapic) static SLIST_HEAD(, apic_enumerator) enumerators = SLIST_HEAD_INITIALIZER(enumerators); static struct apic_enumerator *best_enum; - + void apic_register_enumerator(struct apic_enumerator *enumerator) { diff --git a/sys/i386/i386/msi.c b/sys/i386/i386/msi.c index d5e24c9..da5bedb 100644 --- a/sys/i386/i386/msi.c +++ b/sys/i386/i386/msi.c @@ -113,6 +113,8 @@ struct msi_intsrc { u_int msi_vector:8; /* IDT vector. */ u_int msi_cpu:8; /* Local APIC ID. (g) */ u_int msi_count:8; /* Messages in this group. (g) */ + u_int msi_maxcount:8; /* Alignment for this group. (g) */ + int *msi_irqs; /* Group's IRQ list. (g) */ }; static void msi_create_source(void); @@ -125,7 +127,7 @@ static int msi_vector(struct intsrc *isrc); static int msi_source_pending(struct intsrc *isrc); static int msi_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); -static void msi_assign_cpu(struct intsrc *isrc, u_int apic_id); +static int msi_assign_cpu(struct intsrc *isrc, u_int apic_id); struct pic msi_pic = { msi_enable_source, msi_disable_source, msi_eoi_source, msi_enable_intr, msi_disable_intr, msi_vector, @@ -195,32 +197,52 @@ msi_config_intr(struct intsrc *isrc, enum intr_trigger trig, return (ENODEV); } -static void +static int msi_assign_cpu(struct intsrc *isrc, u_int apic_id) { - struct msi_intsrc *msi = (struct msi_intsrc *)isrc; + struct msi_intsrc *sib, *msi = (struct msi_intsrc *)isrc; int old_vector; u_int old_id; - int vector; + int i, vector; + + /* + * Only allow CPUs to be assigned to the first message for an + * MSI group. + */ + if (msi->msi_first != msi) + return (EINVAL); /* Store information to free existing irq. */ old_vector = msi->msi_vector; old_id = msi->msi_cpu; if (old_id == apic_id) - return; - if (!msi->msi_msix && msi->msi_first->msi_count > 1) - return; - - /* Allocate IDT vector on this cpu. */ - vector = apic_alloc_vector(apic_id, msi->msi_irq); + return (0); + + /* Allocate IDT vectors on this cpu. */ + if (msi->msi_count > 1) { + KASSERT(msi->msi_msix == 0, ("MSI-X message group")); + vector = apic_alloc_vectors(apic_id, msi->msi_irqs, + msi->msi_count, msi->msi_maxcount); + } else + vector = apic_alloc_vector(apic_id, msi->msi_irq); if (vector == 0) - return; /* XXX alloc_vector panics on failure. */ + return (ENOSPC); + msi->msi_cpu = apic_id; msi->msi_vector = vector; if (bootverbose) printf("msi: Assigning %s IRQ %d to local APIC %u vector %u\n", msi->msi_msix ? "MSI-X" : "MSI", msi->msi_irq, msi->msi_cpu, msi->msi_vector); + for (i = 1; i < msi->msi_count; i++) { + sib = (struct msi_intsrc *)intr_lookup_source(msi->msi_irqs[i]); + sib->msi_cpu = apic_id; + sib->msi_vector = vector + i; + if (bootverbose) + printf( + "msi: Assigning MSI IRQ %d to local APIC %u vector %u\n", + sib->msi_irq, sib->msi_cpu, sib->msi_vector); + } pci_remap_msi_irq(msi->msi_dev, msi->msi_irq); /* @@ -228,6 +250,9 @@ msi_assign_cpu(struct intsrc *isrc, u_int apic_id) * to prevent races where we could miss an interrupt. */ apic_free_vector(old_id, old_vector, msi->msi_irq); + for (i = 1; i < msi->msi_count; i++) + apic_free_vector(old_id, old_vector + i, msi->msi_irqs[i]); + return (0); } void @@ -268,7 +293,7 @@ msi_create_source(void) msi_last_irq++; mtx_unlock(&msi_lock); - msi = malloc(sizeof(struct msi_intsrc), M_MSI, M_WAITOK | M_ZERO); + msi = malloc(sizeof(struct msi_intsrc), M_MSI, M_WAITOK | M_ZERO); msi->msi_intsrc.is_pic = &msi_pic; msi->msi_irq = irq; intr_register_source(&msi->msi_intsrc); @@ -276,21 +301,22 @@ msi_create_source(void) } /* - * Try to allocate 'count' interrupt sources with contiguous IDT values. If - * we allocate any new sources, then their IRQ values will be at the end of - * the irqs[] array, with *newirq being the index of the first new IRQ value - * and *newcount being the number of new IRQ values added. + * Try to allocate 'count' interrupt sources with contiguous IDT values. */ int msi_alloc(device_t dev, int count, int maxcount, int *irqs) { struct msi_intsrc *msi, *fsrc; u_int cpu; - int cnt, i, vector; + int cnt, i, *mirqs, vector; if (!msi_enabled) return (ENXIO); + if (count > 1) + mirqs = malloc(count * sizeof(*mirqs), M_MSI, M_WAITOK); + else + mirqs = NULL; again: mtx_lock(&msi_lock); @@ -317,6 +343,7 @@ again: /* If we would exceed the max, give up. */ if (i + (count - cnt) > FIRST_MSI_INT + NUM_MSI_INTS) { mtx_unlock(&msi_lock); + free(mirqs, M_MSI); return (ENXIO); } mtx_unlock(&msi_lock); @@ -337,6 +364,7 @@ again: vector = apic_alloc_vectors(cpu, irqs, count, maxcount); if (vector == 0) { mtx_unlock(&msi_lock); + free(mirqs, M_MSI); return (ENOSPC); } @@ -356,6 +384,10 @@ again: ("dead MSI has handlers")); } fsrc->msi_count = count; + fsrc->msi_maxcount = maxcount; + if (count > 1) + bcopy(irqs, mirqs, count * sizeof(*mirqs)); + fsrc->msi_irqs = mirqs; mtx_unlock(&msi_lock); return (0); @@ -413,6 +445,9 @@ msi_release(int *irqs, int count) apic_free_vector(first->msi_cpu, first->msi_vector, first->msi_irq); first->msi_vector = 0; first->msi_count = 0; + first->msi_maxcount = 0; + free(first->msi_irqs, M_MSI); + first->msi_irqs = NULL; mtx_unlock(&msi_lock); return (0); @@ -498,15 +533,23 @@ again: /* Allocate an IDT vector. */ cpu = intr_next_cpu(); vector = apic_alloc_vector(cpu, i); + if (vector == 0) { + mtx_unlock(&msi_lock); + return (ENOSPC); + } if (bootverbose) printf("msi: routing MSI-X IRQ %d to local APIC %u vector %u\n", msi->msi_irq, cpu, vector); - + /* Setup source. */ msi->msi_cpu = cpu; msi->msi_dev = dev; + msi->msi_first = msi; msi->msi_vector = vector; msi->msi_msix = 1; + msi->msi_count = 1; + msi->msi_maxcount = 1; + msi->msi_irqs = NULL; KASSERT(msi->msi_intsrc.is_handlers == 0, ("dead MSI-X has handlers")); mtx_unlock(&msi_lock); @@ -536,10 +579,13 @@ msix_release(int irq) KASSERT(msi->msi_dev != NULL, ("unowned message")); /* Clear out the message. */ + msi->msi_first = NULL; msi->msi_dev = NULL; apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq); msi->msi_vector = 0; msi->msi_msix = 0; + msi->msi_count = 0; + msi->msi_maxcount = 0; mtx_unlock(&msi_lock); return (0); diff --git a/sys/i386/include/intr_machdep.h b/sys/i386/include/intr_machdep.h index aa026b6..f21e0bcf 100644 --- a/sys/i386/include/intr_machdep.h +++ b/sys/i386/include/intr_machdep.h @@ -93,7 +93,7 @@ struct pic { void (*pic_resume)(struct pic *); int (*pic_config_intr)(struct intsrc *, enum intr_trigger, enum intr_polarity); - void (*pic_assign_cpu)(struct intsrc *, u_int apic_id); + int (*pic_assign_cpu)(struct intsrc *, u_int apic_id); STAILQ_ENTRY(pic) pics; }; diff --git a/sys/i386/isa/atpic.c b/sys/i386/isa/atpic.c index dfeeb21..37a1285 100644 --- a/sys/i386/isa/atpic.c +++ b/sys/i386/isa/atpic.c @@ -161,7 +161,7 @@ static void atpic_resume(struct pic *pic); static int atpic_source_pending(struct intsrc *isrc); static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig, enum intr_polarity pol); -static void atpic_assign_cpu(struct intsrc *isrc, u_int apic_id); +static int atpic_assign_cpu(struct intsrc *isrc, u_int apic_id); static void i8259_init(struct atpic *pic, int slave); static struct atpic atpics[] = { @@ -389,7 +389,7 @@ atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig, #endif /* PC98 */ } -static void +static int atpic_assign_cpu(struct intsrc *isrc, u_int apic_id) { -- cgit v1.1