author     jhb <jhb@FreeBSD.org>    2006-02-28 22:24:55 +0000
committer  jhb <jhb@FreeBSD.org>    2006-02-28 22:24:55 +0000
commit     3478c467ee4302dfd13af593c68c02bcf15cd5aa (patch)
tree       f5550e7925fc98745c64609db43678c325dc0243 /sys/amd64
parent     1ec49d602a80028377023a0fdc8f11d6281a3b18 (diff)
Rework how we wire up interrupt sources to CPUs:
- Throw out all of the logical APIC ID stuff.  The Intel docs are somewhat
  ambiguous, but it seems that the "flat" cluster model we are currently
  using is only supported on Pentium and P6 family CPUs.  The other
  "hierarchy" cluster model that is supported on all Intel CPUs with local
  APICs is severely underdocumented.  For example, it's not clear if the OS
  needs to glean the topology of the APIC hierarchy from somewhere (neither
  ACPI nor the MP Table includes it) and set up the logical clusters based
  on the physical hierarchy or not.  Not only that, but on certain Intel
  chipsets, even though there were 4 CPUs in a logical cluster, all the
  interrupts were only sent to one CPU anyway.
- We now bind interrupts to individual CPUs using physical addressing via
  the local APIC IDs.  This code has also moved out of the ioapic PIC
  driver and into the common interrupt source code so that it can be
  shared with MSI interrupt sources, since MSI is addressed to APICs the
  same way that I/O APIC pins are.
- Interrupt source classes grow a new method, pic_assign_cpu(), to bind an
  interrupt source to a specific local APIC ID.
- The SMP code now tells the interrupt code which CPUs are available to
  handle interrupts in a simpler and more intuitive manner.  For one
  thing, it means we could now choose not to route interrupts to HT cores
  if we wanted to (this code is currently in place in fact, but under an
  #if 0 for now).
- For now we simply do static round-robin of IRQs to CPUs when the first
  interrupt handler is added, just as before, with the change that IRQs
  are now bound to individual CPUs rather than groups of up to 4 CPUs.
- Because the IRQ to CPU mapping has now been moved up a layer, it would
  be easier to manage this mapping from higher levels.  For example, we
  could allow drivers to specify a CPU affinity map for their interrupts,
  or we could allow a userland tool to bind IRQs to specific CPUs.

The MFC is tentative, but I want to see if this fixes problems some folks
had with UP APIC kernels on 6.0 on SMP machines (an SMP kernel would work
fine, but a UP APIC kernel (such as GENERIC in RELENG_6) would lose
interrupts).

MFC after:	1 week
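As an illustration only (not part of the patch), the following standalone
userland C sketch mimics the round-robin policy described above: the SMP
startup code registers each usable local APIC ID, and each interrupt
source is then handed the next APIC ID in turn via the new per-PIC
assign-cpu hook.  The names mirror the patch, but the APIC IDs and IRQ
count are made up and printf stands in for programming the real I/O APIC
redirection entries.

/*
 * Standalone sketch of the round-robin IRQ-to-CPU assignment introduced
 * by this change.  Not kernel code; values are illustrative.
 */
#include <stdio.h>

#define MAXCPU		16
#define NUM_IO_INTS	8

static unsigned int cpu_apic_ids[MAXCPU];
static int current_cpu, num_cpus;

/* Record a local APIC ID as a valid interrupt destination. */
static void
intr_add_cpu(unsigned int apic_id)
{

	if (num_cpus >= MAXCPU)
		return;
	cpu_apic_ids[num_cpus++] = apic_id;
}

/* Hand the next APIC ID to an IRQ (stand-in for pic_assign_cpu()). */
static void
intr_assign_next_cpu(int irq)
{
	unsigned int apic_id;

	apic_id = cpu_apic_ids[current_cpu];
	current_cpu = (current_cpu + 1) % num_cpus;
	printf("IRQ %d -> local APIC %u\n", irq, apic_id);
}

int
main(void)
{
	int irq;

	/* Pretend SMP startup found three usable CPUs (made-up APIC IDs). */
	intr_add_cpu(0);
	intr_add_cpu(2);
	intr_add_cpu(4);

	/* Shuffle the interrupt sources across them round-robin. */
	for (irq = 0; irq < NUM_IO_INTS; irq++)
		intr_assign_next_cpu(irq);
	return (0);
}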
Diffstat (limited to 'sys/amd64')
-rw-r--r--  sys/amd64/amd64/intr_machdep.c      92
-rw-r--r--  sys/amd64/amd64/io_apic.c          115
-rw-r--r--  sys/amd64/amd64/local_apic.c        16
-rw-r--r--  sys/amd64/amd64/mp_machdep.c        40
-rw-r--r--  sys/amd64/include/apicvar.h          1
-rw-r--r--  sys/amd64/include/intr_machdep.h     7
-rw-r--r--  sys/amd64/isa/atpic.c               16
7 files changed, 152 insertions, 135 deletions
diff --git a/sys/amd64/amd64/intr_machdep.c b/sys/amd64/amd64/intr_machdep.c
index 7338bae..9d70d06 100644
--- a/sys/amd64/amd64/intr_machdep.c
+++ b/sys/amd64/amd64/intr_machdep.c
@@ -63,6 +63,12 @@ static int intrcnt_index;
static struct intsrc *interrupt_sources[NUM_IO_INTS];
static struct mtx intr_table_lock;
+#ifdef SMP
+static int assign_cpu;
+
+static void intr_assign_next_cpu(struct intsrc *isrc);
+#endif
+
static void intr_init(void *__dummy);
static void intrcnt_setname(const char *name, int index);
static void intrcnt_updatename(struct intsrc *is);
@@ -93,6 +99,7 @@ intr_register_source(struct intsrc *isrc)
}
intrcnt_register(isrc);
interrupt_sources[vector] = isrc;
+ isrc->is_enabled = 0;
mtx_unlock_spin(&intr_table_lock);
return (0);
}
@@ -118,7 +125,17 @@ intr_add_handler(const char *name, int vector, driver_intr_t handler,
intr_priority(flags), flags, cookiep);
if (error == 0) {
intrcnt_updatename(isrc);
- isrc->is_pic->pic_enable_intr(isrc);
+ mtx_lock_spin(&intr_table_lock);
+ if (!isrc->is_enabled) {
+ isrc->is_enabled = 1;
+#ifdef SMP
+ if (assign_cpu)
+ intr_assign_next_cpu(isrc);
+#endif
+ mtx_unlock_spin(&intr_table_lock);
+ isrc->is_pic->pic_enable_intr(isrc);
+ } else
+ mtx_unlock_spin(&intr_table_lock);
isrc->is_pic->pic_enable_source(isrc);
}
return (error);
@@ -335,3 +352,76 @@ DB_SHOW_COMMAND(irqs, db_show_irqs)
db_dump_intr_event((*isrc)->is_event, verbose);
}
#endif
+
+#ifdef SMP
+/*
+ * Support for balancing interrupt sources across CPUs. For now we just
+ * allocate CPUs round-robin.
+ */
+
+static u_int cpu_apic_ids[MAXCPU];
+static int current_cpu, num_cpus;
+
+static void
+intr_assign_next_cpu(struct intsrc *isrc)
+{
+ struct pic *pic;
+ u_int apic_id;
+
+ /*
+ * Assign this source to a local APIC in a round-robin fashion.
+ */
+ pic = isrc->is_pic;
+ apic_id = cpu_apic_ids[current_cpu];
+ current_cpu++;
+ if (current_cpu >= num_cpus)
+ current_cpu = 0;
+ if (bootverbose) {
+ printf("INTR: Assigning IRQ %d", pic->pic_vector(isrc));
+ printf(" to local APIC %u\n", apic_id);
+ }
+ pic->pic_assign_cpu(isrc, apic_id);
+}
+
+/*
+ * Add a local APIC ID to our list of valid local APIC IDs that can
+ * be destinations of interrupts.
+ */
+void
+intr_add_cpu(u_int apic_id)
+{
+
+ if (bootverbose)
+ printf("INTR: Adding local APIC %d as a target\n", apic_id);
+ if (num_cpus >= MAXCPU)
+ panic("WARNING: Local APIC IDs exhausted!");
+ cpu_apic_ids[num_cpus] = apic_id;
+ num_cpus++;
+}
+
+/*
+ * Distribute all the interrupt sources among the available CPUs once the
+ * AP's have been launched.
+ */
+static void
+intr_shuffle_irqs(void *arg __unused)
+{
+ struct intsrc *isrc;
+ int i;
+
+ /* Don't bother on UP. */
+ if (num_cpus <= 1)
+ return;
+
+ /* Round-robin assign each enabled source a CPU. */
+ mtx_lock_spin(&intr_table_lock);
+ assign_cpu = 1;
+ for (i = 0; i < NUM_IO_INTS; i++) {
+ isrc = interrupt_sources[i];
+ if (isrc != NULL && isrc->is_enabled)
+ intr_assign_next_cpu(isrc);
+ }
+ mtx_unlock_spin(&intr_table_lock);
+}
+SYSINIT(intr_shuffle_irqs, SI_SUB_SMP, SI_ORDER_SECOND, intr_shuffle_irqs, NULL)
+#endif
diff --git a/sys/amd64/amd64/io_apic.c b/sys/amd64/amd64/io_apic.c
index 46c6cd3..6105986 100644
--- a/sys/amd64/amd64/io_apic.c
+++ b/sys/amd64/amd64/io_apic.c
@@ -61,8 +61,6 @@ __FBSDID("$FreeBSD$");
#define IRQ_SMI (NUM_IO_INTS + 3)
#define IRQ_DISABLED (NUM_IO_INTS + 4)
-#define DEST_NONE -1
-
#define TODO printf("%s: not implemented!\n", __func__)
static MALLOC_DEFINE(M_IOAPIC, "io_apic", "I/O APIC structures");
@@ -82,10 +80,10 @@ struct ioapic_intsrc {
u_int io_irq;
u_int io_intpin:8;
u_int io_vector:8;
+ u_int io_cpu:8;
u_int io_activehi:1;
u_int io_edgetrigger:1;
u_int io_masked:1;
- int io_dest:5;
int io_bus:4;
};
@@ -114,7 +112,7 @@ static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
enum intr_polarity pol);
static void ioapic_suspend(struct intsrc *isrc);
static void ioapic_resume(struct intsrc *isrc);
-static void ioapic_program_destination(struct ioapic_intsrc *intpin);
+static void ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id);
static void ioapic_program_intpin(struct ioapic_intsrc *intpin);
static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list);
@@ -122,10 +120,10 @@ struct pic ioapic_template = { ioapic_enable_source, ioapic_disable_source,
ioapic_eoi_source, ioapic_enable_intr,
ioapic_vector, ioapic_source_pending,
ioapic_suspend, ioapic_resume,
- ioapic_config_intr };
-
-static int bsp_id, current_cluster, logical_clusters, next_ioapic_base;
-static u_int next_id, program_logical_dest;
+ ioapic_config_intr, ioapic_assign_cpu };
+
+static int next_ioapic_base;
+static u_int next_id;
SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD, 0, "APIC options");
static int enable_extint;
@@ -273,14 +271,8 @@ ioapic_program_intpin(struct ioapic_intsrc *intpin)
}
/* Set the destination. */
- if (intpin->io_dest == DEST_NONE) {
- low = IOART_DESTPHY;
- high = bsp_id << APIC_ID_SHIFT;
- } else {
- low = IOART_DESTLOG;
- high = (intpin->io_dest << APIC_ID_CLUSTER_SHIFT |
- APIC_ID_CLUSTER_ID) << APIC_ID_SHIFT;
- }
+ low = IOART_DESTPHY;
+ high = intpin->io_cpu << APIC_ID_SHIFT;
/* Program the rest of the low word. */
if (intpin->io_edgetrigger)
@@ -312,7 +304,7 @@ ioapic_program_intpin(struct ioapic_intsrc *intpin)
default:
KASSERT(intpin->io_vector != 0, ("No vector for IRQ %u",
intpin->io_irq));
- low |= IOART_DELLOPRI | intpin->io_vector;
+ low |= IOART_DELFIXED | intpin->io_vector;
}
/* Write the values to the APIC. */
@@ -325,60 +317,31 @@ ioapic_program_intpin(struct ioapic_intsrc *intpin)
mtx_unlock_spin(&icu_lock);
}
-/*
- * Program an individual intpin's logical destination.
- */
static void
-ioapic_program_destination(struct ioapic_intsrc *intpin)
+ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id)
{
- struct ioapic *io = (struct ioapic *)intpin->io_intsrc.is_pic;
+ struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
+ struct ioapic *io = (struct ioapic *)isrc->is_pic;
- KASSERT(intpin->io_dest != DEST_NONE,
- ("intpin not assigned to a cluster"));
+ intpin->io_cpu = apic_id;
if (bootverbose) {
- printf("ioapic%u: routing intpin %u (", io->io_id,
- intpin->io_intpin);
+ printf("ioapic%u: Assigning ", io->io_id);
ioapic_print_irq(intpin);
- printf(") to cluster %u\n", intpin->io_dest);
+ printf(" to local APIC %u\n", intpin->io_cpu);
}
ioapic_program_intpin(intpin);
}
static void
-ioapic_assign_cluster(struct ioapic_intsrc *intpin)
-{
-
- /*
- * Assign this intpin to a logical APIC cluster in a
- * round-robin fashion. We don't actually use the logical
- * destination for this intpin until after all the CPU's
- * have been started so that we don't end up with interrupts
- * that don't go anywhere. Another alternative might be to
- * start up the CPU's earlier so that they can handle interrupts
- * sooner.
- */
- intpin->io_dest = current_cluster;
- current_cluster++;
- if (current_cluster >= logical_clusters)
- current_cluster = 0;
- if (program_logical_dest)
- ioapic_program_destination(intpin);
-}
-
-static void
ioapic_enable_intr(struct intsrc *isrc)
{
struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
struct ioapic *io = (struct ioapic *)isrc->is_pic;
- if (intpin->io_dest == DEST_NONE) {
+ if (intpin->io_vector == 0) {
/*
* Allocate an APIC vector for this interrupt pin. Once
- * we have a vector we program the interrupt pin. Note
- * that after we have booted ioapic_assign_cluster()
- * will program the interrupt pin again, but it doesn't
- * hurt to do that and trying to avoid that adds needless
- * complication.
+ * we have a vector we program the interrupt pin.
*/
intpin->io_vector = apic_alloc_vector(intpin->io_irq);
if (bootverbose) {
@@ -388,7 +351,6 @@ ioapic_enable_intr(struct intsrc *isrc)
printf(") to vector %u\n", intpin->io_vector);
}
ioapic_program_intpin(intpin);
- ioapic_assign_cluster(intpin);
apic_enable_vector(intpin->io_vector);
}
}
@@ -469,22 +431,6 @@ ioapic_resume(struct intsrc *isrc)
}
/*
- * Allocate and return a logical cluster ID. Note that the first time
- * this is called, it returns cluster 0. ioapic_enable_intr() treats
- * the two cases of logical_clusters == 0 and logical_clusters == 1 the
- * same: one cluster of ID 0 exists. The logical_clusters == 0 case is
- * for UP kernels, which should never call this function.
- */
-int
-ioapic_next_logical_cluster(void)
-{
-
- if (logical_clusters >= APIC_MAX_CLUSTER)
- panic("WARNING: Local APIC cluster IDs exhausted!");
- return (logical_clusters++);
-}
-
-/*
* Create a plain I/O APIC object.
*/
void *
@@ -568,11 +514,10 @@ ioapic_create(uintptr_t addr, int32_t apic_id, int intbase)
}
/*
- * Route interrupts to the BSP by default using physical
- * addressing. Vectored interrupts get readdressed using
- * logical IDs to CPU clusters when they are enabled.
+ * Route interrupts to the BSP by default. Interrupts may
+ * be routed to other CPUs later after they are enabled.
*/
- intpin->io_dest = DEST_NONE;
+ intpin->io_cpu = PCPU_GET(apic_id);
if (bootverbose && intpin->io_irq != IRQ_DISABLED) {
printf("ioapic%u: intpin %d -> ", io->io_id, i);
ioapic_print_irq(intpin);
@@ -778,29 +723,9 @@ ioapic_register(void *cookie)
printf("ioapic%u <Version %u.%u> irqs %u-%u on motherboard\n",
io->io_id, flags >> 4, flags & 0xf, io->io_intbase,
io->io_intbase + io->io_numintr - 1);
- bsp_id = PCPU_GET(apic_id);
/* Register valid pins as interrupt sources. */
for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++)
if (pin->io_irq < NUM_IO_INTS)
intr_register_source(&pin->io_intsrc);
}
-
-/*
- * Program all the intpins to use logical destinations once the AP's
- * have been launched.
- */
-static void
-ioapic_set_logical_destinations(void *arg __unused)
-{
- struct ioapic *io;
- int i;
-
- program_logical_dest = 1;
- STAILQ_FOREACH(io, &ioapic_list, io_next)
- for (i = 0; i < io->io_numintr; i++)
- if (io->io_pins[i].io_dest != DEST_NONE)
- ioapic_program_destination(&io->io_pins[i]);
-}
-SYSINIT(ioapic_destinations, SI_SUB_SMP, SI_ORDER_SECOND,
- ioapic_set_logical_destinations, NULL)
diff --git a/sys/amd64/amd64/local_apic.c b/sys/amd64/amd64/local_apic.c
index a2409b6..2e6a220 100644
--- a/sys/amd64/amd64/local_apic.c
+++ b/sys/amd64/amd64/local_apic.c
@@ -217,6 +217,7 @@ lapic_init(uintptr_t addr)
/* Set BSP's per-CPU local APIC ID. */
PCPU_SET(apic_id, lapic_id());
+ intr_add_cpu(PCPU_GET(apic_id));
/* Local APIC timer interrupt. */
setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_SYSIGT, SEL_KPL, 0);
@@ -279,7 +280,7 @@ void
lapic_setup(void)
{
struct lapic *la;
- u_int32_t value, maxlvt;
+ u_int32_t maxlvt;
register_t eflags;
char buf[MAXCOMLEN + 1];
@@ -291,19 +292,6 @@ lapic_setup(void)
/* Initialize the TPR to allow all interrupts. */
lapic_set_tpr(0);
- /* Use the cluster model for logical IDs. */
- value = lapic->dfr;
- value &= ~APIC_DFR_MODEL_MASK;
- value |= APIC_DFR_MODEL_CLUSTER;
- lapic->dfr = value;
-
- /* Set this APIC's logical ID. */
- value = lapic->ldr;
- value &= ~APIC_ID_MASK;
- value |= (la->la_cluster << APIC_ID_CLUSTER_SHIFT |
- 1 << la->la_cluster_id) << APIC_ID_SHIFT;
- lapic->ldr = value;
-
/* Setup spurious vector and enable the local APIC. */
lapic_enable();
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index a152bc1..9e67751 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -160,7 +160,7 @@ static volatile u_int cpu_ipi_pending[MAXCPU];
static u_int boot_address;
-static void set_logical_apic_ids(void);
+static void set_interrupt_apic_ids(void);
static int start_all_aps(void);
static int start_ap(int apic_id);
static void release_aps(void *dummy);
@@ -405,7 +405,7 @@ cpu_mp_start(void)
hyperthreading_cpus = logical_cpus;
}
- set_logical_apic_ids();
+ set_interrupt_apic_ids();
}
@@ -596,33 +596,29 @@ init_secondary(void)
*/
/*
- * Set the APIC logical IDs.
- *
- * We want to cluster logical CPU's within the same APIC ID cluster.
- * Since logical CPU's are aligned simply filling in the clusters in
- * APIC ID order works fine. Note that this does not try to balance
- * the number of CPU's in each cluster. (XXX?)
+ * We tell the I/O APIC code about all the CPUs we want to receive
+ * interrupts. If we don't want certain CPUs to receive IRQs we
+ * can simply not tell the I/O APIC code about them in this function.
+ * We also do not tell it about the BSP since it tells itself about
+ * the BSP internally to work with UP kernels and on UP machines.
*/
static void
-set_logical_apic_ids(void)
+set_interrupt_apic_ids(void)
{
- u_int apic_id, cluster, cluster_id;
+ u_int apic_id;
- /* Force us to allocate cluster 0 at the start. */
- cluster = -1;
- cluster_id = APIC_MAX_INTRACLUSTER_ID;
for (apic_id = 0; apic_id < MAXCPU; apic_id++) {
if (!cpu_info[apic_id].cpu_present)
continue;
- if (cluster_id == APIC_MAX_INTRACLUSTER_ID) {
- cluster = ioapic_next_logical_cluster();
- cluster_id = 0;
- } else
- cluster_id++;
- if (bootverbose)
- printf("APIC ID: physical %u, logical %u:%u\n",
- apic_id, cluster, cluster_id);
- lapic_set_logical_id(apic_id, cluster, cluster_id);
+ if (cpu_info[apic_id].cpu_bsp)
+ continue;
+#if 0
+ /* Don't let hyperthreads service interrupts. */
+ if (hyperthreading_cpus > 1 &&
+ apic_id % hyperthreading_cpus != 0)
+ continue;
+#endif
+ intr_add_cpu(apic_id);
}
}
diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h
index 7260309..c87dc7e 100644
--- a/sys/amd64/include/apicvar.h
+++ b/sys/amd64/include/apicvar.h
@@ -181,7 +181,6 @@ void apic_register_enumerator(struct apic_enumerator *enumerator);
void *ioapic_create(uintptr_t addr, int32_t id, int intbase);
int ioapic_disable_pin(void *cookie, u_int pin);
int ioapic_get_vector(void *cookie, u_int pin);
-int ioapic_next_logical_cluster(void);
void ioapic_register(void *cookie);
int ioapic_remap_vector(void *cookie, u_int pin, int vector);
int ioapic_set_bus(void *cookie, u_int pin, int bus_type);
diff --git a/sys/amd64/include/intr_machdep.h b/sys/amd64/include/intr_machdep.h
index 4d5743a..0d187c1 100644
--- a/sys/amd64/include/intr_machdep.h
+++ b/sys/amd64/include/intr_machdep.h
@@ -85,6 +85,7 @@ struct pic {
void (*pic_resume)(struct intsrc *);
int (*pic_config_intr)(struct intsrc *, enum intr_trigger,
enum intr_polarity);
+ void (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
};
/* Flags for pic_disable_source() */
@@ -105,6 +106,7 @@ struct intsrc {
u_long *is_count;
u_long *is_straycount;
u_int is_index;
+ u_int is_enabled:1;
};
struct trapframe;
@@ -117,6 +119,11 @@ int elcr_probe(void);
enum intr_trigger elcr_read_trigger(u_int irq);
void elcr_resume(void);
void elcr_write_trigger(u_int irq, enum intr_trigger trigger);
+#ifdef SMP
+void intr_add_cpu(u_int apic_id);
+#else
+#define intr_add_cpu(apic_id)
+#endif
int intr_add_handler(const char *name, int vector, driver_intr_t handler,
void *arg, enum intr_type flags, void **cookiep);
int intr_config_intr(int vector, enum intr_trigger trig,
diff --git a/sys/amd64/isa/atpic.c b/sys/amd64/isa/atpic.c
index 906edda..1398e47 100644
--- a/sys/amd64/isa/atpic.c
+++ b/sys/amd64/isa/atpic.c
@@ -108,8 +108,8 @@ inthand_t
#define ATPIC(io, base, eoi, imenptr) \
{ { atpic_enable_source, atpic_disable_source, (eoi), \
atpic_enable_intr, atpic_vector, atpic_source_pending, NULL, \
- atpic_resume, atpic_config_intr }, (io), (base), \
- IDT_IO_INTS + (base), (imenptr) }
+ atpic_resume, atpic_config_intr, atpic_assign_cpu }, (io), \
+ (base), IDT_IO_INTS + (base), (imenptr) }
#define INTSRC(irq) \
{ { &atpics[(irq) / 8].at_pic }, IDTVEC(atpic_intr ## irq ), \
@@ -142,6 +142,7 @@ static void atpic_resume(struct intsrc *isrc);
static int atpic_source_pending(struct intsrc *isrc);
static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
enum intr_polarity pol);
+static void atpic_assign_cpu(struct intsrc *isrc, u_int apic_id);
static void i8259_init(struct atpic *pic, int slave);
static struct atpic atpics[] = {
@@ -353,6 +354,17 @@ atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
}
static void
+atpic_assign_cpu(struct intsrc *isrc, u_int apic_id)
+{
+
+ /*
+ * 8259A's are only used in UP in which case all interrupts always
+ * go to the sole CPU and this function shouldn't even be called.
+ */
+ panic("%s: bad cookie", __func__);
+}
+
+static void
i8259_init(struct atpic *pic, int slave)
{
int imr_addr;