From 2626eb2b2fd958dc0f683126aa84e93b939699a1 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 14 Oct 2009 00:07:05 +0400 Subject: x86, apic: Limit apic dumping, introduce new show_lapic= setup option In case if a system has a large number of cpus printing apics contents may consume a long time period. We limit such an output by 1 apic by default. But to have an ability to see all apics or some part of them we introduce "show_lapic" setup option which allow us to limit/unlimit the number of APICs being dumped. Example: apic=debug show_lapic=5, or apic=debug show_lapic=all Also move apic_verbosity checking upper that way so helper routines do not need to inspect it at all. Suggested-by: Yinghai Lu Signed-off-by: Cyrill Gorcunov Cc: yinghai@kernel.org Cc: macro@linux-mips.org LKML-Reference: <20091013201022.926793122@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 47 ++++++++++++++++++++++++++++-------------- 1 file changed, 32 insertions(+), 15 deletions(-) (limited to 'arch/x86/kernel/apic/io_apic.c') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index dc69f28..8c718c9 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1599,9 +1599,6 @@ __apicdebuginit(void) print_IO_APIC(void) struct irq_desc *desc; unsigned int irq; - if (apic_verbosity == APIC_QUIET) - return; - printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); for (i = 0; i < nr_ioapics; i++) printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", @@ -1708,9 +1705,6 @@ __apicdebuginit(void) print_APIC_field(int base) { int i; - if (apic_verbosity == APIC_QUIET) - return; - printk(KERN_DEBUG); for (i = 0; i < 8; i++) @@ -1724,9 +1718,6 @@ __apicdebuginit(void) print_local_APIC(void *dummy) unsigned int i, v, ver, maxlvt; u64 icr; - if (apic_verbosity == APIC_QUIET) - return; - printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", smp_processor_id(), hard_smp_processor_id()); v = apic_read(APIC_ID); @@ -1824,13 +1815,19 @@ __apicdebuginit(void) print_local_APIC(void *dummy) printk("\n"); } -__apicdebuginit(void) print_all_local_APICs(void) +__apicdebuginit(void) print_local_APICs(int maxcpu) { int cpu; + if (!maxcpu) + return; + preempt_disable(); - for_each_online_cpu(cpu) + for_each_online_cpu(cpu) { + if (cpu >= maxcpu) + break; smp_call_function_single(cpu, print_local_APIC, NULL, 1); + } preempt_enable(); } @@ -1839,7 +1836,7 @@ __apicdebuginit(void) print_PIC(void) unsigned int v; unsigned long flags; - if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs) + if (!nr_legacy_irqs) return; printk(KERN_DEBUG "\nprinting PIC contents\n"); @@ -1866,21 +1863,41 @@ __apicdebuginit(void) print_PIC(void) printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); } -__apicdebuginit(int) print_all_ICs(void) +static int __initdata show_lapic = 1; +static __init int setup_show_lapic(char *arg) +{ + int num = -1; + + if (strcmp(arg, "all") == 0) { + show_lapic = CONFIG_NR_CPUS; + } else { + get_option(&arg, &num); + if (num >= 0) + show_lapic = num; + } + + return 1; +} +__setup("show_lapic=", setup_show_lapic); + +__apicdebuginit(int) print_ICs(void) { + if (apic_verbosity == APIC_QUIET) + return 0; + print_PIC(); /* don't print out if apic is not there */ if (!cpu_has_apic && !apic_from_smp_config()) return 0; - print_all_local_APICs(); + print_local_APICs(show_lapic); print_IO_APIC(); return 0; } -fs_initcall(print_all_ICs); +fs_initcall(print_ICs); /* Where if anywhere is the i8259 connect in external int mode */ -- cgit v1.1 From 6c2c502910247d2820cb630e7b28fb6bdecdbf45 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Wed, 30 Sep 2009 11:02:59 -0500 Subject: x86: SGI UV: Fix irq affinity for hub based interrupts This patch fixes handling of uv hub irq affinity. IRQs with ALL or NODE affinity can be routed to cpus other than their originally assigned cpu. Those with CPU affinity cannot be rerouted. Signed-off-by: Dimitri Sivanich LKML-Reference: <20090930160259.GA7822@sgi.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 49 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/apic/io_apic.c') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 8c718c9..bb52e7f 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3731,9 +3731,10 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) * on the specified blade to allow the sending of MSIs to the specified CPU. */ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, - unsigned long mmr_offset) + unsigned long mmr_offset, int restrict) { const struct cpumask *eligible_cpu = cpumask_of(cpu); + struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg; int mmr_pnode; unsigned long mmr_value; @@ -3749,6 +3750,11 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, if (err != 0) return err; + if (restrict == UV_AFFINITY_CPU) + desc->status |= IRQ_NO_BALANCING; + else + desc->status |= IRQ_MOVE_PCNTXT; + spin_lock_irqsave(&vector_lock, flags); set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, irq_name); @@ -3777,11 +3783,10 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, * Disable the specified MMR located on the specified blade so that MSIs are * longer allowed to be sent. */ -void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) +void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) { unsigned long mmr_value; struct uv_IO_APIC_route_entry *entry; - int mmr_pnode; BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); @@ -3789,9 +3794,45 @@ void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) entry = (struct uv_IO_APIC_route_entry *)&mmr_value; entry->mask = 1; - mmr_pnode = uv_blade_to_pnode(mmr_blade); uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); } + +int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; + unsigned int dest; + unsigned long mmr_value; + struct uv_IO_APIC_route_entry *entry; + unsigned long mmr_offset; + unsigned mmr_pnode; + + dest = set_desc_affinity(desc, mask); + if (dest == BAD_APICID) + return -1; + + mmr_value = 0; + entry = (struct uv_IO_APIC_route_entry *)&mmr_value; + + entry->vector = cfg->vector; + entry->delivery_mode = apic->irq_delivery_mode; + entry->dest_mode = apic->irq_dest_mode; + entry->polarity = 0; + entry->trigger = 0; + entry->mask = 0; + entry->dest = dest; + + /* Get previously stored MMR and pnode of hub sourcing interrupts */ + if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode)) + return -1; + + uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); + + if (cfg->move_in_progress) + send_cleanup_vector(cfg); + + return 0; +} #endif /* CONFIG_X86_64 */ int __init io_apic_get_redir_entries (int ioapic) -- cgit v1.1 From 9338ad6ffb70eca97f335d93c54943828c8b209e Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Tue, 13 Oct 2009 15:32:36 -0500 Subject: x86, apic: Move SGI UV functionality out of generic IO-APIC code Move UV specific functionality out of the generic IO-APIC code. Signed-off-by: Dimitri Sivanich LKML-Reference: <20091013203236.GD20543@sgi.com> [ Cleaned up the code some more in their new places. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 140 ++--------------------------------------- 1 file changed, 5 insertions(+), 135 deletions(-) (limited to 'arch/x86/kernel/apic/io_apic.c') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index bb52e7f..ce16b65 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -60,8 +60,6 @@ #include #include #include -#include -#include #include @@ -140,20 +138,6 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) return pin; } -/* - * This is performance-critical, we want to do it O(1) - * - * Most irqs are mapped 1:1 with pins. - */ -struct irq_cfg { - struct irq_pin_list *irq_2_pin; - cpumask_var_t domain; - cpumask_var_t old_domain; - unsigned move_cleanup_count; - u8 vector; - u8 move_in_progress : 1; -}; - /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ #ifdef CONFIG_SPARSE_IRQ static struct irq_cfg irq_cfgx[] = { @@ -209,7 +193,7 @@ int __init arch_early_irq_init(void) } #ifdef CONFIG_SPARSE_IRQ -static struct irq_cfg *irq_cfg(unsigned int irq) +struct irq_cfg *irq_cfg(unsigned int irq) { struct irq_cfg *cfg = NULL; struct irq_desc *desc; @@ -361,7 +345,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) /* end for move_irq_desc */ #else -static struct irq_cfg *irq_cfg(unsigned int irq) +struct irq_cfg *irq_cfg(unsigned int irq) { return irq < nr_irqs ? irq_cfgx + irq : NULL; } @@ -1237,8 +1221,7 @@ next: return err; } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) +int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) { int err; unsigned long flags; @@ -2245,7 +2228,7 @@ static int ioapic_retrigger_irq(unsigned int irq) */ #ifdef CONFIG_SMP -static void send_cleanup_vector(struct irq_cfg *cfg) +void send_cleanup_vector(struct irq_cfg *cfg) { cpumask_var_t cleanup_mask; @@ -2289,15 +2272,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq } } -static int -assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); - /* * Either sets desc->affinity to a valid value, and returns * ->cpu_mask_to_apicid of that, or returns BAD_APICID and * leaves desc->affinity untouched. */ -static unsigned int +unsigned int set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) { struct irq_cfg *cfg; @@ -3725,116 +3705,6 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) } #endif /* CONFIG_HT_IRQ */ -#ifdef CONFIG_X86_UV -/* - * Re-target the irq to the specified CPU and enable the specified MMR located - * on the specified blade to allow the sending of MSIs to the specified CPU. - */ -int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, - unsigned long mmr_offset, int restrict) -{ - const struct cpumask *eligible_cpu = cpumask_of(cpu); - struct irq_desc *desc = irq_to_desc(irq); - struct irq_cfg *cfg; - int mmr_pnode; - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - unsigned long flags; - int err; - - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - - cfg = irq_cfg(irq); - - err = assign_irq_vector(irq, cfg, eligible_cpu); - if (err != 0) - return err; - - if (restrict == UV_AFFINITY_CPU) - desc->status |= IRQ_NO_BALANCING; - else - desc->status |= IRQ_MOVE_PCNTXT; - - spin_lock_irqsave(&vector_lock, flags); - set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq, - irq_name); - spin_unlock_irqrestore(&vector_lock, flags); - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); - - mmr_pnode = uv_blade_to_pnode(mmr_blade); - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); - - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - return irq; -} - -/* - * Disable the specified MMR located on the specified blade so that MSIs are - * longer allowed to be sent. - */ -void arch_disable_uv_irq(int mmr_pnode, unsigned long mmr_offset) -{ - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - - BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - entry->mask = 1; - - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); -} - -int uv_set_irq_affinity(unsigned int irq, const struct cpumask *mask) -{ - struct irq_desc *desc = irq_to_desc(irq); - struct irq_cfg *cfg = desc->chip_data; - unsigned int dest; - unsigned long mmr_value; - struct uv_IO_APIC_route_entry *entry; - unsigned long mmr_offset; - unsigned mmr_pnode; - - dest = set_desc_affinity(desc, mask); - if (dest == BAD_APICID) - return -1; - - mmr_value = 0; - entry = (struct uv_IO_APIC_route_entry *)&mmr_value; - - entry->vector = cfg->vector; - entry->delivery_mode = apic->irq_delivery_mode; - entry->dest_mode = apic->irq_dest_mode; - entry->polarity = 0; - entry->trigger = 0; - entry->mask = 0; - entry->dest = dest; - - /* Get previously stored MMR and pnode of hub sourcing interrupts */ - if (uv_irq_2_mmr_info(irq, &mmr_offset, &mmr_pnode)) - return -1; - - uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); - - if (cfg->move_in_progress) - send_cleanup_vector(cfg); - - return 0; -} -#endif /* CONFIG_X86_64 */ - int __init io_apic_get_redir_entries (int ioapic) { union IO_APIC_reg_01 reg_01; -- cgit v1.1 From 23359a88e7eca3c4f402562b102f23014db3c2aa Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 14:24:33 -0800 Subject: x86: Remove move_cleanup_count from irq_cfg move_cleanup_count for each irq in irq_cfg is keeping track of the total number of cpus that need to free the corresponding vectors associated with the irq which has now been migrated to new destination. As long as this move_cleanup_count is non-zero (i.e., as long as we have n't freed the vector allocations on the old destinations) we were preventing the irq's further migration. This cleanup count is unnecessary and it is enough to not allow the irq migration till we send the cleanup vector to the previous irq destination, for which we already have irq_cfg's move_in_progress. All we need to make sure is that we free the vector at the old desintation but we don't need to wait till that gets freed. Signed-off-by: Suresh Siddha Acked-by: Gary Hade Cc: Eric W. Biederman LKML-Reference: <20091026230001.752968906@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch/x86/kernel/apic/io_apic.c') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ce16b65..e9e5b02 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1161,7 +1161,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) int cpu, err; cpumask_var_t tmp_mask; - if ((cfg->move_in_progress) || cfg->move_cleanup_count) + if (cfg->move_in_progress) return -EBUSY; if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) @@ -2234,14 +2234,10 @@ void send_cleanup_vector(struct irq_cfg *cfg) if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { unsigned int i; - cfg->move_cleanup_count = 0; - for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) - cfg->move_cleanup_count++; for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); } else { cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); - cfg->move_cleanup_count = cpumask_weight(cleanup_mask); apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); free_cpumask_var(cleanup_mask); } @@ -2430,8 +2426,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) cfg = irq_cfg(irq); spin_lock(&desc->lock); - if (!cfg->move_cleanup_count) - goto unlock; if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) goto unlock; @@ -2449,7 +2443,6 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) goto unlock; } __get_cpu_var(vector_irq)[vector] = -1; - cfg->move_cleanup_count--; unlock: spin_unlock(&desc->lock); } -- cgit v1.1 From a5e74b841930bec78a4684ab9f208b2ddfe7c736 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 14:24:34 -0800 Subject: x86: Force irq complete move during cpu offline When a cpu goes offline, fixup_irqs() try to move irq's currently destined to the offline cpu to a new cpu. But this attempt will fail if the irq is recently moved to this cpu and the irq still hasn't arrived at this cpu (for non intr-remapping platforms this is when we free the vector allocation at the previous destination) that is about to go offline. This will endup with the interrupt subsystem still pointing the irq to the offline cpu, causing that irq to not work any more. Fix this by forcing the irq to complete its move (its been a long time we moved the irq to this cpu which we are offlining now) and then move this irq to a new cpu before this cpu goes offline. Signed-off-by: Suresh Siddha Acked-by: Gary Hade Cc: Eric W. Biederman LKML-Reference: <20091026230001.848830905@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel/apic/io_apic.c') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e9e5b02..4e886ef 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2450,21 +2450,33 @@ unlock: irq_exit(); } -static void irq_complete_move(struct irq_desc **descp) +static void __irq_complete_move(struct irq_desc **descp, unsigned vector) { struct irq_desc *desc = *descp; struct irq_cfg *cfg = desc->chip_data; - unsigned vector, me; + unsigned me; if (likely(!cfg->move_in_progress)) return; - vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) send_cleanup_vector(cfg); } + +static void irq_complete_move(struct irq_desc **descp) +{ + __irq_complete_move(descp, ~get_irq_regs()->orig_ax); +} + +void irq_force_complete_move(int irq) +{ + struct irq_desc *desc = irq_to_desc(irq); + struct irq_cfg *cfg = desc->chip_data; + + __irq_complete_move(&desc, cfg->vector); +} #else static inline void irq_complete_move(struct irq_desc **descp) {} #endif -- cgit v1.1 From b3ec0a37a7907813bb4fb85a2d94102c152470b7 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Mon, 26 Oct 2009 14:24:35 -0800 Subject: x86: Use EOI register in io-apic on intel platforms IO-APIC's in intel chipsets support EOI register starting from IO-APIC version 2. Use that when ever we need to clear the IO-APIC RTE's RemoteIRR bit explicitly. Signed-off-by: Suresh Siddha Acked-by: Gary Hade Cc: Eric W. Biederman LKML-Reference: <20091026230001.947855317@sbs-t61.sc.intel.com> [ Marked use_eio_reg as __read_mostly, fixed small details ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 81 ++++++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 27 deletions(-) (limited to 'arch/x86/kernel/apic/io_apic.c') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 4e886ef..31e9db3 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2492,6 +2492,51 @@ static void ack_apic_edge(unsigned int irq) atomic_t irq_mis_count; +static int use_eoi_reg __read_mostly; + +static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) +{ + struct irq_pin_list *entry; + + for_each_irq_pin(entry, cfg->irq_2_pin) { + if (irq_remapped(irq)) + io_apic_eoi(entry->apic, entry->pin); + else + io_apic_eoi(entry->apic, cfg->vector); + } +} + +static void eoi_ioapic_irq(struct irq_desc *desc) +{ + struct irq_cfg *cfg; + unsigned long flags; + unsigned int irq; + + irq = desc->irq; + cfg = desc->chip_data; + + spin_lock_irqsave(&ioapic_lock, flags); + __eoi_ioapic_irq(irq, cfg); + spin_unlock_irqrestore(&ioapic_lock, flags); +} + +static int ioapic_supports_eoi(void) +{ + struct pci_dev *root; + + root = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0)); + if (root && root->vendor == PCI_VENDOR_ID_INTEL && + mp_ioapics[0].apicver >= 0x2) { + use_eoi_reg = 1; + printk(KERN_INFO "IO-APIC supports EOI register\n"); + } else + printk(KERN_INFO "IO-APIC doesn't support EOI\n"); + + return 0; +} + +fs_initcall(ioapic_supports_eoi); + static void ack_apic_level(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -2575,37 +2620,19 @@ static void ack_apic_level(unsigned int irq) /* Tail end of version 0x11 I/O APIC bug workaround */ if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(cfg); - __unmask_and_level_IO_APIC_irq(cfg); - spin_unlock(&ioapic_lock); + + if (use_eoi_reg) + eoi_ioapic_irq(desc); + else { + spin_lock(&ioapic_lock); + __mask_and_edge_IO_APIC_irq(cfg); + __unmask_and_level_IO_APIC_irq(cfg); + spin_unlock(&ioapic_lock); + } } } #ifdef CONFIG_INTR_REMAP -static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) -{ - struct irq_pin_list *entry; - - for_each_irq_pin(entry, cfg->irq_2_pin) - io_apic_eoi(entry->apic, entry->pin); -} - -static void -eoi_ioapic_irq(struct irq_desc *desc) -{ - struct irq_cfg *cfg; - unsigned long flags; - unsigned int irq; - - irq = desc->irq; - cfg = desc->chip_data; - - spin_lock_irqsave(&ioapic_lock, flags); - __eoi_ioapic_irq(irq, cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); -} - static void ir_ack_apic_edge(unsigned int irq) { ack_APIC_irq(); -- cgit v1.1 From 46dc281b1bb02527195fe2ad50a3af6d7f7f7325 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 8 Nov 2009 18:53:56 +0300 Subject: x86, apic: Use PAGE_SIZE instead of numbers The whole page is reserved for IO-APIC fixmap due to non-cacheable requirement. So lets note this explicitly instead of playing with numbers. Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu Cc: Maciej W. Rozycki LKML-Reference: <20091108155356.GB25940@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/apic/io_apic.c') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 31e9db3..9ee1c16 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -4111,7 +4111,7 @@ fake_ioapic_page: idx++; ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + (4 * 1024) - 1; + ioapic_res->end = ioapic_phys + PAGE_SIZE-1; ioapic_res++; } } -- cgit v1.1 From 4343fe1024e09e17667f95620ed3e69a7a5f4389 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 8 Nov 2009 18:54:31 +0300 Subject: x86, ioapic: Use snrpintf while set names for IO-APIC resourses We should be ready that one day MAX_IO_APICS may raise its number. To prevent memory overwrite we're to use safe snprintf while set IO-APIC resourse name. Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu LKML-Reference: <20091108155431.GC25940@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel/apic/io_apic.c') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9ee1c16..24d1458 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -4066,7 +4066,7 @@ static struct resource * __init ioapic_setup_resources(int nr_ioapics) for (i = 0; i < nr_ioapics; i++) { res[i].name = mem; res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; - sprintf(mem, "IOAPIC %u", i); + snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i); mem += IOAPIC_RESOURCE_NAME_SIZE; } -- cgit v1.1 From 7abc07531383ac7f727cc9d44e1360a829f2082e Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Tue, 10 Nov 2009 01:06:59 +0300 Subject: x86: apic: Do not use stacked physid_mask_t We should not use physid_mask_t as a stack based variable in apic code. This type depends on MAX_APICS parameter which may be huge enough. Especially it became a problem with apic NOOP driver which is portable between 32 bit and 64 bit environment (where we have really huge MAX_APICS). So apic driver should operate with pointers and a caller in turn should aware of allocation physid_mask_t variable. As a side (but positive) effect -- we may use already implemented physid_set_mask_of_physid function eliminating default_apicid_to_cpu_present completely. Note that physids_coerce and physids_promote turned into static inline from macro (since macro hides the fact that parameter is being interpreted as unsigned long, make it explicit). Signed-off-by: Cyrill Gorcunov Cc: Yinghai Lu Cc: Maciej W. Rozycki Cc: Stephen Rothwell LKML-Reference: <20091109220659.GA5568@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86/kernel/apic/io_apic.c') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 24d1458..20ea839 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2031,7 +2031,7 @@ void __init setup_ioapic_ids_from_mpc(void) * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. */ - phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map); + apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map); /* * Set the IOAPIC ID to the value stored in the MPC table. @@ -2058,7 +2058,7 @@ void __init setup_ioapic_ids_from_mpc(void) * system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (apic->check_apicid_used(phys_id_present_map, + if (apic->check_apicid_used(&phys_id_present_map, mp_ioapics[apic_id].apicid)) { printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", apic_id, mp_ioapics[apic_id].apicid); @@ -2073,7 +2073,7 @@ void __init setup_ioapic_ids_from_mpc(void) mp_ioapics[apic_id].apicid = i; } else { physid_mask_t tmp; - tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid); + apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp); apic_printk(APIC_VERBOSE, "Setting %d in the " "phys_id_present_map\n", mp_ioapics[apic_id].apicid); @@ -3904,7 +3904,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) */ if (physids_empty(apic_id_map)) - apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map); + apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 0); @@ -3920,10 +3920,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) * Every APIC in a system must have a unique ID or we get lots of nice * 'stuck on smp_invalidate_needed IPI wait' messages. */ - if (apic->check_apicid_used(apic_id_map, apic_id)) { + if (apic->check_apicid_used(&apic_id_map, apic_id)) { for (i = 0; i < get_physical_broadcast(); i++) { - if (!apic->check_apicid_used(apic_id_map, i)) + if (!apic->check_apicid_used(&apic_id_map, i)) break; } @@ -3936,7 +3936,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) apic_id = i; } - tmp = apic->apicid_to_cpu_present(apic_id); + apic->apicid_to_cpu_present(apic_id, &tmp); physids_or(apic_id_map, apic_id_map, tmp); if (reg_00.bits.ID != apic_id) { -- cgit v1.1 From e79c65a97c01d5da4317f44f9f98b3814e091a43 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 16 Nov 2009 18:14:26 +0300 Subject: x86: io-apic: IO-APIC MMIO should not fail on resource insertion If IO-APIC base address is 1K aligned we should not fail on resourse insertion procedure. For this sake we define IO_APIC_SLOT_SIZE constant which should cover all IO-APIC direct accessible registers. An example of a such configuration is there http://marc.info/?l=linux-kernel&m=118114792006520 | | Quoting the message | | IOAPIC[0]: apic_id 2, version 32, address 0xfec00000, GSI 0-23 | IOAPIC[1]: apic_id 3, version 32, address 0xfec80000, GSI 24-47 | IOAPIC[2]: apic_id 4, version 32, address 0xfec80400, GSI 48-71 | IOAPIC[3]: apic_id 5, version 32, address 0xfec84000, GSI 72-95 | IOAPIC[4]: apic_id 8, version 32, address 0xfec84400, GSI 96-119 | Reported-by: "Maciej W. Rozycki" Signed-off-by: Cyrill Gorcunov Acked-by: Yinghai Lu LKML-Reference: <20091116151426.GC5653@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel/apic/io_apic.c') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 20ea839..ff23719 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -4100,18 +4100,17 @@ void __init ioapic_init_mappings(void) #ifdef CONFIG_X86_32 fake_ioapic_page: #endif - ioapic_phys = (unsigned long) - alloc_bootmem_pages(PAGE_SIZE); + ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE); ioapic_phys = __pa(ioapic_phys); } set_fixmap_nocache(idx, ioapic_phys); - apic_printk(APIC_VERBOSE, - "mapped IOAPIC to %08lx (%08lx)\n", - __fix_to_virt(idx), ioapic_phys); + apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n", + __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK), + ioapic_phys); idx++; ioapic_res->start = ioapic_phys; - ioapic_res->end = ioapic_phys + PAGE_SIZE-1; + ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; ioapic_res++; } } -- cgit v1.1 From 37ef2a3029fde884808ff1b369677abc7dd9a79a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 21 Nov 2009 00:23:37 -0800 Subject: x86: Re-get cfg_new in case reuse/move irq_desc When irq_desc is moved, we need to make sure to use the right cfg_new. Signed-off-by: Yinghai Lu LKML-Reference: <4B07A739.3030104@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel/apic/io_apic.c') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index ff23719..085e60e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3186,6 +3186,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) continue; desc_new = move_irq_desc(desc_new, node); + cfg_new = desc_new->chip_data; if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) irq = new; -- cgit v1.1 From ca64c47cecd0321b2e0dcbd7aaff44b68ce20654 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 1 Dec 2009 15:31:15 -0800 Subject: x86, io-apic: Move the effort of clearing remoteIRR explicitly before migrating the irq When the level-triggered interrupt is seen as an edge interrupt, we try to clear the remoteIRR explicitly (using either an io-apic eoi register when present or through the idea of changing trigger mode of the io-apic RTE to edge and then back to level). But this explicit try also needs to happen before we try to migrate the irq. Otherwise irq migration attempt will fail anyhow, as it postpones the irq migration to a later attempt when it sees the remoteIRR in the io-apic RTE still set. Signed-off-by: "Maciej W. Rozycki" Reviewed-by: Suresh Siddha Cc: ebiederm@xmission.com Cc: garyhade@us.ibm.com LKML-Reference: <20091201233334.975416130@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/x86/kernel/apic/io_apic.c') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 085e60e..b377b97 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2583,6 +2583,20 @@ static void ack_apic_level(unsigned int irq) */ ack_APIC_irq(); + /* Tail end of version 0x11 I/O APIC bug workaround */ + if (!(v & (1 << (i & 0x1f)))) { + atomic_inc(&irq_mis_count); + + if (use_eoi_reg) + eoi_ioapic_irq(desc); + else { + spin_lock(&ioapic_lock); + __mask_and_edge_IO_APIC_irq(cfg); + __unmask_and_level_IO_APIC_irq(cfg); + spin_unlock(&ioapic_lock); + } + } + /* Now we can move and renable the irq */ if (unlikely(do_unmask_irq)) { /* Only migrate the irq if the ack has been received. @@ -2616,20 +2630,6 @@ static void ack_apic_level(unsigned int irq) move_masked_irq(irq); unmask_IO_APIC_irq_desc(desc); } - - /* Tail end of version 0x11 I/O APIC bug workaround */ - if (!(v & (1 << (i & 0x1f)))) { - atomic_inc(&irq_mis_count); - - if (use_eoi_reg) - eoi_ioapic_irq(desc); - else { - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(cfg); - __unmask_and_level_IO_APIC_irq(cfg); - spin_unlock(&ioapic_lock); - } - } } #ifdef CONFIG_INTR_REMAP -- cgit v1.1 From c29d9db338db606c3335a03f337e1d4b7f6bb727 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 1 Dec 2009 15:31:16 -0800 Subject: x86, ioapic: Fix the EOI register detection mechanism Maciej W. Rozycki reported: > 82093AA I/O APIC has its version set to 0x11 and it > does not support the EOI register. Similarly I/O APICs > integrated into the 82379AB south bridge and the 82374EB/SB > EISA component. IO-APIC versions below 0x20 don't support EOI register. Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic version as 0x2. This is an error with documentation and these ICH chips use io-apic's of version 0x20 and indeed has a working EOI register for the io-apic. Fix the EOI register detection mechanism to check for version 0x20 and beyond. And also, a platform can potentially have io-apic's with different versions. Make the EOI register check per io-apic. Reported-by: Maciej W. Rozycki Signed-off-by: Suresh Siddha Cc: ebiederm@xmission.com Cc: garyhade@us.ibm.com LKML-Reference: <20091201233335.065361533@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 115 ++++++++++++++++++++++------------------- 1 file changed, 61 insertions(+), 54 deletions(-) (limited to 'arch/x86/kernel/apic/io_apic.c') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index b377b97..78960a3 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -539,23 +539,41 @@ static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, add_pin_to_irq_node(cfg, node, newapic, newpin); } +static void __io_apic_modify_irq(struct irq_pin_list *entry, + int mask_and, int mask_or, + void (*final)(struct irq_pin_list *entry)) +{ + unsigned int reg, pin; + + pin = entry->pin; + reg = io_apic_read(entry->apic, 0x10 + pin * 2); + reg &= mask_and; + reg |= mask_or; + io_apic_modify(entry->apic, 0x10 + pin * 2, reg); + if (final) + final(entry); +} + static void io_apic_modify_irq(struct irq_cfg *cfg, int mask_and, int mask_or, void (*final)(struct irq_pin_list *entry)) { - int pin; struct irq_pin_list *entry; - for_each_irq_pin(entry, cfg->irq_2_pin) { - unsigned int reg; - pin = entry->pin; - reg = io_apic_read(entry->apic, 0x10 + pin * 2); - reg &= mask_and; - reg |= mask_or; - io_apic_modify(entry->apic, 0x10 + pin * 2, reg); - if (final) - final(entry); - } + for_each_irq_pin(entry, cfg->irq_2_pin) + __io_apic_modify_irq(entry, mask_and, mask_or, final); +} + +static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry) +{ + __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER, + IO_APIC_REDIR_MASKED, NULL); +} + +static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry) +{ + __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED, + IO_APIC_REDIR_LEVEL_TRIGGER, NULL); } static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) @@ -579,18 +597,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg) io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); } -static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) -{ - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, - IO_APIC_REDIR_MASKED, NULL); -} - -static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) -{ - io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, - IO_APIC_REDIR_LEVEL_TRIGGER, NULL); -} - static void mask_IO_APIC_irq_desc(struct irq_desc *desc) { struct irq_cfg *cfg = desc->chip_data; @@ -2492,17 +2498,42 @@ static void ack_apic_edge(unsigned int irq) atomic_t irq_mis_count; -static int use_eoi_reg __read_mostly; - +/* + * IO-APIC versions below 0x20 don't support EOI register. + * For the record, here is the information about various versions: + * 0Xh 82489DX + * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant + * 2Xh I/O(x)APIC which is PCI 2.2 Compliant + * 30h-FFh Reserved + * + * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic + * version as 0x2. This is an error with documentation and these ICH chips + * use io-apic's of version 0x20. + * + * For IO-APIC's with EOI register, we use that to do an explicit EOI. + * Otherwise, we simulate the EOI message manually by changing the trigger + * mode to edge and then back to level, with RTE being masked during this. +*/ static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) { struct irq_pin_list *entry; for_each_irq_pin(entry, cfg->irq_2_pin) { - if (irq_remapped(irq)) - io_apic_eoi(entry->apic, entry->pin); - else - io_apic_eoi(entry->apic, cfg->vector); + if (mp_ioapics[entry->apic].apicver >= 0x20) { + /* + * Intr-remapping uses pin number as the virtual vector + * in the RTE. Actual vector is programmed in + * intr-remapping table entry. Hence for the io-apic + * EOI we use the pin number. + */ + if (irq_remapped(irq)) + io_apic_eoi(entry->apic, entry->pin); + else + io_apic_eoi(entry->apic, cfg->vector); + } else { + __mask_and_edge_IO_APIC_irq(entry); + __unmask_and_level_IO_APIC_irq(entry); + } } } @@ -2520,23 +2551,6 @@ static void eoi_ioapic_irq(struct irq_desc *desc) spin_unlock_irqrestore(&ioapic_lock, flags); } -static int ioapic_supports_eoi(void) -{ - struct pci_dev *root; - - root = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0)); - if (root && root->vendor == PCI_VENDOR_ID_INTEL && - mp_ioapics[0].apicver >= 0x2) { - use_eoi_reg = 1; - printk(KERN_INFO "IO-APIC supports EOI register\n"); - } else - printk(KERN_INFO "IO-APIC doesn't support EOI\n"); - - return 0; -} - -fs_initcall(ioapic_supports_eoi); - static void ack_apic_level(unsigned int irq) { struct irq_desc *desc = irq_to_desc(irq); @@ -2587,14 +2601,7 @@ static void ack_apic_level(unsigned int irq) if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); - if (use_eoi_reg) - eoi_ioapic_irq(desc); - else { - spin_lock(&ioapic_lock); - __mask_and_edge_IO_APIC_irq(cfg); - __unmask_and_level_IO_APIC_irq(cfg); - spin_unlock(&ioapic_lock); - } + eoi_ioapic_irq(desc); } /* Now we can move and renable the irq */ -- cgit v1.1 From 1c83995b6c7c6bb795bce80f75fbffb15f78db2d Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 1 Dec 2009 15:31:17 -0800 Subject: x86, ioapic: Document another case when level irq is seen as an edge In the case when cpu goes offline, fixup_irqs() will forward any unhandled interrupt on the offlined cpu to the new cpu destination that is handling the corresponding interrupt. This interrupt forwarding is done via IPI's. Hence, in this case also level-triggered io-apic interrupt will be seen as an edge interrupt in the cpu's APIC IRR. Document this scenario in the code which handles this case by doing an explicit EOI to the io-apic to clear remote IRR of the io-apic RTE. Requested-by: Maciej W. Rozycki Signed-off-by: Suresh Siddha Cc: Maciej W. Rozycki Cc: ebiederm@xmission.com Cc: garyhade@us.ibm.com LKML-Reference: <20091201233335.143970505@sbs-t61.sc.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'arch/x86/kernel/apic/io_apic.c') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 78960a3..c0b4468 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2586,6 +2586,19 @@ static void ack_apic_level(unsigned int irq) * level-triggered interrupt. We mask the source for the time of the * operation to prevent an edge-triggered interrupt escaping meanwhile. * The idea is from Manfred Spraul. --macro + * + * Also in the case when cpu goes offline, fixup_irqs() will forward + * any unhandled interrupt on the offlined cpu to the new cpu + * destination that is handling the corresponding interrupt. This + * interrupt forwarding is done via IPI's. Hence, in this case also + * level-triggered io-apic interrupt will be seen as an edge + * interrupt in the IRR. And we can't rely on the cpu's EOI + * to be broadcasted to the IO-APIC's which will clear the remoteIRR + * corresponding to the level-triggered interrupt. Hence on IO-APIC's + * supporting EOI register, we do an explicit EOI to clear the + * remote IRR and on IO-APIC's which don't have an EOI register, + * we use the above logic (mask+edge followed by unmask+level) from + * Manfred Spraul to clear the remote IRR. */ cfg = desc->chip_data; i = cfg->vector; @@ -2597,7 +2610,13 @@ static void ack_apic_level(unsigned int irq) */ ack_APIC_irq(); - /* Tail end of version 0x11 I/O APIC bug workaround */ + /* + * Tail end of clearing remote IRR bit (either by delivering the EOI + * message via io-apic EOI register write or simulating it using + * mask+edge followed by unnask+level logic) manually when the + * level triggered interrupt is seen as the edge triggered interrupt + * at the cpu. + */ if (!(v & (1 << (i & 0x1f)))) { atomic_inc(&irq_mis_count); -- cgit v1.1