From 9959c888a38b0f25b0e81a480f537d6489348442 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Mon, 28 Dec 2009 21:08:29 -0800 Subject: x86: Increase NR_IRQS and nr_irqs I have a system with lots of igb and ixgbe, when iov/vf are enabled for them, we hit the limit of 3064. when system has 20 pcie installed, and one card has 2 functions, and one function needs 64 msi-x, may need 20 * 2 * 64 = 2560 for msi-x but if iov and vf are enabled may need 20 * 2 * 64 * 3 = 7680 for msi-x assume system with 5 ioapic, nr_irqs_gsi will be 120. NR_CPUS = 512, and nr_cpu_ids = 128 will have NR_IRQS = 256 + 512 * 64 = 33024 will have nr_irqs = 120 + 8 * 128 + 120 * 64 = 8824 When SPARSE_IRQ is not set, there is no increase with kernel data size. when NR_CPUS=128, and SPARSE_IRQ is set: text data bss dec hex filename 21837444 4216564 12480736 38534744 24bfe58 vmlinux.before 21837442 4216580 12480736 38534758 24bfe66 vmlinux.after when NR_CPUS=4096, and SPARSE_IRQ is set text data bss dec hex filename 21878619 5610244 13415392 40904255 270263f vmlinux.before 21878617 5610244 13415392 40904253 270263d vmlinux.after Signed-off-by: Yinghai Lu Cc: Linus Torvalds Cc: Andrew Morton LKML-Reference: <4B398ECD.1080506@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/irq_vectors.h | 12 ++++++------ arch/x86/kernel/apic/io_apic.c | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 4611f08..3ab43df 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -154,21 +154,21 @@ static inline int invalid_vm86_irq(int irq) #define NR_IRQS_LEGACY 16 -#define CPU_VECTOR_LIMIT ( 8 * NR_CPUS ) #define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) #ifdef CONFIG_X86_IO_APIC # ifdef CONFIG_SPARSE_IRQ +# define CPU_VECTOR_LIMIT (64 * NR_CPUS) # define NR_IRQS \ (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ (NR_VECTORS + CPU_VECTOR_LIMIT) : \ (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) # else -# if NR_CPUS < MAX_IO_APICS -# define NR_IRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) -# else -# define NR_IRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) -# endif +# define CPU_VECTOR_LIMIT (32 * NR_CPUS) +# define NR_IRQS \ + (CPU_VECTOR_LIMIT < IO_APIC_VECTOR_LIMIT ? \ + (NR_VECTORS + CPU_VECTOR_LIMIT) : \ + (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) # endif #else /* !CONFIG_X86_IO_APIC: */ # define NR_IRQS NR_IRQS_LEGACY diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index de00c46..d9cd1f1 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3840,7 +3840,7 @@ int __init arch_probe_nr_irqs(void) /* * for MSI and HT dyn irq */ - nr += nr_irqs_gsi * 16; + nr += nr_irqs_gsi * 64; #endif if (nr < nr_irqs) nr_irqs = nr; -- cgit v1.1 From 99d113b17e8ca5a8b68a9d3f7691e2f552dd6a06 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 4 Jan 2010 16:16:06 -0800 Subject: x86, apic: Reclaim IDT vectors 0x20-0x2f Reclaim 16 IDT vectors and make them available for general allocation. Reclaim vectors 0x20-0x2f by reallocating the IRQ_MOVE_CLEANUP_VECTOR to vector 0x1f. This is in the range of vector numbers that is officially reserved for the CPU (for exceptions), however, the use of the APIC to generate any vector 0x10 or above is documented, and the CPU internally can receive any vector number (the legacy BIOS uses INT 0x08-0x0f for interrupts, as messed up as that is.) Since IRQ_MOVE_CLEANUP_VECTOR has to be alone in the lowest-numbered priority level (block of 16), this effectively enables us to reclaim an otherwise-unusable APIC priority level and put it to use. Since this is a transient kernel-only allocation we can change it at any time, and if/when there is an exception at vector 0x1f this assignment needs to be changed as part of OS enabling that new feature. Signed-off-by: Yinghai Lu LKML-Reference: <4B4284C6.9030107@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/irq_vectors.h | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 3ab43df..dbc81ac 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -30,26 +30,38 @@ /* * IDT vectors usable for external interrupt sources start * at 0x20: + * hpa said we can start from 0x1f. + * 0x1f is documented as reserved. However, the ability for the APIC + * to generate vectors starting at 0x10 is documented, as is the + * ability for the CPU to receive any vector number as an interrupt. + * 0x1f is used for IRQ_MOVE_CLEANUP_VECTOR since that vector needs + * an entire privilege level (16 vectors) all by itself at a higher + * priority than any actual device vector. Thus, by placing it in the + * otherwise-unusable 0x10 privilege level, we avoid wasting a full + * 16-vector block. */ -#define FIRST_EXTERNAL_VECTOR 0x20 +#define FIRST_EXTERNAL_VECTOR 0x1f +#define IA32_SYSCALL_VECTOR 0x80 #ifdef CONFIG_X86_32 # define SYSCALL_VECTOR 0x80 -# define IA32_SYSCALL_VECTOR 0x80 -#else -# define IA32_SYSCALL_VECTOR 0x80 #endif /* - * Reserve the lowest usable priority level 0x20 - 0x2f for triggering + * Reserve the lowest usable priority level 0x10 - 0x1f for triggering * cleanup after irq migration. + * this overlaps with the reserved range for cpu exceptions so this + * will need to be changed to 0x20 - 0x2f if the last cpu exception is + * ever allocated. */ + #define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR /* - * Vectors 0x30-0x3f are used for ISA interrupts. + * Vectors 0x20-0x2f are used for ISA interrupts. + * round up to the next 16-vector boundary */ -#define IRQ0_VECTOR (FIRST_EXTERNAL_VECTOR + 0x10) +#define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15) #define IRQ1_VECTOR (IRQ0_VECTOR + 1) #define IRQ2_VECTOR (IRQ0_VECTOR + 2) @@ -122,7 +134,7 @@ /* * First APIC vector available to drivers: (vectors 0x30-0xee) we - * start at 0x31(0x41) to spread out vectors evenly between priority + * start at 0x31 to spread out vectors evenly between priority * levels. (0x80 is the syscall vector) */ #define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) -- cgit v1.1 From ea94396629a3e0cb9a3a9c75335b1de255b30426 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Mon, 4 Jan 2010 21:14:41 -0800 Subject: x86, apic: Don't waste a vector to improve vector spread We want to use a vector-assignment sequence that avoids stumbling onto 0x80 earlier in the sequence, in order to improve the spread of vectors across priority levels on machines with a small number of interrupt sources. Right now, this is done by simply making the first vector (0x31 or 0x41) completely unusable. This is unnecessary; all we need is to start assignment at a +1 offset, we don't actually need to prohibit the usage of this vector once we have wrapped around. Signed-off-by: H. Peter Anvin LKML-Reference: <4B426550.6000209@kernel.org> --- arch/x86/include/asm/irq_vectors.h | 9 +++++---- arch/x86/kernel/apic/io_apic.c | 3 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index dbc81ac..585a428 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -133,11 +133,12 @@ #define MCE_SELF_VECTOR 0xeb /* - * First APIC vector available to drivers: (vectors 0x30-0xee) we - * start at 0x31 to spread out vectors evenly between priority - * levels. (0x80 is the syscall vector) + * First APIC vector available to drivers: (vectors 0x30-0xee). We + * start allocating at 0x31 to spread out vectors evenly between + * priority levels. (0x80 is the syscall vector) */ -#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 2) +#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 1) +#define VECTOR_OFFSET_START 1 #define NR_VECTORS 256 diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index d9cd1f1..e9ba090 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1162,7 +1162,8 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; + static int current_vector = FIRST_DEVICE_VECTOR + VECTOR_OFFSET_START; + static int current_offset = VECTOR_OFFSET_START % 8; unsigned int old_vector; int cpu, err; cpumask_var_t tmp_mask; -- cgit v1.1 From 722b3654852e48b93367a63f8ada9ee1cd43f2d3 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 13 Jan 2010 16:19:10 -0800 Subject: x86, vmi: Fix vmi_get_timer_vector() to use IRQ0_VECTOR FIRST_DEVICE_VECTOR is going away and it looks like a bad hack to steal FIRST_DEVICE_VECTOR / FIRST_EXTERNAL_VECTOR, when it looks like it needs IRQ0_VECTOR. Fix vmi_get_timer_vector() to use IRQ0_VECTOR. Signed-off-by: Suresh Siddha LKML-Reference: <20100114002118.436172066@sbs-t61.sc.intel.com> Cc: Alok N Kataria Cc: Zach Amsden Signed-off-by: H. Peter Anvin --- arch/x86/kernel/vmiclock_32.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 74c92bb..1268d99 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -79,11 +79,7 @@ unsigned long vmi_tsc_khz(void) static inline unsigned int vmi_get_timer_vector(void) { -#ifdef CONFIG_X86_IO_APIC - return FIRST_DEVICE_VECTOR; -#else - return FIRST_EXTERNAL_VECTOR; -#endif + return IRQ0_VECTOR; } /** vmi clockchip */ @@ -239,8 +235,6 @@ void __init vmi_time_init(void) vmi_time_init_clockevent(); setup_irq(0, &vmi_clock_action); - for_each_possible_cpu(cpu) - per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0; } #ifdef CONFIG_X86_LOCAL_APIC -- cgit v1.1 From 6579b474572fd54c583ac074e8e7aaae926c62ef Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Wed, 13 Jan 2010 16:19:11 -0800 Subject: x86, irq: Use 0x20 for the IRQ_MOVE_CLEANUP_VECTOR instead of 0x1f After talking to some more folks inside intel (Peter Anvin, Asit Mallick), the safest option (for future compatibility etc) seen was to use vector 0x20 for IRQ_MOVE_CLEANUP_VECTOR instead of using vector 0x1f (which is documented as reserved vector in the Intel IA32 manuals). Also we don't need to reserve the entire privilege level (all 16 vectors in the priority bucket that IRQ_MOVE_CLEANUP_VECTOR falls into), as the x86 architecture (section 10.9.3 in SDM Vol3a) specifies that with in the priority level, the higher the vector number the higher the priority. And hence we don't need to reserve the complete priority level 0x20-0x2f for the IRQ migration cleanup logic. So change the IRQ_MOVE_CLEANUP_VECTOR to 0x20 and allow 0x21-0x2f to be used for device interrupts. 0x30-0x3f will be used for ISA interrupts (these also can be migrated in the context of IOAPIC and hence need to be at a higher priority level than IRQ_MOVE_CLEANUP_VECTOR). Signed-off-by: Suresh Siddha LKML-Reference: <20100114002118.521826763@sbs-t61.sc.intel.com> Cc: Yinghai Lu Cc: Eric W. Biederman Cc: Maciej W. Rozycki Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/irq_vectors.h | 47 +++++++++++++------------------------- arch/x86/kernel/apic/io_apic.c | 4 ++-- 2 files changed, 18 insertions(+), 33 deletions(-) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 585a428..8767d99 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -28,19 +28,22 @@ #define MCE_VECTOR 0x12 /* - * IDT vectors usable for external interrupt sources start - * at 0x20: - * hpa said we can start from 0x1f. - * 0x1f is documented as reserved. However, the ability for the APIC - * to generate vectors starting at 0x10 is documented, as is the - * ability for the CPU to receive any vector number as an interrupt. - * 0x1f is used for IRQ_MOVE_CLEANUP_VECTOR since that vector needs - * an entire privilege level (16 vectors) all by itself at a higher - * priority than any actual device vector. Thus, by placing it in the - * otherwise-unusable 0x10 privilege level, we avoid wasting a full - * 16-vector block. + * IDT vectors usable for external interrupt sources start at 0x20. + * (0x80 is the syscall vector, 0x30-0x3f are for ISA) */ -#define FIRST_EXTERNAL_VECTOR 0x1f +#define FIRST_EXTERNAL_VECTOR 0x20 +/* + * We start allocating at 0x21 to spread out vectors evenly between + * priority levels. (0x80 is the syscall vector) + */ +#define VECTOR_OFFSET_START 1 + +/* + * Reserve the lowest usable vector (and hence lowest priority) 0x20 for + * triggering cleanup after irq migration. 0x21-0x2f will still be used + * for device interrupts. + */ +#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR #define IA32_SYSCALL_VECTOR 0x80 #ifdef CONFIG_X86_32 @@ -48,17 +51,7 @@ #endif /* - * Reserve the lowest usable priority level 0x10 - 0x1f for triggering - * cleanup after irq migration. - * this overlaps with the reserved range for cpu exceptions so this - * will need to be changed to 0x20 - 0x2f if the last cpu exception is - * ever allocated. - */ - -#define IRQ_MOVE_CLEANUP_VECTOR FIRST_EXTERNAL_VECTOR - -/* - * Vectors 0x20-0x2f are used for ISA interrupts. + * Vectors 0x30-0x3f are used for ISA interrupts. * round up to the next 16-vector boundary */ #define IRQ0_VECTOR ((FIRST_EXTERNAL_VECTOR + 16) & ~15) @@ -132,14 +125,6 @@ */ #define MCE_SELF_VECTOR 0xeb -/* - * First APIC vector available to drivers: (vectors 0x30-0xee). We - * start allocating at 0x31 to spread out vectors evenly between - * priority levels. (0x80 is the syscall vector) - */ -#define FIRST_DEVICE_VECTOR (IRQ15_VECTOR + 1) -#define VECTOR_OFFSET_START 1 - #define NR_VECTORS 256 #define FPU_IRQ 13 diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e9ba090..409f494 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1162,7 +1162,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - static int current_vector = FIRST_DEVICE_VECTOR + VECTOR_OFFSET_START; + static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; static int current_offset = VECTOR_OFFSET_START % 8; unsigned int old_vector; int cpu, err; @@ -1199,7 +1199,7 @@ next: if (vector >= first_system_vector) { /* If out of vectors on large boxen, must share them. */ offset = (offset + 1) % 8; - vector = FIRST_DEVICE_VECTOR + offset; + vector = FIRST_EXTERNAL_VECTOR + offset; } if (unlikely(current_vector == vector)) continue; -- cgit v1.1 From 97943390b043bcafca69f9163b86bbf627b75589 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 19 Jan 2010 12:20:54 -0800 Subject: x86, irq: Don't block IRQ0_VECTOR..IRQ15_VECTOR's on all cpu's Currently IRQ0..IRQ15 are assigned to IRQ0_VECTOR..IRQ15_VECTOR's on all the cpu's. If these IRQ's are handled by legacy pic controller, then the kernel handles them only on cpu 0. So there is no need to block this vector space on all cpu's. Similarly if these IRQ's are handled by IO-APIC, then the IRQ affinity will determine on which cpu's we need allocate the vector resource for that particular IRQ. This can be done dynamically and here also there is no need to block 16 vectors for IRQ0..IRQ15 on all cpu's. Fix this by initially assigning IRQ0..IRQ15 to IRQ0_VECTOR..IRQ15_VECTOR's only on cpu 0. If the legacy controllers like pic handles these irq's, then this configuration will be fixed. If more modern controllers like IO-APIC handle these IRQ's, then we start with this configuration and as IRQ's migrate, vectors (/and cpu's) associated with these IRQ's change dynamically. This will freeup the block of 16 vectors on other cpu's which don't handle IRQ0..IRQ15, which can now be used for other IRQ's that the particular cpu handle. [ hpa: this also an architectural cleanup for future legacy-PIC-free configurations. ] [ hpa: fixed typo NR_LEGACY_IRQS -> NR_IRQS_LEGACY ] Signed-off-by: Suresh Siddha LKML-Reference: <1263932453.2814.52.camel@sbs-t61.sc.intel.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/irq.h | 1 + arch/x86/kernel/apic/io_apic.c | 33 ++++++++++----------------------- arch/x86/kernel/irqinit.c | 35 +++++++++++++++++------------------ arch/x86/kernel/vmiclock_32.c | 2 ++ 4 files changed, 30 insertions(+), 41 deletions(-) diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 5458380..2622927 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h @@ -48,5 +48,6 @@ extern DECLARE_BITMAP(used_vectors, NR_VECTORS); extern int vector_used_by_percpu_irq(unsigned int vector); extern void init_ISA_irqs(void); +extern int nr_legacy_irqs; #endif /* _ASM_X86_IRQ_H */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 409f494..1a30587 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -94,8 +94,6 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* # of MP IRQ source entries */ int mp_irq_entries; -/* Number of legacy interrupts */ -static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY; /* GSI interrupts */ static int nr_irqs_gsi = NR_IRQS_LEGACY; @@ -140,27 +138,10 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ #ifdef CONFIG_SPARSE_IRQ -static struct irq_cfg irq_cfgx[] = { +static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; #else -static struct irq_cfg irq_cfgx[NR_IRQS] = { +static struct irq_cfg irq_cfgx[NR_IRQS]; #endif - [0] = { .vector = IRQ0_VECTOR, }, - [1] = { .vector = IRQ1_VECTOR, }, - [2] = { .vector = IRQ2_VECTOR, }, - [3] = { .vector = IRQ3_VECTOR, }, - [4] = { .vector = IRQ4_VECTOR, }, - [5] = { .vector = IRQ5_VECTOR, }, - [6] = { .vector = IRQ6_VECTOR, }, - [7] = { .vector = IRQ7_VECTOR, }, - [8] = { .vector = IRQ8_VECTOR, }, - [9] = { .vector = IRQ9_VECTOR, }, - [10] = { .vector = IRQ10_VECTOR, }, - [11] = { .vector = IRQ11_VECTOR, }, - [12] = { .vector = IRQ12_VECTOR, }, - [13] = { .vector = IRQ13_VECTOR, }, - [14] = { .vector = IRQ14_VECTOR, }, - [15] = { .vector = IRQ15_VECTOR, }, -}; void __init io_apic_disable_legacy(void) { @@ -185,8 +166,14 @@ int __init arch_early_irq_init(void) desc->chip_data = &cfg[i]; zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); - if (i < nr_legacy_irqs) - cpumask_setall(cfg[i].domain); + /* + * For legacy IRQ's, start with assigning irq0 to irq15 to + * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0. + */ + if (i < nr_legacy_irqs) { + cfg[i].vector = IRQ0_VECTOR + i; + cpumask_set_cpu(0, cfg[i].domain); + } } return 0; diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index d593222..fce55d5 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -84,24 +84,7 @@ static struct irqaction irq2 = { }; DEFINE_PER_CPU(vector_irq_t, vector_irq) = { - [0 ... IRQ0_VECTOR - 1] = -1, - [IRQ0_VECTOR] = 0, - [IRQ1_VECTOR] = 1, - [IRQ2_VECTOR] = 2, - [IRQ3_VECTOR] = 3, - [IRQ4_VECTOR] = 4, - [IRQ5_VECTOR] = 5, - [IRQ6_VECTOR] = 6, - [IRQ7_VECTOR] = 7, - [IRQ8_VECTOR] = 8, - [IRQ9_VECTOR] = 9, - [IRQ10_VECTOR] = 10, - [IRQ11_VECTOR] = 11, - [IRQ12_VECTOR] = 12, - [IRQ13_VECTOR] = 13, - [IRQ14_VECTOR] = 14, - [IRQ15_VECTOR] = 15, - [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 + [0 ... NR_VECTORS - 1] = -1, }; int vector_used_by_percpu_irq(unsigned int vector) @@ -116,6 +99,9 @@ int vector_used_by_percpu_irq(unsigned int vector) return 0; } +/* Number of legacy interrupts */ +int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY; + void __init init_ISA_irqs(void) { int i; @@ -142,6 +128,19 @@ void __init init_ISA_irqs(void) void __init init_IRQ(void) { + int i; + + /* + * On cpu 0, Assign IRQ0_VECTOR..IRQ15_VECTOR's to IRQ 0..15. + * If these IRQ's are handled by legacy interrupt-controllers like PIC, + * then this configuration will likely be static after the boot. If + * these IRQ's are handled by more mordern controllers like IO-APIC, + * then this vector space can be freed and re-used dynamically as the + * irq's migrate etc. + */ + for (i = 0; i < nr_legacy_irqs; i++) + per_cpu(vector_irq, 0)[IRQ0_VECTOR + i] = i; + x86_init.irqs.intr_init(); } diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 1268d99..2f1ca56 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c @@ -235,6 +235,8 @@ void __init vmi_time_init(void) vmi_time_init_clockevent(); setup_irq(0, &vmi_clock_action); + for_each_possible_cpu(cpu) + per_cpu(vector_irq, cpu)[vmi_get_timer_vector()] = 0; } #ifdef CONFIG_X86_LOCAL_APIC -- cgit v1.1 From 69c89efb51510b3dc0fa336f7fa257c6e1799ee4 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 29 Jan 2010 11:42:20 -0800 Subject: x86, irq: Update the vector domain for legacy irqs handled by io-apic In the recent change of not reserving IRQ0_VECTOR..IRQ15_VECTOR's on all cpu's, we start with irq 0..15 getting directed to (and handled on) cpu-0. In the logical flat mode, once the AP's are online (and before irqbalance comes into picture), kernel intends to handle these IRQ's on any cpu (as the logical flat mode allows to specify multiple cpu's for the irq destination and the chipset based routing can deliver to the interrupt to any one of the specified cpu's). This was broken with our recent change, which was ending up using only cpu 0 as the destination, even when the kernel was specifying to use all online cpu's for the logical flat mode case. Fix this by updating vector allocation domain (cfg->domain) for legacy irqs, when the IO-APIC handles them. Signed-off-by: Suresh Siddha LKML-Reference: <20100129194330.207790269@sbs-t61.sc.intel.com> Tested-by: Li Zefan Cc: Yinghai Lu Cc: Eric W. Biederman Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1a30587..2430b31 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1428,6 +1428,14 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq cfg = desc->chip_data; + /* + * For legacy irqs, cfg->domain starts with cpu 0 for legacy + * controllers like 8259. Now that IO-APIC can handle this irq, update + * the cfg->domain. + */ + if (irq < nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) + apic->vector_allocation_domain(0, cfg->domain); + if (assign_irq_vector(irq, cfg, apic->target_cpus())) return; -- cgit v1.1 From 9d133e5db993d577bd868b54083869fe5479fcff Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Fri, 29 Jan 2010 11:42:21 -0800 Subject: x86, irq: Move __setup_vector_irq() before the first irq enable in cpu online path Lowest priority delivery of logical flat mode is broken on some systems, such that even when IO-APIC RTE says deliver the interrupt to a particular CPU, interrupt subsystem delivers the interrupt to totally different CPU. For example, this behavior was observed on a P4 based system with SiS chipset which was reported by Li Zefan. We have been handling this kind of behavior by making sure that in logical flat mode, we assign the same vector to irq mappings on all the 8 possible logical cpu's. But we have been doing this initial assignment (__setup_vector_irq()) a little late (before which interrupts were already enabled for a short duration). Move the __setup_vector_irq() before the first irq enable point in the cpu online path to avoid the issue of not handling some interrupts that wrongly hit the cpu which is still coming online. Signed-off-by: Suresh Siddha LKML-Reference: <20100129194330.283696385@sbs-t61.sc.intel.com> Tested-by: Li Zefan Cc: Yinghai Lu Cc: Eric W. Biederman Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 8 +++++++- arch/x86/kernel/smpboot.c | 6 +++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 2430b31..937150e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1256,11 +1256,16 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) void __setup_vector_irq(int cpu) { /* Initialize vector_irq on a new cpu */ - /* This function must be called with vector_lock held */ int irq, vector; struct irq_cfg *cfg; struct irq_desc *desc; + /* + * vector_lock will make sure that we don't run into irq vector + * assignments that might be happening on another cpu in parallel, + * while we setup our initial vector to irq mappings. + */ + spin_lock(&vector_lock); /* Mark the inuse vectors */ for_each_irq_desc(irq, desc) { cfg = desc->chip_data; @@ -1279,6 +1284,7 @@ void __setup_vector_irq(int cpu) if (!cpumask_test_cpu(cpu, cfg->domain)) per_cpu(vector_irq, cpu)[vector] = -1; } + spin_unlock(&vector_lock); } static struct irq_chip ioapic_chip; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 678d0b8..b2ebcba 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -241,6 +241,11 @@ static void __cpuinit smp_callin(void) map_cpu_to_logical_apicid(); notify_cpu_starting(cpuid); + + /* + * Need to setup vector mappings before we enable interrupts. + */ + __setup_vector_irq(smp_processor_id()); /* * Get our bogomips. * @@ -315,7 +320,6 @@ notrace static void __cpuinit start_secondary(void *unused) */ ipi_call_lock(); lock_vector_lock(); - __setup_vector_irq(smp_processor_id()); set_cpu_online(smp_processor_id(), true); unlock_vector_lock(); ipi_call_unlock(); -- cgit v1.1 From 318f6b228ba88a394ef560efc1bfe028ad5ae6b6 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Wed, 10 Feb 2010 20:55:16 +0100 Subject: x86, ia32_aout: do not kill argument mapping Do not set current->mm->mmap to NULL in 32-bit emulation on 64-bit load_aout_binary after flush_old_exec as it would destroy already set brpm mapping with arguments. Introduced by b6a2fea39318e43fee84fa7b0b90d68bed92d2ba mm: variable length argument support where the argument mapping in bprm was added. [ hpa: this is a regression from 2.6.22... time to kill a.out? ] Signed-off-by: Jiri Slaby LKML-Reference: <1265831716-7668-1-git-send-email-jslaby@suse.cz> Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Ollie Wild Cc: x86@kernel.org Cc: Signed-off-by: H. Peter Anvin --- arch/x86/ia32/ia32_aout.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index f9f4724..14531ab 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -327,7 +327,6 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) current->mm->free_area_cache = TASK_UNMAPPED_BASE; current->mm->cached_hole_size = 0; - current->mm->mmap = NULL; install_exec_creds(bprm); current->flags &= ~PF_FORKNOEXEC; -- cgit v1.1 From 18dce6ba5c8c6bd0f3ab4efa4cbdd698dab5c40a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:05 -0800 Subject: x86: Fix SCI on IOAPIC != 0 Thomas Renninger reported on IBM x3330 booting a latest kernel on this machine results in: PCI: PCI BIOS revision 2.10 entry at 0xfd61c, last bus=1 PCI: Using configuration type 1 for base access bio: create slab at 0 ACPI: SCI (IRQ30) allocation failed ACPI Exception: AE_NOT_ACQUIRED, Unable to install System Control Interrupt handler (20090903/evevent-161) ACPI: Unable to start the ACPI Interpreter Later all kind of devices fail... and bisect it down to this commit: commit b9c61b70075c87a8612624736faf4a2de5b1ed30 x86/pci: update pirq_enable_irq() to setup io apic routing it turns out we need to set irq routing for the sci on ioapic1 early. -v2: make it work without sparseirq too. -v3: fix checkpatch.pl warning, and cc to stable Reported-by: Thomas Renninger Bisected-by: Thomas Renninger Tested-by: Thomas Renninger Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-2-git-send-email-yinghai@kernel.org> Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/io_apic.h | 1 + arch/x86/kernel/acpi/boot.c | 9 +++++++- arch/x86/kernel/apic/io_apic.c | 50 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 7c7c16c..5f61f6e 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -160,6 +160,7 @@ extern int io_apic_get_redir_entries(int ioapic); struct io_apic_irq_attr; extern int io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr); +void setup_IO_APIC_irq_extra(u32 gsi); extern int (*ioapic_renumber_irq)(int ioapic, int irq); extern void ioapic_init_mappings(void); extern void ioapic_insert_resources(void); diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 0acbcdf..5c96b75 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -446,6 +446,12 @@ void __init acpi_pic_sci_set_trigger(unsigned int irq, u16 trigger) int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) { *irq = gsi; + +#ifdef CONFIG_X86_IO_APIC + if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) + setup_IO_APIC_irq_extra(gsi); +#endif + return 0; } @@ -473,7 +479,8 @@ int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); } #endif - acpi_gsi_to_irq(plat_gsi, &irq); + irq = plat_gsi; + return irq; } diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 53243ca..5e4cce2 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1539,6 +1539,56 @@ static void __init setup_IO_APIC_irqs(void) } /* + * for the gsit that is not in first ioapic + * but could not use acpi_register_gsi() + * like some special sci in IBM x3330 + */ +void setup_IO_APIC_irq_extra(u32 gsi) +{ + int apic_id = 0, pin, idx, irq; + int node = cpu_to_node(boot_cpu_id); + struct irq_desc *desc; + struct irq_cfg *cfg; + + /* + * Convert 'gsi' to 'ioapic.pin'. + */ + apic_id = mp_find_ioapic(gsi); + if (apic_id < 0) + return; + + pin = mp_find_ioapic_pin(apic_id, gsi); + idx = find_irq_entry(apic_id, pin, mp_INT); + if (idx == -1) + return; + + irq = pin_2_irq(idx, apic_id, pin); +#ifdef CONFIG_SPARSE_IRQ + desc = irq_to_desc(irq); + if (desc) + return; +#endif + desc = irq_to_desc_alloc_node(irq, node); + if (!desc) { + printk(KERN_INFO "can not get irq_desc for %d\n", irq); + return; + } + + cfg = desc->chip_data; + add_pin_to_irq_node(cfg, node, apic_id, pin); + + if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) { + pr_debug("Pin %d-%d already programmed\n", + mp_ioapics[apic_id].apicid, pin); + return; + } + set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed); + + setup_IO_APIC_irq(apic_id, pin, irq, desc, + irq_trigger(idx), irq_polarity(idx)); +} + +/* * Set up the timer pin, possibly with the 8259A-master behind. */ static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin, -- cgit v1.1 From ced5b697a76d325e7a7ac7d382dbbb632c765093 Mon Sep 17 00:00:00 2001 From: Brandon Phiilps Date: Wed, 10 Feb 2010 01:20:06 -0800 Subject: x86: Avoid race condition in pci_enable_msix() Keep chip_data in create_irq_nr and destroy_irq. When two drivers are setting up MSI-X at the same time via pci_enable_msix() there is a race. See this dmesg excerpt: [ 85.170610] ixgbe 0000:02:00.1: irq 97 for MSI/MSI-X [ 85.170611] alloc irq_desc for 99 on node -1 [ 85.170613] igb 0000:08:00.1: irq 98 for MSI/MSI-X [ 85.170614] alloc kstat_irqs on node -1 [ 85.170616] alloc irq_2_iommu on node -1 [ 85.170617] alloc irq_desc for 100 on node -1 [ 85.170619] alloc kstat_irqs on node -1 [ 85.170621] alloc irq_2_iommu on node -1 [ 85.170625] ixgbe 0000:02:00.1: irq 99 for MSI/MSI-X [ 85.170626] alloc irq_desc for 101 on node -1 [ 85.170628] igb 0000:08:00.1: irq 100 for MSI/MSI-X [ 85.170630] alloc kstat_irqs on node -1 [ 85.170631] alloc irq_2_iommu on node -1 [ 85.170635] alloc irq_desc for 102 on node -1 [ 85.170636] alloc kstat_irqs on node -1 [ 85.170639] alloc irq_2_iommu on node -1 [ 85.170646] BUG: unable to handle kernel NULL pointer dereference at 0000000000000088 As you can see igb and ixgbe are both alternating on create_irq_nr() via pci_enable_msix() in their probe function. ixgbe: While looping through irq_desc_ptrs[] via create_irq_nr() ixgbe choses irq_desc_ptrs[102] and exits the loop, drops vector_lock and calls dynamic_irq_init. Then it sets irq_desc_ptrs[102]->chip_data = NULL via dynamic_irq_init(). igb: Grabs the vector_lock now and starts looping over irq_desc_ptrs[] via create_irq_nr(). It gets to irq_desc_ptrs[102] and does this: cfg_new = irq_desc_ptrs[102]->chip_data; if (cfg_new->vector != 0) continue; This hits the NULL deref. Another possible race exists via pci_disable_msix() in a driver or in the number of error paths that call free_msi_irqs(): destroy_irq() dynamic_irq_cleanup() which sets desc->chip_data = NULL ...race window... desc->chip_data = cfg; Remove the save and restore code for cfg in create_irq_nr() and destroy_irq() and take the desc->lock when checking the irq_cfg. Reported-and-analyzed-by: Brandon Philips Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-3-git-send-email-yinghai@kernel.org> Signed-off-by: Brandon Phililps Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 18 ++++----------- include/linux/irq.h | 2 ++ kernel/irq/chip.c | 52 ++++++++++++++++++++++++++++++++++-------- 3 files changed, 50 insertions(+), 22 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 53243ca..c86591b 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3228,12 +3228,9 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) } spin_unlock_irqrestore(&vector_lock, flags); - if (irq > 0) { - dynamic_irq_init(irq); - /* restore it, in case dynamic_irq_init clear it */ - if (desc_new) - desc_new->chip_data = cfg_new; - } + if (irq > 0) + dynamic_irq_init_keep_chip_data(irq); + return irq; } @@ -3256,17 +3253,12 @@ void destroy_irq(unsigned int irq) { unsigned long flags; struct irq_cfg *cfg; - struct irq_desc *desc; - /* store it, in case dynamic_irq_cleanup clear it */ - desc = irq_to_desc(irq); - cfg = desc->chip_data; - dynamic_irq_cleanup(irq); - /* connect back irq_cfg */ - desc->chip_data = cfg; + dynamic_irq_cleanup_keep_chip_data(irq); free_irte(irq); spin_lock_irqsave(&vector_lock, flags); + cfg = irq_to_desc(irq)->chip_data; __clear_irq_vector(irq, cfg); spin_unlock_irqrestore(&vector_lock, flags); } diff --git a/include/linux/irq.h b/include/linux/irq.h index 451481c..4d9b26e 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -400,7 +400,9 @@ static inline int irq_has_action(unsigned int irq) /* Dynamic irq helper functions */ extern void dynamic_irq_init(unsigned int irq); +void dynamic_irq_init_keep_chip_data(unsigned int irq); extern void dynamic_irq_cleanup(unsigned int irq); +void dynamic_irq_cleanup_keep_chip_data(unsigned int irq); /* Set/get chip/data for an IRQ: */ extern int set_irq_chip(unsigned int irq, struct irq_chip *chip); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index ecc3fa2..d70394f 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -18,11 +18,7 @@ #include "internals.h" -/** - * dynamic_irq_init - initialize a dynamically allocated irq - * @irq: irq number to initialize - */ -void dynamic_irq_init(unsigned int irq) +static void dynamic_irq_init_x(unsigned int irq, bool keep_chip_data) { struct irq_desc *desc; unsigned long flags; @@ -41,7 +37,8 @@ void dynamic_irq_init(unsigned int irq) desc->depth = 1; desc->msi_desc = NULL; desc->handler_data = NULL; - desc->chip_data = NULL; + if (!keep_chip_data) + desc->chip_data = NULL; desc->action = NULL; desc->irq_count = 0; desc->irqs_unhandled = 0; @@ -55,10 +52,26 @@ void dynamic_irq_init(unsigned int irq) } /** - * dynamic_irq_cleanup - cleanup a dynamically allocated irq + * dynamic_irq_init - initialize a dynamically allocated irq * @irq: irq number to initialize */ -void dynamic_irq_cleanup(unsigned int irq) +void dynamic_irq_init(unsigned int irq) +{ + dynamic_irq_init_x(irq, false); +} + +/** + * dynamic_irq_init_keep_chip_data - initialize a dynamically allocated irq + * @irq: irq number to initialize + * + * does not set irq_to_desc(irq)->chip_data to NULL + */ +void dynamic_irq_init_keep_chip_data(unsigned int irq) +{ + dynamic_irq_init_x(irq, true); +} + +static void dynamic_irq_cleanup_x(unsigned int irq, bool keep_chip_data) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@ -77,7 +90,8 @@ void dynamic_irq_cleanup(unsigned int irq) } desc->msi_desc = NULL; desc->handler_data = NULL; - desc->chip_data = NULL; + if (!keep_chip_data) + desc->chip_data = NULL; desc->handle_irq = handle_bad_irq; desc->chip = &no_irq_chip; desc->name = NULL; @@ -85,6 +99,26 @@ void dynamic_irq_cleanup(unsigned int irq) raw_spin_unlock_irqrestore(&desc->lock, flags); } +/** + * dynamic_irq_cleanup - cleanup a dynamically allocated irq + * @irq: irq number to initialize + */ +void dynamic_irq_cleanup(unsigned int irq) +{ + dynamic_irq_cleanup_x(irq, false); +} + +/** + * dynamic_irq_cleanup_keep_chip_data - cleanup a dynamically allocated irq + * @irq: irq number to initialize + * + * does not set irq_to_desc(irq)->chip_data to NULL + */ +void dynamic_irq_cleanup_keep_chip_data(unsigned int irq) +{ + dynamic_irq_cleanup_x(irq, true); +} + /** * set_irq_chip - set the irq chip for an irq -- cgit v1.1 From dade7716925a4e9a31f249f9ca1ed4e2f1495a8c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 25 Jul 2009 18:39:36 +0200 Subject: x86: Convert ioapic_lock and vector_lock to raw_spinlock Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 106 ++++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 53 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 937150e..d55e43d 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -73,8 +73,8 @@ */ int sis_apic_bug = -1; -static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); +static DEFINE_RAW_SPINLOCK(ioapic_lock); +static DEFINE_RAW_SPINLOCK(vector_lock); /* * # of IRQ routing registers @@ -393,7 +393,7 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) struct irq_pin_list *entry; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); for_each_irq_pin(entry, cfg->irq_2_pin) { unsigned int reg; int pin; @@ -402,11 +402,11 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) reg = io_apic_read(entry->apic, 0x10 + pin*2); /* Is the remote IRR bit set? */ if (reg & IO_APIC_REDIR_REMOTE_IRR) { - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return true; } } - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return false; } @@ -420,10 +420,10 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) { union entry_union eu; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return eu.entry; } @@ -446,9 +446,9 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); __ioapic_write_entry(apic, pin, e); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); } /* @@ -461,10 +461,10 @@ static void ioapic_mask_entry(int apic, int pin) unsigned long flags; union entry_union eu = { .entry.mask = 1 }; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x10 + 2*pin, eu.w1); io_apic_write(apic, 0x11 + 2*pin, eu.w2); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); } /* @@ -591,9 +591,9 @@ static void mask_IO_APIC_irq_desc(struct irq_desc *desc) BUG_ON(!cfg); - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); __mask_IO_APIC_irq(cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); } static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) @@ -601,9 +601,9 @@ static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) struct irq_cfg *cfg = desc->chip_data; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); __unmask_IO_APIC_irq(cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); } static void mask_IO_APIC_irq(unsigned int irq) @@ -1127,12 +1127,12 @@ void lock_vector_lock(void) /* Used to the online set of cpus does not change * during assign_irq_vector. */ - spin_lock(&vector_lock); + raw_spin_lock(&vector_lock); } void unlock_vector_lock(void) { - spin_unlock(&vector_lock); + raw_spin_unlock(&vector_lock); } static int @@ -1220,9 +1220,9 @@ int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) int err; unsigned long flags; - spin_lock_irqsave(&vector_lock, flags); + raw_spin_lock_irqsave(&vector_lock, flags); err = __assign_irq_vector(irq, cfg, mask); - spin_unlock_irqrestore(&vector_lock, flags); + raw_spin_unlock_irqrestore(&vector_lock, flags); return err; } @@ -1265,7 +1265,7 @@ void __setup_vector_irq(int cpu) * assignments that might be happening on another cpu in parallel, * while we setup our initial vector to irq mappings. */ - spin_lock(&vector_lock); + raw_spin_lock(&vector_lock); /* Mark the inuse vectors */ for_each_irq_desc(irq, desc) { cfg = desc->chip_data; @@ -1284,7 +1284,7 @@ void __setup_vector_irq(int cpu) if (!cpumask_test_cpu(cpu, cfg->domain)) per_cpu(vector_irq, cpu)[vector] = -1; } - spin_unlock(&vector_lock); + raw_spin_unlock(&vector_lock); } static struct irq_chip ioapic_chip; @@ -1603,14 +1603,14 @@ __apicdebuginit(void) print_IO_APIC(void) for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); reg_01.raw = io_apic_read(apic, 1); if (reg_01.bits.version >= 0x10) reg_02.raw = io_apic_read(apic, 2); if (reg_01.bits.version >= 0x20) reg_03.raw = io_apic_read(apic, 3); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); @@ -1905,9 +1905,9 @@ void __init enable_IO_APIC(void) * The number of IO-APIC IRQ registers (== #pins): */ for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); nr_ioapic_registers[apic] = reg_01.bits.entries+1; } @@ -2047,9 +2047,9 @@ void __init setup_ioapic_ids_from_mpc(void) for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { /* Read the register 0 value */ - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic_id, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); old_id = mp_ioapics[apic_id].apicid; @@ -2108,16 +2108,16 @@ void __init setup_ioapic_ids_from_mpc(void) mp_ioapics[apic_id].apicid); reg_00.bits.ID = mp_ioapics[apic_id].apicid; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic_id, 0, reg_00.raw); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); /* * Sanity check */ - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic_id, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) printk("could not set ID!\n"); else @@ -2200,7 +2200,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) unsigned long flags; struct irq_cfg *cfg; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); if (irq < nr_legacy_irqs) { disable_8259A_irq(irq); if (i8259A_irq_pending(irq)) @@ -2208,7 +2208,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) } cfg = irq_cfg(irq); __unmask_IO_APIC_irq(cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return was_pending; } @@ -2219,9 +2219,9 @@ static int ioapic_retrigger_irq(unsigned int irq) struct irq_cfg *cfg = irq_cfg(irq); unsigned long flags; - spin_lock_irqsave(&vector_lock, flags); + raw_spin_lock_irqsave(&vector_lock, flags); apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); - spin_unlock_irqrestore(&vector_lock, flags); + raw_spin_unlock_irqrestore(&vector_lock, flags); return 1; } @@ -2314,14 +2314,14 @@ set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) irq = desc->irq; cfg = desc->chip_data; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); ret = set_desc_affinity(desc, mask, &dest); if (!ret) { /* Only the high 8 bits are valid. */ dest = SET_APIC_LOGICAL_ID(dest); __target_IO_APIC_irq(irq, dest, cfg); } - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return ret; } @@ -2549,9 +2549,9 @@ static void eoi_ioapic_irq(struct irq_desc *desc) irq = desc->irq; cfg = desc->chip_data; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); __eoi_ioapic_irq(irq, cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); } static void ack_apic_level(unsigned int irq) @@ -3133,13 +3133,13 @@ static int ioapic_resume(struct sys_device *dev) data = container_of(dev, struct sysfs_ioapic_data, dev); entry = data->entry; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { reg_00.bits.ID = mp_ioapics[dev->id].apicid; io_apic_write(dev->id, 0, reg_00.raw); } - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); for (i = 0; i < nr_ioapic_registers[dev->id]; i++) ioapic_write_entry(dev->id, i, entry[i]); @@ -3202,7 +3202,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) if (irq_want < nr_irqs_gsi) irq_want = nr_irqs_gsi; - spin_lock_irqsave(&vector_lock, flags); + raw_spin_lock_irqsave(&vector_lock, flags); for (new = irq_want; new < nr_irqs; new++) { desc_new = irq_to_desc_alloc_node(new, node); if (!desc_new) { @@ -3221,7 +3221,7 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) irq = new; break; } - spin_unlock_irqrestore(&vector_lock, flags); + raw_spin_unlock_irqrestore(&vector_lock, flags); if (irq > 0) { dynamic_irq_init(irq); @@ -3261,9 +3261,9 @@ void destroy_irq(unsigned int irq) desc->chip_data = cfg; free_irte(irq); - spin_lock_irqsave(&vector_lock, flags); + raw_spin_lock_irqsave(&vector_lock, flags); __clear_irq_vector(irq, cfg); - spin_unlock_irqrestore(&vector_lock, flags); + raw_spin_unlock_irqrestore(&vector_lock, flags); } /* @@ -3800,9 +3800,9 @@ int __init io_apic_get_redir_entries (int ioapic) union IO_APIC_reg_01 reg_01; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return reg_01.bits.entries; } @@ -3964,9 +3964,9 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) if (physids_empty(apic_id_map)) apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); if (apic_id >= get_physical_broadcast()) { printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " @@ -4000,10 +4000,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) if (reg_00.bits.ID != apic_id) { reg_00.bits.ID = apic_id; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(ioapic, 0, reg_00.raw); reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); /* Sanity check */ if (reg_00.bits.ID != apic_id) { @@ -4024,9 +4024,9 @@ int __init io_apic_get_version(int ioapic) union IO_APIC_reg_01 reg_01; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return reg_01.bits.version; } -- cgit v1.1 From 0fdc7a8022c3eaff6b5ee27ffb9e913e5e58d8e9 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 25 Jul 2009 16:49:55 +0200 Subject: x86: Convert nmi_lock to raw_spinlock nmi_lock must be a spinning spinlock in -rt. Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/nmi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index 0159a69..24e7742 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -416,13 +416,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) /* We can be called before check_nmi_watchdog, hence NULL check. */ if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - static DEFINE_SPINLOCK(lock); /* Serialise the printks */ + static DEFINE_RAW_SPINLOCK(lock); /* Serialise the printks */ - spin_lock(&lock); + raw_spin_lock(&lock); printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); show_regs(regs); dump_stack(); - spin_unlock(&lock); + raw_spin_unlock(&lock); cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); rc = 1; -- cgit v1.1 From 5619c28061ff9d2559a93eaba492935530f2a513 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 25 Jul 2009 18:35:11 +0200 Subject: x86: Convert i8259_lock to raw_spinlock Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/i8259.h | 2 +- arch/x86/kernel/apic/io_apic.c | 4 ++-- arch/x86/kernel/i8259.c | 30 +++++++++++++++--------------- arch/x86/kernel/time.c | 4 ++-- arch/x86/kernel/visws_quirks.c | 6 +++--- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index 58d7091..7ec65b1 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h @@ -24,7 +24,7 @@ extern unsigned int cached_irq_mask; #define SLAVE_ICW4_DEFAULT 0x01 #define PIC_ICW4_AEOI 2 -extern spinlock_t i8259A_lock; +extern raw_spinlock_t i8259A_lock; extern void init_8259A(int auto_eoi); extern void enable_8259A_irq(unsigned int irq); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index c86591b..f5e4033 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1830,7 +1830,7 @@ __apicdebuginit(void) print_PIC(void) printk(KERN_DEBUG "\nprinting PIC contents\n"); - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); v = inb(0xa1) << 8 | inb(0x21); printk(KERN_DEBUG "... PIC IMR: %04x\n", v); @@ -1844,7 +1844,7 @@ __apicdebuginit(void) print_PIC(void) outb(0x0a,0xa0); outb(0x0a,0x20); - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); printk(KERN_DEBUG "... PIC ISR: %04x\n", v); diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c index df89102..8c93a84 100644 --- a/arch/x86/kernel/i8259.c +++ b/arch/x86/kernel/i8259.c @@ -32,7 +32,7 @@ */ static int i8259A_auto_eoi; -DEFINE_SPINLOCK(i8259A_lock); +DEFINE_RAW_SPINLOCK(i8259A_lock); static void mask_and_ack_8259A(unsigned int); struct irq_chip i8259A_chip = { @@ -68,13 +68,13 @@ void disable_8259A_irq(unsigned int irq) unsigned int mask = 1 << irq; unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask |= mask; if (irq & 8) outb(cached_slave_mask, PIC_SLAVE_IMR); else outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); } void enable_8259A_irq(unsigned int irq) @@ -82,13 +82,13 @@ void enable_8259A_irq(unsigned int irq) unsigned int mask = ~(1 << irq); unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); cached_irq_mask &= mask; if (irq & 8) outb(cached_slave_mask, PIC_SLAVE_IMR); else outb(cached_master_mask, PIC_MASTER_IMR); - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); } int i8259A_irq_pending(unsigned int irq) @@ -97,12 +97,12 @@ int i8259A_irq_pending(unsigned int irq) unsigned long flags; int ret; - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); if (irq < 8) ret = inb(PIC_MASTER_CMD) & mask; else ret = inb(PIC_SLAVE_CMD) & (mask >> 8); - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); return ret; } @@ -150,7 +150,7 @@ static void mask_and_ack_8259A(unsigned int irq) unsigned int irqmask = 1 << irq; unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); /* * Lightweight spurious IRQ detection. We do not want * to overdo spurious IRQ handling - it's usually a sign @@ -183,7 +183,7 @@ handle_real_irq: outb(cached_master_mask, PIC_MASTER_IMR); outb(0x60+irq, PIC_MASTER_CMD); /* 'Specific EOI to master */ } - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); return; spurious_8259A_irq: @@ -285,24 +285,24 @@ void mask_8259A(void) { unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); } void unmask_8259A(void) { unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); } void init_8259A(int auto_eoi) @@ -311,7 +311,7 @@ void init_8259A(int auto_eoi) i8259A_auto_eoi = auto_eoi; - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */ @@ -356,5 +356,5 @@ void init_8259A(int auto_eoi) outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */ outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */ - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); } diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index be25734..fb5cc5e1 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -70,11 +70,11 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) * manually to deassert NMI lines for the watchdog if run * on an 82489DX-based system. */ - spin_lock(&i8259A_lock); + raw_spin_lock(&i8259A_lock); outb(0x0c, PIC_MASTER_OCW3); /* Ack the IRQ; AEOI will end it automatically. */ inb(PIC_MASTER_POLL); - spin_unlock(&i8259A_lock); + raw_spin_unlock(&i8259A_lock); } global_clock_event->event_handler(global_clock_event); diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 34a279a..ab38ce0 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c @@ -559,7 +559,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) struct irq_desc *desc; unsigned long flags; - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); /* Find out what's interrupting in the PIIX4 master 8259 */ outb(0x0c, 0x20); /* OCW3 Poll command */ @@ -596,7 +596,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) outb(0x60 + realirq, 0x20); } - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); desc = irq_to_desc(realirq); @@ -614,7 +614,7 @@ static irqreturn_t piix4_master_intr(int irq, void *dev_id) return IRQ_HANDLED; out_unlock: - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); return IRQ_NONE; } -- cgit v1.1 From 0a832320f1bae6a4169bf683e201378f2437cfc1 Mon Sep 17 00:00:00 2001 From: "Justin P. Mattock" Date: Tue, 16 Feb 2010 15:17:29 -0800 Subject: x86: Add iMac9,1 to pci_reboot_dmi_table On the iMac9,1 /sbin/reboot results in a black mangled screen. Adding this DMI entry gets the machine to reboot cleanly as it should. Signed-off-by: Justin P. Mattock LKML-Reference: <1266362249-3337-1-git-send-email-justinmattock@gmail.com> Signed-off-by: H. Peter Anvin Cc: Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 704bddc..8e1aac8 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -461,6 +461,14 @@ static struct dmi_system_id __initdata pci_reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "Macmini3,1"), }, }, + { /* Handle problems with rebooting on the iMac9,1. */ + .callback = set_pci_reboot, + .ident = "Apple iMac9,1", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "iMac9,1"), + }, + }, { } }; -- cgit v1.1 From febcb0c59ac19fef2081a30e371e7af3619b5e91 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:32 -0800 Subject: irq: Remove unnecessary bootmem code mem_init is moved early already. Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-29-git-send-email-yinghai@kernel.org> Signed-off-by: H. Peter Anvin --- kernel/irq/handle.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 814940e..0e823c0 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include "internals.h" @@ -87,12 +86,8 @@ void __ref init_kstat_irqs(struct irq_desc *desc, int node, int nr) { void *ptr; - if (slab_is_available()) - ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), - GFP_ATOMIC, node); - else - ptr = alloc_bootmem_node(NODE_DATA(node), - nr * sizeof(*desc->kstat_irqs)); + ptr = kzalloc_node(nr * sizeof(*desc->kstat_irqs), + GFP_ATOMIC, node); /* * don't overwite if can not get new one @@ -219,10 +214,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node) if (desc) goto out_unlock; - if (slab_is_available()) - desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); - else - desc = alloc_bootmem_node(NODE_DATA(node), sizeof(*desc)); + desc = kzalloc_node(sizeof(*desc), GFP_ATOMIC, node); printk(KERN_DEBUG " alloc irq_desc for %d on node %d\n", irq, node); if (!desc) { -- cgit v1.1 From 773e3eb7b81e5ba13b5155dfb3bb75b8ce37f8f9 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:33 -0800 Subject: init: Move radix_tree_init() early Prepare for using radix trees in early_irq_init(). Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-30-git-send-email-yinghai@kernel.org> Signed-off-by: H. Peter Anvin --- init/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/init/main.c b/init/main.c index 4cb47a1..8451878 100644 --- a/init/main.c +++ b/init/main.c @@ -584,6 +584,7 @@ asmlinkage void __init start_kernel(void) local_irq_disable(); } rcu_init(); + radix_tree_init(); /* init some links before init_ISA_irqs() */ early_irq_init(); init_IRQ(); @@ -657,7 +658,6 @@ asmlinkage void __init start_kernel(void) proc_caches_init(); buffer_init(); key_init(); - radix_tree_init(); security_init(); vfs_caches_init(totalram_pages); signals_init(); -- cgit v1.1 From 99558f0bbe68cb09799ec38adbaa3f3b2dc7ba63 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:34 -0800 Subject: sparseirq: Change irq_desc_ptrs to static Add replace_irq_desc() instead of poking at the array directly. -v2: remove unneeded boundary check in replace_irq_desc Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-31-git-send-email-yinghai@kernel.org> Signed-off-by: H. Peter Anvin --- kernel/irq/handle.c | 7 ++++++- kernel/irq/internals.h | 6 +----- kernel/irq/numa_migrate.c | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 0e823c0..266f798 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -127,7 +127,7 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node) */ DEFINE_RAW_SPINLOCK(sparse_irq_lock); -struct irq_desc **irq_desc_ptrs __read_mostly; +static struct irq_desc **irq_desc_ptrs __read_mostly; static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = { [0 ... NR_IRQS_LEGACY-1] = { @@ -192,6 +192,11 @@ struct irq_desc *irq_to_desc(unsigned int irq) return NULL; } +void replace_irq_desc(unsigned int irq, struct irq_desc *desc) +{ + irq_desc_ptrs[irq] = desc; +} + struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node) { struct irq_desc *desc; diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index b2821f0..c63f3bc 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -21,11 +21,7 @@ extern void clear_kstat_irqs(struct irq_desc *desc); extern raw_spinlock_t sparse_irq_lock; #ifdef CONFIG_SPARSE_IRQ -/* irq_desc_ptrs allocated at boot time */ -extern struct irq_desc **irq_desc_ptrs; -#else -/* irq_desc_ptrs is a fixed size array */ -extern struct irq_desc *irq_desc_ptrs[NR_IRQS]; +void replace_irq_desc(unsigned int irq, struct irq_desc *desc); #endif #ifdef CONFIG_PROC_FS diff --git a/kernel/irq/numa_migrate.c b/kernel/irq/numa_migrate.c index 26bac9d..963559d 100644 --- a/kernel/irq/numa_migrate.c +++ b/kernel/irq/numa_migrate.c @@ -70,7 +70,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, raw_spin_lock_irqsave(&sparse_irq_lock, flags); /* We have to check it to avoid races with another CPU */ - desc = irq_desc_ptrs[irq]; + desc = irq_to_desc(irq); if (desc && old_desc != desc) goto out_unlock; @@ -90,7 +90,7 @@ static struct irq_desc *__real_move_irq_desc(struct irq_desc *old_desc, goto out_unlock; } - irq_desc_ptrs[irq] = desc; + replace_irq_desc(irq, desc); raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); /* free the old one */ -- cgit v1.1 From b5eb78f76ddfa7caf4340cf6893b032f45d8114a Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:35 -0800 Subject: sparseirq: Use radix_tree instead of ptrs array Use radix_tree irq_desc_tree instead of irq_desc_ptrs. -v2: according to Eric and cyrill to use radix_tree_lookup_slot and radix_tree_replace_slot Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-32-git-send-email-yinghai@kernel.org> Signed-off-by: H. Peter Anvin --- kernel/irq/handle.c | 49 +++++++++++++++++++++++++------------------------ 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index 266f798..76d5a67 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "internals.h" @@ -127,7 +128,26 @@ static void init_one_irq_desc(int irq, struct irq_desc *desc, int node) */ DEFINE_RAW_SPINLOCK(sparse_irq_lock); -static struct irq_desc **irq_desc_ptrs __read_mostly; +static RADIX_TREE(irq_desc_tree, GFP_ATOMIC); + +static void set_irq_desc(unsigned int irq, struct irq_desc *desc) +{ + radix_tree_insert(&irq_desc_tree, irq, desc); +} + +struct irq_desc *irq_to_desc(unsigned int irq) +{ + return radix_tree_lookup(&irq_desc_tree, irq); +} + +void replace_irq_desc(unsigned int irq, struct irq_desc *desc) +{ + void **ptr; + + ptr = radix_tree_lookup_slot(&irq_desc_tree, irq); + if (ptr) + radix_tree_replace_slot(ptr, desc); +} static struct irq_desc irq_desc_legacy[NR_IRQS_LEGACY] __cacheline_aligned_in_smp = { [0 ... NR_IRQS_LEGACY-1] = { @@ -159,9 +179,6 @@ int __init early_irq_init(void) legacy_count = ARRAY_SIZE(irq_desc_legacy); node = first_online_node; - /* allocate irq_desc_ptrs array based on nr_irqs */ - irq_desc_ptrs = kcalloc(nr_irqs, sizeof(void *), GFP_NOWAIT); - /* allocate based on nr_cpu_ids */ kstat_irqs_legacy = kzalloc_node(NR_IRQS_LEGACY * nr_cpu_ids * sizeof(int), GFP_NOWAIT, node); @@ -175,28 +192,12 @@ int __init early_irq_init(void) lockdep_set_class(&desc[i].lock, &irq_desc_lock_class); alloc_desc_masks(&desc[i], node, true); init_desc_masks(&desc[i]); - irq_desc_ptrs[i] = desc + i; + set_irq_desc(i, &desc[i]); } - for (i = legacy_count; i < nr_irqs; i++) - irq_desc_ptrs[i] = NULL; - return arch_early_irq_init(); } -struct irq_desc *irq_to_desc(unsigned int irq) -{ - if (irq_desc_ptrs && irq < nr_irqs) - return irq_desc_ptrs[irq]; - - return NULL; -} - -void replace_irq_desc(unsigned int irq, struct irq_desc *desc) -{ - irq_desc_ptrs[irq] = desc; -} - struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node) { struct irq_desc *desc; @@ -208,14 +209,14 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node) return NULL; } - desc = irq_desc_ptrs[irq]; + desc = irq_to_desc(irq); if (desc) return desc; raw_spin_lock_irqsave(&sparse_irq_lock, flags); /* We have to check it to avoid races with another CPU */ - desc = irq_desc_ptrs[irq]; + desc = irq_to_desc(irq); if (desc) goto out_unlock; @@ -228,7 +229,7 @@ struct irq_desc * __ref irq_to_desc_alloc_node(unsigned int irq, int node) } init_one_irq_desc(irq, desc, node); - irq_desc_ptrs[irq] = desc; + set_irq_desc(irq, desc); out_unlock: raw_spin_unlock_irqrestore(&sparse_irq_lock, flags); -- cgit v1.1 From 6738762d73a237ec322b04d8b9d55c8fd5d84713 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:36 -0800 Subject: x86, irq: Remove arch_probe_nr_irqs So keep nr_irqs == NR_IRQS. With radix trees is matters less. Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-33-git-send-email-yinghai@kernel.org> Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index f5e4033..c64ddd9 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3826,28 +3826,6 @@ void __init probe_nr_irqs_gsi(void) printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } -#ifdef CONFIG_SPARSE_IRQ -int __init arch_probe_nr_irqs(void) -{ - int nr; - - if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) - nr_irqs = NR_VECTORS * nr_cpu_ids; - - nr = nr_irqs_gsi + 8 * nr_cpu_ids; -#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) - /* - * for MSI and HT dyn irq - */ - nr += nr_irqs_gsi * 16; -#endif - if (nr < nr_irqs) - nr_irqs = nr; - - return 0; -} -#endif - static int __io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr) { -- cgit v1.1 From 2b633e3fac5efada088b57d31e65401f22bcc18f Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Wed, 10 Feb 2010 01:20:37 -0800 Subject: smp: Use nr_cpus= to set nr_cpu_ids early On x86, before prefill_possible_map(), nr_cpu_ids will be NR_CPUS aka CONFIG_NR_CPUS. Add nr_cpus= to set nr_cpu_ids. so we can simulate cpus <=8 are installed on normal config. -v2: accordging to Christoph, acpi_numa_init should use nr_cpu_ids in stead of NR_CPUS. -v3: add doc in kernel-parameters.txt according to Andrew. Signed-off-by: Yinghai Lu LKML-Reference: <1265793639-15071-34-git-send-email-yinghai@kernel.org> Acked-by: Linus Torvalds Signed-off-by: H. Peter Anvin Cc: Tony Luck --- Documentation/kernel-parameters.txt | 6 ++++++ arch/ia64/kernel/acpi.c | 4 ++-- arch/x86/kernel/smpboot.c | 7 ++++--- drivers/acpi/numa.c | 4 ++-- init/main.c | 14 ++++++++++++++ 5 files changed, 28 insertions(+), 7 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 736d456..51bceb0 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1772,6 +1772,12 @@ and is between 256 and 4096 characters. It is defined in the file purges which is reported from either PAL_VM_SUMMARY or SAL PALO. + nr_cpus= [SMP] Maximum number of processors that an SMP kernel + could support. nr_cpus=n : n >= 1 limits the kernel to + supporting 'n' processors. Later in runtime you can not + use hotplug cpu feature to put more cpu back to online. + just like you compile the kernel NR_CPUS=n + nr_uarts= [SERIAL] maximum number of UARTs to be registered. numa_zonelist_order= [KNL, BOOT] Select zonelist order for NUMA. diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 40574ae..605a08b 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -881,8 +881,8 @@ __init void prefill_possible_map(void) possible = available_cpus + additional_cpus; - if (possible > NR_CPUS) - possible = NR_CPUS; + if (possible > nr_cpu_ids) + possible = nr_cpu_ids; printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", possible, max((possible - available_cpus), 0)); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 678d0b8..eff2fe1 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1213,11 +1213,12 @@ __init void prefill_possible_map(void) total_cpus = max_t(int, possible, num_processors + disabled_cpus); - if (possible > CONFIG_NR_CPUS) { + /* nr_cpu_ids could be reduced via nr_cpus= */ + if (possible > nr_cpu_ids) { printk(KERN_WARNING "%d Processors exceeds NR_CPUS limit of %d\n", - possible, CONFIG_NR_CPUS); - possible = CONFIG_NR_CPUS; + possible, nr_cpu_ids); + possible = nr_cpu_ids; } printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", diff --git a/drivers/acpi/numa.c b/drivers/acpi/numa.c index 7ad48df..b872546 100644 --- a/drivers/acpi/numa.c +++ b/drivers/acpi/numa.c @@ -279,9 +279,9 @@ int __init acpi_numa_init(void) /* SRAT: Static Resource Affinity Table */ if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) { acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY, - acpi_parse_x2apic_affinity, NR_CPUS); + acpi_parse_x2apic_affinity, nr_cpu_ids); acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY, - acpi_parse_processor_affinity, NR_CPUS); + acpi_parse_processor_affinity, nr_cpu_ids); ret = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY, acpi_parse_memory_affinity, NR_NODE_MEMBLKS); diff --git a/init/main.c b/init/main.c index 8451878..05b5283 100644 --- a/init/main.c +++ b/init/main.c @@ -149,6 +149,20 @@ static int __init nosmp(char *str) early_param("nosmp", nosmp); +/* this is hard limit */ +static int __init nrcpus(char *str) +{ + int nr_cpus; + + get_option(&str, &nr_cpus); + if (nr_cpus > 0 && nr_cpus < nr_cpu_ids) + nr_cpu_ids = nr_cpus; + + return 0; +} + +early_param("nr_cpus", nrcpus); + static int __init maxcpus(char *str) { get_option(&str, &setup_max_cpus); -- cgit v1.1 From ca4dbc668412d5fe039be3e26e8e717a616d1ca5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Wed, 17 Feb 2010 18:49:54 -0800 Subject: xen: Remove unnecessary arch specific xen irq functions. Right now xen's use of the x86 and ia64 handle_irq is just bizarre and very fragile as it is very non-obvious the function exists and is is used by code out in drivers/.... Luckily using handle_irq is completely unnecessary, and we can just use the generic irq apis instead. This still leaves drivers/xen/events.c as a problematic user of the generic irq apis it has "static struct irq_info irq_info[NR_IRQS]" but that can be fixed some other time. Signed-off-by: Eric W. Biederman LKML-Reference: <4B7CAAD2.10803@kernel.org> Acked-by: Jeremy Fitzhardinge Cc: Ian Campbell Signed-off-by: H. Peter Anvin --- arch/ia64/include/asm/xen/events.h | 4 ---- drivers/xen/events.c | 8 ++++++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/ia64/include/asm/xen/events.h b/arch/ia64/include/asm/xen/events.h index b8370c8..baa74c8 100644 --- a/arch/ia64/include/asm/xen/events.h +++ b/arch/ia64/include/asm/xen/events.h @@ -36,10 +36,6 @@ static inline int xen_irqs_disabled(struct pt_regs *regs) return !(ia64_psr(regs)->i); } -static inline void handle_irq(int irq, struct pt_regs *regs) -{ - __do_IRQ(irq); -} #define irq_ctx_init(cpu) do { } while (0) #endif /* _ASM_IA64_XEN_EVENTS_H */ diff --git a/drivers/xen/events.c b/drivers/xen/events.c index ce602dd..2f84137 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -649,9 +649,13 @@ void xen_evtchn_do_upcall(struct pt_regs *regs) int bit_idx = __ffs(pending_bits); int port = (word_idx * BITS_PER_LONG) + bit_idx; int irq = evtchn_to_irq[port]; + struct irq_desc *desc; - if (irq != -1) - handle_irq(irq, regs); + if (irq != -1) { + desc = irq_to_desc(irq); + if (desc) + generic_handle_irq_desc(irq, desc); + } } } -- cgit v1.1 From eb5b3794062824ba12d883901eea49ea89d0a678 Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Sun, 7 Feb 2010 13:02:50 -0800 Subject: x86, irq: Keep chip_data in create_irq_nr and destroy_irq Version 4: use get_irq_chip_data() in destroy_irq() to get rid of some local vars. When two drivers are setting up MSI-X at the same time via pci_enable_msix() there is a race. See this dmesg excerpt: [ 85.170610] ixgbe 0000:02:00.1: irq 97 for MSI/MSI-X [ 85.170611] alloc irq_desc for 99 on node -1 [ 85.170613] igb 0000:08:00.1: irq 98 for MSI/MSI-X [ 85.170614] alloc kstat_irqs on node -1 [ 85.170616] alloc irq_2_iommu on node -1 [ 85.170617] alloc irq_desc for 100 on node -1 [ 85.170619] alloc kstat_irqs on node -1 [ 85.170621] alloc irq_2_iommu on node -1 [ 85.170625] ixgbe 0000:02:00.1: irq 99 for MSI/MSI-X [ 85.170626] alloc irq_desc for 101 on node -1 [ 85.170628] igb 0000:08:00.1: irq 100 for MSI/MSI-X [ 85.170630] alloc kstat_irqs on node -1 [ 85.170631] alloc irq_2_iommu on node -1 [ 85.170635] alloc irq_desc for 102 on node -1 [ 85.170636] alloc kstat_irqs on node -1 [ 85.170639] alloc irq_2_iommu on node -1 [ 85.170646] BUG: unable to handle kernel NULL pointer dereference at 0000000000000088 As you can see igb and ixgbe are both alternating on create_irq_nr() via pci_enable_msix() in their probe function. ixgbe: While looping through irq_desc_ptrs[] via create_irq_nr() ixgbe choses irq_desc_ptrs[102] and exits the loop, drops vector_lock and calls dynamic_irq_init. Then it sets irq_desc_ptrs[102]->chip_data = NULL via dynamic_irq_init(). igb: Grabs the vector_lock now and starts looping over irq_desc_ptrs[] via create_irq_nr(). It gets to irq_desc_ptrs[102] and does this: cfg_new = irq_desc_ptrs[102]->chip_data; if (cfg_new->vector != 0) continue; This hits the NULL deref. Another possible race exists via pci_disable_msix() in a driver or in the number of error paths that call free_msi_irqs(): destroy_irq() dynamic_irq_cleanup() which sets desc->chip_data = NULL ...race window... desc->chip_data = cfg; Remove the save and restore code for cfg in create_irq_nr() and destroy_irq() and take the desc->lock when checking the irq_cfg. Reported-and-analyzed-by: Brandon Philips Signed-off-by: Yinghai Lu LKML-Reference: <20100207210250.GB8256@jenkins.home.ifup.org> Signed-off-by: Brandon Phiilps Cc: stable@kernel.org Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic/io_apic.c | 20 ++++------------ include/linux/irq.h | 2 ++ kernel/irq/chip.c | 52 ++++++++++++++++++++++++++++++++++-------- 3 files changed, 50 insertions(+), 24 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 5e4cce2..e93a76b 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3278,12 +3278,9 @@ unsigned int create_irq_nr(unsigned int irq_want, int node) } spin_unlock_irqrestore(&vector_lock, flags); - if (irq > 0) { - dynamic_irq_init(irq); - /* restore it, in case dynamic_irq_init clear it */ - if (desc_new) - desc_new->chip_data = cfg_new; - } + if (irq > 0) + dynamic_irq_init_keep_chip_data(irq); + return irq; } @@ -3305,19 +3302,12 @@ int create_irq(void) void destroy_irq(unsigned int irq) { unsigned long flags; - struct irq_cfg *cfg; - struct irq_desc *desc; - /* store it, in case dynamic_irq_cleanup clear it */ - desc = irq_to_desc(irq); - cfg = desc->chip_data; - dynamic_irq_cleanup(irq); - /* connect back irq_cfg */ - desc->chip_data = cfg; + dynamic_irq_cleanup_keep_chip_data(irq); free_irte(irq); spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq, cfg); + __clear_irq_vector(irq, get_irq_chip_data(irq)); spin_unlock_irqrestore(&vector_lock, flags); } diff --git a/include/linux/irq.h b/include/linux/irq.h index 451481c..4d9b26e 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -400,7 +400,9 @@ static inline int irq_has_action(unsigned int irq) /* Dynamic irq helper functions */ extern void dynamic_irq_init(unsigned int irq); +void dynamic_irq_init_keep_chip_data(unsigned int irq); extern void dynamic_irq_cleanup(unsigned int irq); +void dynamic_irq_cleanup_keep_chip_data(unsigned int irq); /* Set/get chip/data for an IRQ: */ extern int set_irq_chip(unsigned int irq, struct irq_chip *chip); diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index ecc3fa2..d70394f 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -18,11 +18,7 @@ #include "internals.h" -/** - * dynamic_irq_init - initialize a dynamically allocated irq - * @irq: irq number to initialize - */ -void dynamic_irq_init(unsigned int irq) +static void dynamic_irq_init_x(unsigned int irq, bool keep_chip_data) { struct irq_desc *desc; unsigned long flags; @@ -41,7 +37,8 @@ void dynamic_irq_init(unsigned int irq) desc->depth = 1; desc->msi_desc = NULL; desc->handler_data = NULL; - desc->chip_data = NULL; + if (!keep_chip_data) + desc->chip_data = NULL; desc->action = NULL; desc->irq_count = 0; desc->irqs_unhandled = 0; @@ -55,10 +52,26 @@ void dynamic_irq_init(unsigned int irq) } /** - * dynamic_irq_cleanup - cleanup a dynamically allocated irq + * dynamic_irq_init - initialize a dynamically allocated irq * @irq: irq number to initialize */ -void dynamic_irq_cleanup(unsigned int irq) +void dynamic_irq_init(unsigned int irq) +{ + dynamic_irq_init_x(irq, false); +} + +/** + * dynamic_irq_init_keep_chip_data - initialize a dynamically allocated irq + * @irq: irq number to initialize + * + * does not set irq_to_desc(irq)->chip_data to NULL + */ +void dynamic_irq_init_keep_chip_data(unsigned int irq) +{ + dynamic_irq_init_x(irq, true); +} + +static void dynamic_irq_cleanup_x(unsigned int irq, bool keep_chip_data) { struct irq_desc *desc = irq_to_desc(irq); unsigned long flags; @@ -77,7 +90,8 @@ void dynamic_irq_cleanup(unsigned int irq) } desc->msi_desc = NULL; desc->handler_data = NULL; - desc->chip_data = NULL; + if (!keep_chip_data) + desc->chip_data = NULL; desc->handle_irq = handle_bad_irq; desc->chip = &no_irq_chip; desc->name = NULL; @@ -85,6 +99,26 @@ void dynamic_irq_cleanup(unsigned int irq) raw_spin_unlock_irqrestore(&desc->lock, flags); } +/** + * dynamic_irq_cleanup - cleanup a dynamically allocated irq + * @irq: irq number to initialize + */ +void dynamic_irq_cleanup(unsigned int irq) +{ + dynamic_irq_cleanup_x(irq, false); +} + +/** + * dynamic_irq_cleanup_keep_chip_data - cleanup a dynamically allocated irq + * @irq: irq number to initialize + * + * does not set irq_to_desc(irq)->chip_data to NULL + */ +void dynamic_irq_cleanup_keep_chip_data(unsigned int irq) +{ + dynamic_irq_cleanup_x(irq, true); +} + /** * set_irq_chip - set the irq chip for an irq -- cgit v1.1 From 21c2fd9970cc8e457058f84016450a2e9876125e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 26 Feb 2010 11:17:16 +0100 Subject: x86: apic: Fix mismerge, add arch_probe_nr_irqs() again Merge commit aef55d4922 mis-merged io_apic.c so we lost the arch_probe_nr_irqs() method. This caused subtle boot breakages (udev confusion likely due to missing drivers) with certain configs. Cc: H. Peter Anvin Cc: Yinghai Lu LKML-Reference: <20100207210250.GB8256@jenkins.home.ifup.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 9795898..72ac2a3 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3876,6 +3876,28 @@ void __init probe_nr_irqs_gsi(void) printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } +#ifdef CONFIG_SPARSE_IRQ +int __init arch_probe_nr_irqs(void) +{ + int nr; + + if (nr_irqs > (NR_VECTORS * nr_cpu_ids)) + nr_irqs = NR_VECTORS * nr_cpu_ids; + + nr = nr_irqs_gsi + 8 * nr_cpu_ids; +#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ) + /* + * for MSI and HT dyn irq + */ + nr += nr_irqs_gsi * 16; +#endif + if (nr < nr_irqs) + nr_irqs = nr; + + return 0; +} +#endif + static int __io_apic_set_pci_routing(struct device *dev, int irq, struct io_apic_irq_attr *irq_attr) { -- cgit v1.1 From fad539956c9e69749a03f7817d22d1bab87657bf Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 28 Feb 2010 01:06:34 -0800 Subject: x86: Fix out of order of gsi Iranna D Ankad reported that IBM x3950 systems have boot problems after this commit: | | commit b9c61b70075c87a8612624736faf4a2de5b1ed30 | | x86/pci: update pirq_enable_irq() to setup io apic routing | The problem is that with the patch, the machine freezes when console=ttyS0,... kernel serial parameter is passed. It seem to freeze at DVD initialization and the whole problem seem to be DVD/pata related, but somehow exposed through the serial parameter. Such apic problems can expose really weird behavior: ACPI: IOAPIC (id[0x10] address[0xfecff000] gsi_base[0]) IOAPIC[0]: apic_id 16, version 0, address 0xfecff000, GSI 0-2 ACPI: IOAPIC (id[0x0f] address[0xfec00000] gsi_base[3]) IOAPIC[1]: apic_id 15, version 0, address 0xfec00000, GSI 3-38 ACPI: IOAPIC (id[0x0e] address[0xfec01000] gsi_base[39]) IOAPIC[2]: apic_id 14, version 0, address 0xfec01000, GSI 39-74 ACPI: INT_SRC_OVR (bus 0 bus_irq 1 global_irq 4 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 5 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 3 global_irq 6 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 4 global_irq 7 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 6 global_irq 9 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 7 global_irq 10 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 8 global_irq 11 low edge) ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 12 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 12 global_irq 15 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 13 global_irq 16 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 14 global_irq 17 low edge) ACPI: INT_SRC_OVR (bus 0 bus_irq 15 global_irq 18 dfl dfl) It turns out that the system has three io apic controllers, but boot ioapic routing is in the second one, and that gsi_base is not 0 - it is using a bunch of INT_SRC_OVR... So these recent changes: 1. one set routing for first io apic controller 2. assume irq = gsi ... will break that system. So try to remap those gsis, need to seperate boot_ioapic_idx detection out of enable_IO_APIC() and call them early. So introduce boot_ioapic_idx, and remap_ioapic_gsi()... -v2: shift gsi with delta instead of gsi_base of boot_ioapic_idx -v3: double check with find_isa_irq_apic(0, mp_INT) to get right boot_ioapic_idx -v4: nr_legacy_irqs -v5: add print out for boot_ioapic_idx, and also make it could be applied for current kernel and previous kernel -v6: add bus_irq, in acpi_sci_ioapic_setup, so can get overwride for sci right mapping... -v7: looks like pnpacpi get irq instead of gsi, so need to revert them back... -v8: split into two patches -v9: according to Eric, use fixed 16 for shifting instead of remap -v10: still need to touch rsparser.c -v11: just revert back to way Eric suggest... anyway the ioapic in first ioapic is blocked by second... -v12: two patches, this one will add more loop but check apic_id and irq > 16 Reported-by: Iranna D Ankad Bisected-by: Iranna D Ankad Tested-by: Gary Hade Signed-off-by: Yinghai Lu Cc: Eric W. Biederman Cc: Thomas Renninger Cc: Eric W. Biederman Cc: Suresh Siddha Cc: len.brown@intel.com LKML-Reference: <4B8A321A.1000008@kernel.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/io_apic.c | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 72ac2a3..97e1e3e 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1475,7 +1475,7 @@ static struct { static void __init setup_IO_APIC_irqs(void) { - int apic_id = 0, pin, idx, irq; + int apic_id, pin, idx, irq; int notcon = 0; struct irq_desc *desc; struct irq_cfg *cfg; @@ -1483,14 +1483,7 @@ static void __init setup_IO_APIC_irqs(void) apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); -#ifdef CONFIG_ACPI - if (!acpi_disabled && acpi_ioapic) { - apic_id = mp_find_ioapic(0); - if (apic_id < 0) - apic_id = 0; - } -#endif - + for (apic_id = 0; apic_id < nr_ioapics; apic_id++) for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { idx = find_irq_entry(apic_id, pin, mp_INT); if (idx == -1) { @@ -1512,6 +1505,9 @@ static void __init setup_IO_APIC_irqs(void) irq = pin_2_irq(idx, apic_id, pin); + if ((apic_id > 0) && (irq > 16)) + continue; + /* * Skip the timer IRQ if there's a quirk handler * installed and if it returns 1: @@ -4105,27 +4101,23 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) #ifdef CONFIG_SMP void __init setup_ioapic_dest(void) { - int pin, ioapic = 0, irq, irq_entry; + int pin, ioapic, irq, irq_entry; struct irq_desc *desc; const struct cpumask *mask; if (skip_ioapic_setup == 1) return; -#ifdef CONFIG_ACPI - if (!acpi_disabled && acpi_ioapic) { - ioapic = mp_find_ioapic(0); - if (ioapic < 0) - ioapic = 0; - } -#endif - + for (ioapic = 0; ioapic < nr_ioapics; ioapic++) for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { irq_entry = find_irq_entry(ioapic, pin, mp_INT); if (irq_entry == -1) continue; irq = pin_2_irq(irq_entry, ioapic, pin); + if ((ioapic > 0) && (irq > 16)) + continue; + desc = irq_to_desc(irq); /* -- cgit v1.1