From b9111b7b7f46b0ec1ccb451d60ec439b92e4df65 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 23 Sep 2005 11:10:42 -0700 Subject: [CPUFREQ] Remove preempt_disable from powernow-k8 Via reading the code, my understanding is that powernow-k8 uses preempt_disable to ensure that driver->target doesn't migrate across cpus whilst it's accessing per processor registers, however set_cpus_allowed will provide this for us. Additionally, remove schedule() calls from set_cpus_allowed as set_cpus_allowed ensures that you're executing on the target processor on return. Signed-off-by: Zwane Mwaikambo Signed-off-by: Andrew Morton Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index ab6e061..e2e03ee 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -453,7 +453,6 @@ static int check_supported_cpu(unsigned int cpu) oldmask = current->cpus_allowed; set_cpus_allowed(current, cpumask_of_cpu(cpu)); - schedule(); if (smp_processor_id() != cpu) { printk(KERN_ERR "limiting to cpu %u failed\n", cpu); @@ -488,9 +487,7 @@ static int check_supported_cpu(unsigned int cpu) out: set_cpus_allowed(current, oldmask); - schedule(); return rc; - } static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, u8 maxvid) @@ -904,7 +901,6 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); - schedule(); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu); @@ -959,8 +955,6 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi err_out: set_cpus_allowed(current, oldmask); - schedule(); - return ret; } @@ -1017,7 +1011,6 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; set_cpus_allowed(current, cpumask_of_cpu(pol->cpu)); - schedule(); if (smp_processor_id() != pol->cpu) { printk(KERN_ERR "limiting to cpu %u failed\n", pol->cpu); @@ -1036,7 +1029,6 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) /* run on any CPU again */ set_cpus_allowed(current, oldmask); - schedule(); pol->governor = CPUFREQ_DEFAULT_GOVERNOR; pol->cpus = cpu_core_map[pol->cpu]; @@ -1071,7 +1063,6 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) err_out: set_cpus_allowed(current, oldmask); - schedule(); powernow_k8_cpu_exit_acpi(data); kfree(data); @@ -1107,17 +1098,14 @@ static unsigned int powernowk8_get (unsigned int cpu) set_cpus_allowed(current, oldmask); return 0; } - preempt_disable(); - + if (query_current_values_with_pending_wait(data)) goto out; khz = find_khz_freq_from_fid(data->currfid); - out: - preempt_enable_no_resched(); +out: set_cpus_allowed(current, oldmask); - return khz; } -- cgit v1.1 From ce3a161e693388aaa66d43d26156053311a39b7d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 26 Sep 2005 05:49:44 +0100 Subject: [PATCH] useless includes of linux/irq.h in arch/i386 Most of these guys are simply not needed (pulled by other stuff via asm-i386/hardirq.h). One that is not entirely useless is hilarious - arch/i386/oprofile/nmi_timer_int.c includes linux/irq.h... as a way to get linux/errno.h Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/i386/kernel/acpi/boot.c | 1 - arch/i386/kernel/apic.c | 1 - arch/i386/kernel/cpu/mcheck/k7.c | 1 - arch/i386/kernel/cpu/mcheck/non-fatal.c | 1 - arch/i386/kernel/cpu/mcheck/p4.c | 1 - arch/i386/kernel/cpu/mcheck/p5.c | 1 - arch/i386/kernel/cpu/mcheck/p6.c | 1 - arch/i386/kernel/cpu/mcheck/winchip.c | 1 - arch/i386/kernel/crash.c | 2 -- arch/i386/kernel/i8259.c | 3 --- arch/i386/kernel/io_apic.c | 1 - arch/i386/kernel/mpparse.c | 1 - arch/i386/kernel/nmi.c | 1 - arch/i386/kernel/process.c | 2 -- arch/i386/kernel/smp.c | 1 - arch/i386/kernel/smpboot.c | 1 - arch/i386/kernel/timers/timer_pit.c | 1 - arch/i386/kernel/traps.c | 1 - arch/i386/mach-default/setup.c | 1 - arch/i386/mach-visws/setup.c | 1 - arch/i386/mach-visws/visws_apic.c | 1 - arch/i386/mach-voyager/setup.c | 1 - arch/i386/mach-voyager/voyager_basic.c | 1 - arch/i386/mach-voyager/voyager_smp.c | 2 -- arch/i386/mach-voyager/voyager_thread.c | 2 -- arch/i386/oprofile/nmi_timer_int.c | 2 +- arch/i386/pci/acpi.c | 1 - arch/i386/pci/irq.c | 1 - arch/i386/power/cpu.c | 17 ----------------- 29 files changed, 1 insertion(+), 51 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index a63351c..838437b2 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index a22a866..5546dde 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -18,7 +18,6 @@ #include #include -#include #include #include #include diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c index c4abe76..7c6b9c7 100644 --- a/arch/i386/kernel/cpu/mcheck/k7.c +++ b/arch/i386/kernel/cpu/mcheck/k7.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include diff --git a/arch/i386/kernel/cpu/mcheck/non-fatal.c b/arch/i386/kernel/cpu/mcheck/non-fatal.c index 7864ddf..82dffe0 100644 --- a/arch/i386/kernel/cpu/mcheck/non-fatal.c +++ b/arch/i386/kernel/cpu/mcheck/non-fatal.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c index 0abccb6..1d1e885 100644 --- a/arch/i386/kernel/cpu/mcheck/p4.c +++ b/arch/i386/kernel/cpu/mcheck/p4.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include diff --git a/arch/i386/kernel/cpu/mcheck/p5.c b/arch/i386/kernel/cpu/mcheck/p5.c index ec0614c..3a2e24b 100644 --- a/arch/i386/kernel/cpu/mcheck/p5.c +++ b/arch/i386/kernel/cpu/mcheck/p5.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c index f01b73f..3c035b8 100644 --- a/arch/i386/kernel/cpu/mcheck/p6.c +++ b/arch/i386/kernel/cpu/mcheck/p6.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include diff --git a/arch/i386/kernel/cpu/mcheck/winchip.c b/arch/i386/kernel/cpu/mcheck/winchip.c index 7bae68f..5b9d2dd 100644 --- a/arch/i386/kernel/cpu/mcheck/winchip.c +++ b/arch/i386/kernel/cpu/mcheck/winchip.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 913be77..0248e08 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -11,10 +11,8 @@ #include #include #include -#include #include #include -#include #include #include #include diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index 178f4e9..323ef8a 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include @@ -25,8 +24,6 @@ #include #include -#include - #include /* diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 378313b..fb3991e 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -21,7 +21,6 @@ */ #include -#include #include #include #include diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 15949fd..27aabfc 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -14,7 +14,6 @@ */ #include -#include #include #include #include diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 0178457..72515b8 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -15,7 +15,6 @@ #include #include -#include #include #include #include diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index b45cbf9..7a14fdf 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -47,13 +47,11 @@ #include #include #include -#include #include #ifdef CONFIG_MATH_EMULATION #include #endif -#include #include #include diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 48b55db..218d725 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -11,7 +11,6 @@ #include #include -#include #include #include #include diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 5f0a95d..1fb26d0 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -42,7 +42,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c index eddb640..e42e46d3 100644 --- a/arch/i386/kernel/timers/timer_pit.c +++ b/arch/i386/kernel/timers/timer_pit.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 431a551..19e90bd 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -52,7 +52,6 @@ #include #include -#include #include #include "mach_traps.h" diff --git a/arch/i386/mach-default/setup.c b/arch/i386/mach-default/setup.c index e5a1a83..b4a7455 100644 --- a/arch/i386/mach-default/setup.c +++ b/arch/i386/mach-default/setup.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/i386/mach-visws/setup.c b/arch/i386/mach-visws/setup.c index 26ada6f..07fac7e 100644 --- a/arch/i386/mach-visws/setup.c +++ b/arch/i386/mach-visws/setup.c @@ -5,7 +5,6 @@ #include #include -#include #include #include diff --git a/arch/i386/mach-visws/visws_apic.c b/arch/i386/mach-visws/visws_apic.c index 04e6585..3e64fb7 100644 --- a/arch/i386/mach-visws/visws_apic.c +++ b/arch/i386/mach-visws/visws_apic.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/arch/i386/mach-voyager/setup.c b/arch/i386/mach-voyager/setup.c index df123fc..7d8a3ac 100644 --- a/arch/i386/mach-voyager/setup.c +++ b/arch/i386/mach-voyager/setup.c @@ -4,7 +4,6 @@ #include #include -#include #include #include #include diff --git a/arch/i386/mach-voyager/voyager_basic.c b/arch/i386/mach-voyager/voyager_basic.c index cc69875..aa49a33 100644 --- a/arch/i386/mach-voyager/voyager_basic.c +++ b/arch/i386/mach-voyager/voyager_basic.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index 46b0cf4..72a1b9c 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c @@ -30,8 +30,6 @@ #include #include -#include - /* TLB state -- visible externally, indexed physically */ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) ____cacheline_aligned = { &init_mm, 0 }; diff --git a/arch/i386/mach-voyager/voyager_thread.c b/arch/i386/mach-voyager/voyager_thread.c index a9341b0..2b03884 100644 --- a/arch/i386/mach-voyager/voyager_thread.c +++ b/arch/i386/mach-voyager/voyager_thread.c @@ -31,8 +31,6 @@ #include #include -#include - #define THREAD_NAME "kvoyagerd" /* external variables */ diff --git a/arch/i386/oprofile/nmi_timer_int.c b/arch/i386/oprofile/nmi_timer_int.c index ad93cdd..930a112 100644 --- a/arch/i386/oprofile/nmi_timer_int.c +++ b/arch/i386/oprofile/nmi_timer_int.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include diff --git a/arch/i386/pci/acpi.c b/arch/i386/pci/acpi.c index 2941674..7e7a202 100644 --- a/arch/i386/pci/acpi.c +++ b/arch/i386/pci/acpi.c @@ -1,7 +1,6 @@ #include #include #include -#include #include #include #include "pci.h" diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index 326a2ed..281ed8a 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c index 7b0b9ad..b27c5ac 100644 --- a/arch/i386/power/cpu.c +++ b/arch/i386/power/cpu.c @@ -8,25 +8,8 @@ */ #include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include - -#include -#include -#include -#include static struct saved_context saved_context; -- cgit v1.1 From 7d318d774789657c37a5e994a4a2cf59d4879ae7 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 29 Sep 2005 22:05:55 +0200 Subject: [PATCH] Fix up TLB flush filter disabling I checked with AMD and they requested to only disable it for family 15. Also disable it for i386 too. And some style fixes. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/amd.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 73aeaf5..4c1ddf2b 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -28,6 +28,22 @@ static void __init init_amd(struct cpuinfo_x86 *c) int mbytes = num_physpages >> (20-PAGE_SHIFT); int r; +#ifdef CONFIG_SMP + unsigned long value; + + /* Disable TLB flush filter by setting HWCR.FFDIS on K8 + * bit 6 of msr C001_0015 + * + * Errata 63 for SH-B3 steppings + * Errata 122 for all steppings (F+ have it disabled by default) + */ + if (c->x86 == 15) { + rdmsrl(MSR_K7_HWCR, value); + value |= 1 << 6; + wrmsrl(MSR_K7_HWCR, value); + } +#endif + /* * FIXME: We should handle the K5 here. Set up the write * range and also turn on MSR 83 bits 4 and 31 (write alloc, -- cgit v1.1 From b33fa1f3c3ec05e54e73f06c4578948c55d89ef6 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 1 Oct 2005 02:34:42 +1000 Subject: [PATCH] i386: include linux/irq.h rather than asm/hw_irq.h I need the following patch to compile -git8 here, otherwise these files fail to compile (asm/hw_irq.h needs definitions from linux/irq.h and that file provides the required include ordering). I did not do a full audit, though there looks to be many other places that should get the same treatment, if this is the right way to do it. Signed-off-by: Linus Torvalds --- arch/i386/kernel/acpi/boot.c | 2 +- arch/i386/pci/acpi.c | 2 +- arch/i386/pci/irq.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 838437b2..b66c13c 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -29,12 +29,12 @@ #include #include #include +#include #include #include #include #include -#include #include #ifdef CONFIG_X86_64 diff --git a/arch/i386/pci/acpi.c b/arch/i386/pci/acpi.c index 7e7a202..4c4522b 100644 --- a/arch/i386/pci/acpi.c +++ b/arch/i386/pci/acpi.c @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include "pci.h" diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index 281ed8a..cddafe3 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include "pci.h" -- cgit v1.1 From dd0fc66fb33cd610bc1a5db8a5e232d34879b4d7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 7 Oct 2005 07:46:04 +0100 Subject: [PATCH] gfp flags annotations - part 1 - added typedef unsigned int __nocast gfp_t; - replaced __nocast uses for gfp flags with gfp_t - it gives exactly the same warnings as far as sparse is concerned, doesn't change generated code (from gcc point of view we replaced unsigned int with typedef) and documents what's going on far better. Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- arch/i386/kernel/pci-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/pci-dma.c b/arch/i386/kernel/pci-dma.c index 1e51427..25fe668 100644 --- a/arch/i386/kernel/pci-dma.c +++ b/arch/i386/kernel/pci-dma.c @@ -23,7 +23,7 @@ struct dma_coherent_mem { }; void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, unsigned int __nocast gfp) + dma_addr_t *dma_handle, gfp_t gfp) { void *ret; struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; -- cgit v1.1 From d347f372273c2b3d86a66e2e1c94c790c208e166 Mon Sep 17 00:00:00 2001 From: "Markus F.X.J. Oberhumer" Date: Sun, 9 Oct 2005 18:54:23 +0200 Subject: [PATCH] i386: fix stack alignment for signal handlers This fixes the setup of the alignment of the signal frame, so that all signal handlers are run with a properly aligned stack frame. The current code "over-aligns" the stack pointer so that the stack frame is effectively always mis-aligned by 4 bytes. But what we really want is that on function entry ((sp + 4) & 15) == 0, which matches what would happen if the stack were aligned before a "call" instruction. Signed-off-by: Markus F.X.J. Oberhumer Signed-off-by: Linus Torvalds --- arch/i386/kernel/signal.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 61eb0c8..adcd069 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -338,7 +338,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size) esp = (unsigned long) ka->sa.sa_restorer; } - return (void __user *)((esp - frame_size) & -8ul); + esp -= frame_size; + /* Align the stack pointer according to the i386 ABI, + * i.e. so that on function entry ((sp + 4) & 15) == 0. */ + esp = ((esp + 4) & -16ul) - 4; + return (void __user *) esp; } /* These symbols are defined with the addresses in the vsyscall page. -- cgit v1.1 From 3c92c2ba33cd7d666c5f83cc32aa590e794e91b0 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 11 Oct 2005 01:28:33 +0200 Subject: [PATCH] i386: Don't discard upper 32bits of HWCR on K8 Need to use long long, not long when RMWing a MSR. I think it's harmless right now, but still should be better fixed if AMD adds any bits in the upper 32bit of HWCR. Bug was introduced with the TLB flush filter fix for i386 Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 4c1ddf2b..53a1681 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -29,7 +29,7 @@ static void __init init_amd(struct cpuinfo_x86 *c) int r; #ifdef CONFIG_SMP - unsigned long value; + unsigned long long value; /* Disable TLB flush filter by setting HWCR.FFDIS on K8 * bit 6 of msr C001_0015 -- cgit v1.1 From bfdc708dc7d26fca66df0157b36356a2ba6166eb Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Thu, 20 Oct 2005 15:16:15 -0700 Subject: [CPUFREQ] kzalloc conversions for i386 drivers. Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c | 3 +-- arch/i386/kernel/cpu/cpufreq/powernow-k7.c | 12 +++--------- arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 3 +-- arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c | 3 +-- 4 files changed, 6 insertions(+), 15 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 822c8ce..22b5622 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -376,10 +376,9 @@ acpi_cpufreq_cpu_init ( arg0.buffer.length = 12; arg0.buffer.pointer = (u8 *) arg0_buf; - data = kmalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL); + data = kzalloc(sizeof(struct cpufreq_acpi_io), GFP_KERNEL); if (!data) return (-ENOMEM); - memset(data, 0, sizeof(struct cpufreq_acpi_io)); acpi_io_data[cpu] = data; diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c index 73a5dc5..edcd626 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c @@ -171,10 +171,9 @@ static int get_ranges (unsigned char *pst) unsigned int speed; u8 fid, vid; - powernow_table = kmalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL); + powernow_table = kzalloc((sizeof(struct cpufreq_frequency_table) * (number_scales + 1)), GFP_KERNEL); if (!powernow_table) return -ENOMEM; - memset(powernow_table, 0, (sizeof(struct cpufreq_frequency_table) * (number_scales + 1))); for (j=0 ; j < number_scales; j++) { fid = *pst++; @@ -305,16 +304,13 @@ static int powernow_acpi_init(void) goto err0; } - acpi_processor_perf = kmalloc(sizeof(struct acpi_processor_performance), + acpi_processor_perf = kzalloc(sizeof(struct acpi_processor_performance), GFP_KERNEL); - if (!acpi_processor_perf) { retval = -ENOMEM; goto err0; } - memset(acpi_processor_perf, 0, sizeof(struct acpi_processor_performance)); - if (acpi_processor_register_performance(acpi_processor_perf, 0)) { retval = -EIO; goto err1; @@ -337,14 +333,12 @@ static int powernow_acpi_init(void) goto err2; } - powernow_table = kmalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL); + powernow_table = kzalloc((number_scales + 1) * (sizeof(struct cpufreq_frequency_table)), GFP_KERNEL); if (!powernow_table) { retval = -ENOMEM; goto err2; } - memset(powernow_table, 0, ((number_scales + 1) * sizeof(struct cpufreq_frequency_table))); - pc.val = (unsigned long) acpi_processor_perf->states[0].control; for (i = 0; i < number_scales; i++) { u8 fid, vid; diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index e2e03ee..beb1011 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -976,12 +976,11 @@ static int __init powernowk8_cpu_init(struct cpufreq_policy *pol) if (!check_supported_cpu(pol->cpu)) return -ENODEV; - data = kmalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); + data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); if (!data) { printk(KERN_ERR PFX "unable to alloc powernow_k8_data"); return -ENOMEM; } - memset(data,0,sizeof(struct powernow_k8_data)); data->cpu = pol->cpu; diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index c397b62..92936c1 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -422,12 +422,11 @@ static int centrino_cpu_init_acpi(struct cpufreq_policy *policy) } } - centrino_model[cpu] = kmalloc(sizeof(struct cpu_model), GFP_KERNEL); + centrino_model[cpu] = kzalloc(sizeof(struct cpu_model), GFP_KERNEL); if (!centrino_model[cpu]) { result = -ENOMEM; goto err_unreg; } - memset(centrino_model[cpu], 0, sizeof(struct cpu_model)); centrino_model[cpu]->model_name=NULL; centrino_model[cpu]->max_freq = p.states[0].core_frequency * 1000; -- cgit v1.1 From 0213df74315bbab9ccaa73146f3e11972ea6de46 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 21 Oct 2005 17:21:03 -0400 Subject: [PATCH] cpufreq: fix pending powernow timer stuck condition AMD recently discovered that on some hardware, there is a race condition possible when a C-state change request goes onto the bus at the same time as a P-state change request. Both requests happen, but the southbridge hardware only acknowledges the C-state change. The PowerNow! driver is then stuck in a loop, waiting for the P-state change acknowledgement. The driver eventually times out, but can no longer perform P-state changes. It turns out the solution is to resend the P-state change, which the southbridge will acknowledge normally. Thanks to Johannes Winkelmann for reporting this and testing the fix. Signed-off-by: Mark Langsdorf Signed-off-by: Dave Jones Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index ab6e061..ec3a9e3 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -44,7 +44,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 1.50.3" +#define VERSION "version 1.50.4" #include "powernow-k8.h" /* serialize freq changes */ @@ -111,8 +111,8 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) u32 i = 0; do { - if (i++ > 0x1000000) { - printk(KERN_ERR PFX "detected change pending stuck\n"); + if (i++ > 10000) { + dprintk("detected change pending stuck\n"); return 1; } rdmsr(MSR_FIDVID_STATUS, lo, hi); @@ -159,6 +159,7 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) { u32 lo; u32 savevid = data->currvid; + u32 i = 0; if ((fid & INVALID_FID_MASK) || (data->currvid & INVALID_VID_MASK)) { printk(KERN_ERR PFX "internal error - overflow on fid write\n"); @@ -170,10 +171,13 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) dprintk("writing fid 0x%x, lo 0x%x, hi 0x%x\n", fid, lo, data->plllock * PLL_LOCK_CONVERSION); - wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); - - if (query_current_values_with_pending_wait(data)) - return 1; + do { + wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); + if (i++ > 100) { + printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); + retrun 1; + } + } while (query_current_values_with_pending_wait(data)); count_off_irt(data); @@ -197,6 +201,7 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) { u32 lo; u32 savefid = data->currfid; + int i = 0; if ((data->currfid & INVALID_FID_MASK) || (vid & INVALID_VID_MASK)) { printk(KERN_ERR PFX "internal error - overflow on vid write\n"); @@ -208,10 +213,13 @@ static int write_new_vid(struct powernow_k8_data *data, u32 vid) dprintk("writing vid 0x%x, lo 0x%x, hi 0x%x\n", vid, lo, STOP_GRANT_5NS); - wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); - - if (query_current_values_with_pending_wait(data)) - return 1; + do { + wrmsr(MSR_FIDVID_CTL, lo, STOP_GRANT_5NS); + if (i++ > 100) { + printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); + return 1; + } + } while (query_current_values_with_pending_wait(data)); if (savefid != data->currfid) { printk(KERN_ERR PFX "fid changed on vid trans, old 0x%x new 0x%x\n", -- cgit v1.1 From 63172cb3d5ef762dcb60a292bc7f016b85cf6e1f Mon Sep 17 00:00:00 2001 From: Chris Wright Date: Fri, 21 Oct 2005 16:56:08 -0700 Subject: [PATCH] typo fix in last cpufreq powernow patch Not sure how it slipped by, but here's a trivial typo fix for powernow. Signed-off-by: Chris Wright [ It's "nurter" backwards.. Maybe we have a hillbilly The Shining fan? ] Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index ec3a9e3..58ca98f 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -175,7 +175,7 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) wrmsr(MSR_FIDVID_CTL, lo, data->plllock * PLL_LOCK_CONVERSION); if (i++ > 100) { printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); - retrun 1; + return 1; } } while (query_current_values_with_pending_wait(data)); -- cgit v1.1 From 53f4654272df7c51064825024340554b39c9efba Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Thu, 27 Oct 2005 22:25:43 -0700 Subject: [PATCH] Driver Core: fix up all callers of class_device_create() The previous patch adding the ability to nest struct class_device changed the paramaters to the call class_device_create(). This patch fixes up all in-kernel users of the function. Signed-off-by: Greg Kroah-Hartman --- arch/i386/kernel/cpuid.c | 2 +- arch/i386/kernel/msr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index 4647db4..13bae79 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c @@ -163,7 +163,7 @@ static int cpuid_class_device_create(int i) int err = 0; struct class_device *class_err; - class_err = class_device_create(cpuid_class, MKDEV(CPUID_MAJOR, i), NULL, "cpu%d",i); + class_err = class_device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, i), NULL, "cpu%d",i); if (IS_ERR(class_err)) err = PTR_ERR(class_err); return err; diff --git a/arch/i386/kernel/msr.c b/arch/i386/kernel/msr.c index 03100d6..44470fe 100644 --- a/arch/i386/kernel/msr.c +++ b/arch/i386/kernel/msr.c @@ -246,7 +246,7 @@ static int msr_class_device_create(int i) int err = 0; struct class_device *class_err; - class_err = class_device_create(msr_class, MKDEV(MSR_MAJOR, i), NULL, "msr%d",i); + class_err = class_device_create(msr_class, NULL, MKDEV(MSR_MAJOR, i), NULL, "msr%d",i); if (IS_ERR(class_err)) err = PTR_ERR(class_err); return err; -- cgit v1.1 From f8977d0a9b7ac84cfe700278a2ca64cb33c93a13 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 25 Oct 2005 10:28:42 -0700 Subject: [PATCH] PCI fixup for Toshiba laptops and ohci1394 This is a fix for a bug I see on my Toshiba laptop, where the ohci1394 controller gets initialized improperly. The patch adds two PCI fixups to arch/i386/pci/fixup.c, one that happens early on to cache the value of the PCI_CACHE_LINE_SIZE config register, and another that later restores the value, along with a valid IRQ number and some BAR values. I've tested it on my laptop, and it prevents me from running into what I consider to be a major bug: IRQ 11 is disabled by the IRQ debug code, causing my wireless to break. Thanks to Rob for the original patch to ohci1394.c and Stefan for lots of proofreading (and a last minute bug caught in review!) and additional information collection. I think the DMI system list is correct, but we may need to add some more PCI IDs to the PCI_FIXUP macros over time. Signed-off-by: Jesse Barnes Signed-off-by: Greg Kroah-Hartman --- arch/i386/pci/fixup.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c index 8e8e895..330fd2b 100644 --- a/arch/i386/pci/fixup.c +++ b/arch/i386/pci/fixup.c @@ -2,6 +2,8 @@ * Exceptions for specific devices. Usually work-arounds for fatal design flaws. */ +#include +#include #include #include #include "pci.h" @@ -384,3 +386,60 @@ static void __devinit pci_fixup_video(struct pci_dev *pdev) } } DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); + +/* + * Some Toshiba laptops need extra code to enable their TI TSB43AB22/A. + * + * We pretend to bring them out of full D3 state, and restore the proper + * IRQ, PCI cache line size, and BARs, otherwise the device won't function + * properly. In some cases, the device will generate an interrupt on + * the wrong IRQ line, causing any devices sharing the the line it's + * *supposed* to use to be disabled by the kernel's IRQ debug code. + */ +static u16 toshiba_line_size; + +static struct dmi_system_id __devinit toshiba_ohci1394_dmi_table[] = { + { + .ident = "Toshiba PS5 based laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_VERSION, "PS5"), + }, + }, + { + .ident = "Toshiba PSM4 based laptop", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "TOSHIBA"), + DMI_MATCH(DMI_PRODUCT_VERSION, "PSM4"), + }, + }, + { } +}; + +static void __devinit pci_pre_fixup_toshiba_ohci1394(struct pci_dev *dev) +{ + if (!dmi_check_system(toshiba_ohci1394_dmi_table)) + return; /* only applies to certain Toshibas (so far) */ + + dev->current_state = PCI_D3cold; + pci_read_config_word(dev, PCI_CACHE_LINE_SIZE, &toshiba_line_size); +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_TI, 0x8032, + pci_pre_fixup_toshiba_ohci1394); + +static void __devinit pci_post_fixup_toshiba_ohci1394(struct pci_dev *dev) +{ + if (!dmi_check_system(toshiba_ohci1394_dmi_table)) + return; /* only applies to certain Toshibas (so far) */ + + /* Restore config space on Toshiba laptops */ + mdelay(10); + pci_write_config_word(dev, PCI_CACHE_LINE_SIZE, toshiba_line_size); + pci_write_config_word(dev, PCI_INTERRUPT_LINE, dev->irq); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, + pci_resource_start(dev, 0)); + pci_write_config_dword(dev, PCI_BASE_ADDRESS_1, + pci_resource_start(dev, 1)); +} +DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_TI, 0x8032, + pci_post_fixup_toshiba_ohci1394); -- cgit v1.1 From 872fec16d9a0ed3b75b8893aa217e49cca575ee5 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 29 Oct 2005 18:16:21 -0700 Subject: [PATCH] mm: init_mm without ptlock First step in pushing down the page_table_lock. init_mm.page_table_lock has been used throughout the architectures (usually for ioremap): not to serialize kernel address space allocation (that's usually vmlist_lock), but because pud_alloc,pmd_alloc,pte_alloc_kernel expect caller holds it. Reverse that: don't lock or unlock init_mm.page_table_lock in any of the architectures; instead rely on pud_alloc,pmd_alloc,pte_alloc_kernel to take and drop it when allocating a new one, to check lest a racing task already did. Similarly no page_table_lock in vmalloc's map_vm_area. Some temporary ugliness in __pud_alloc and __pmd_alloc: since they also handle user mms, which are converted only by a later patch, for now they have to lock differently according to whether or not it's init_mm. If sources get muddled, there's a danger that an arch source taking init_mm.page_table_lock will be mixed with common source also taking it (or neither take it). So break the rules and make another change, which should break the build for such a mismatch: remove the redundant mm arg from pte_alloc_kernel (ppc64 scrapped its distinct ioremap_mm in 2.6.13). Exceptions: arm26 used pte_alloc_kernel on user mm, now pte_alloc_map; ia64 used pte_alloc_map on init_mm, now pte_alloc_kernel; parisc had bad args to pmd_alloc and pte_alloc_kernel in unused USE_HPPA_IOREMAP code; ppc64 map_io_page forgot to unlock on failure; ppc mmu_mapin_ram and ppc64 im_free took page_table_lock for no good reason. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mm/ioremap.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c index f379b8d..5d09de8 100644 --- a/arch/i386/mm/ioremap.c +++ b/arch/i386/mm/ioremap.c @@ -28,7 +28,7 @@ static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long pfn; pfn = phys_addr >> PAGE_SHIFT; - pte = pte_alloc_kernel(&init_mm, pmd, addr); + pte = pte_alloc_kernel(pmd, addr); if (!pte) return -ENOMEM; do { @@ -87,14 +87,12 @@ static int ioremap_page_range(unsigned long addr, flush_cache_all(); phys_addr -= addr; pgd = pgd_offset_k(addr); - spin_lock(&init_mm.page_table_lock); do { next = pgd_addr_end(addr, end); err = ioremap_pud_range(pgd, addr, next, phys_addr+addr, flags); if (err) break; } while (pgd++, addr = next, addr != end); - spin_unlock(&init_mm.page_table_lock); flush_tlb_all(); return err; } -- cgit v1.1 From c34d1b4d165c67b966bca4aba026443d7ff161eb Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 29 Oct 2005 18:16:32 -0700 Subject: [PATCH] mm: kill check_user_page_readable check_user_page_readable is a problematic variant of follow_page. It's used only by oprofile's i386 and arm backtrace code, at interrupt time, to establish whether a userspace stackframe is currently readable. This is problematic, because we want to push the page_table_lock down inside follow_page, and later split it; whereas oprofile is doing a spin_trylock on it (in the i386 case, forgotten in the arm case), and needs that to pin perhaps two pages spanned by the stackframe (which might be covered by different locks when we split). I think oprofile is going about this in the wrong way: it doesn't need to know the area is readable (neither i386 nor arm uses read protection of user pages), it doesn't need to pin the memory, it should simply __copy_from_user_inatomic, and see if that succeeds or not. Sorry, but I've not got around to devising the sparse __user annotations for this. Then we can eliminate check_user_page_readable, and return to a single follow_page without the __follow_page variants. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/oprofile/backtrace.c | 38 +++++++++++++------------------------- 1 file changed, 13 insertions(+), 25 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/oprofile/backtrace.c b/arch/i386/oprofile/backtrace.c index 65dfd2e..21654be 100644 --- a/arch/i386/oprofile/backtrace.c +++ b/arch/i386/oprofile/backtrace.c @@ -12,6 +12,7 @@ #include #include #include +#include struct frame_head { struct frame_head * ebp; @@ -21,26 +22,22 @@ struct frame_head { static struct frame_head * dump_backtrace(struct frame_head * head) { - oprofile_add_trace(head->ret); + struct frame_head bufhead[2]; - /* frame pointers should strictly progress back up the stack - * (towards higher addresses) */ - if (head >= head->ebp) + /* Also check accessibility of one struct frame_head beyond */ + if (!access_ok(VERIFY_READ, head, sizeof(bufhead))) + return NULL; + if (__copy_from_user_inatomic(bufhead, head, sizeof(bufhead))) return NULL; - return head->ebp; -} - -/* check that the page(s) containing the frame head are present */ -static int pages_present(struct frame_head * head) -{ - struct mm_struct * mm = current->mm; + oprofile_add_trace(bufhead[0].ret); - /* FIXME: only necessary once per page */ - if (!check_user_page_readable(mm, (unsigned long)head)) - return 0; + /* frame pointers should strictly progress back up the stack + * (towards higher addresses) */ + if (head >= bufhead[0].ebp) + return NULL; - return check_user_page_readable(mm, (unsigned long)(head + 1)); + return bufhead[0].ebp; } /* @@ -97,15 +94,6 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth) return; } -#ifdef CONFIG_SMP - if (!spin_trylock(¤t->mm->page_table_lock)) - return; -#endif - - while (depth-- && head && pages_present(head)) + while (depth-- && head) head = dump_backtrace(head); - -#ifdef CONFIG_SMP - spin_unlock(¤t->mm->page_table_lock); -#endif } -- cgit v1.1 From 60ec5585496871345c1a8113d7b60ed9d9474866 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 29 Oct 2005 18:16:34 -0700 Subject: [PATCH] mm: i386 sh sh64 ready for split ptlock Use pte_offset_map_lock, instead of pte_offset_map (or inappropriate pte_offset_kernel) and mm-wide page_table_lock, in sundry arch places. The i386 vm86 mark_screen_rdonly: yes, there was and is an assumption that the screen fits inside the one page table, as indeed it does. The sh __do_page_fault: which handles both kernel faults (without lock) and user mm faults (locked - though it set_pte without locking before). The sh64 flush_cache_range and helpers: which wrongly thought callers held page_table_lock before (only its tlb_start_vma did, and no longer does so); moved the flush loop down, and adjusted the large versus small range decision to consider a range which spans page tables as large. Signed-off-by: Hugh Dickins Acked-by: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/vm86.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index 16b4850..fc19935 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -134,17 +134,16 @@ struct pt_regs * fastcall save_v86_state(struct kernel_vm86_regs * regs) return ret; } -static void mark_screen_rdonly(struct task_struct * tsk) +static void mark_screen_rdonly(struct mm_struct *mm) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; - pte_t *pte, *mapped; + pte_t *pte; + spinlock_t *ptl; int i; - preempt_disable(); - spin_lock(&tsk->mm->page_table_lock); - pgd = pgd_offset(tsk->mm, 0xA0000); + pgd = pgd_offset(mm, 0xA0000); if (pgd_none_or_clear_bad(pgd)) goto out; pud = pud_offset(pgd, 0xA0000); @@ -153,16 +152,14 @@ static void mark_screen_rdonly(struct task_struct * tsk) pmd = pmd_offset(pud, 0xA0000); if (pmd_none_or_clear_bad(pmd)) goto out; - pte = mapped = pte_offset_map(pmd, 0xA0000); + pte = pte_offset_map_lock(mm, pmd, 0xA0000, &ptl); for (i = 0; i < 32; i++) { if (pte_present(*pte)) set_pte(pte, pte_wrprotect(*pte)); pte++; } - pte_unmap(mapped); + pte_unmap_unlock(pte, ptl); out: - spin_unlock(&tsk->mm->page_table_lock); - preempt_enable(); flush_tlb(); } @@ -306,7 +303,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk tsk->thread.screen_bitmap = info->screen_bitmap; if (info->flags & VM86_SCREEN_BITMAP) - mark_screen_rdonly(tsk); + mark_screen_rdonly(tsk->mm); __asm__ __volatile__( "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t" "movl %0,%%esp\n\t" -- cgit v1.1 From 4c21e2f2441dc5fbb957b030333f5a3f2d02dea7 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 29 Oct 2005 18:16:40 -0700 Subject: [PATCH] mm: split page table lock Christoph Lameter demonstrated very poor scalability on the SGI 512-way, with a many-threaded application which concurrently initializes different parts of a large anonymous area. This patch corrects that, by using a separate spinlock per page table page, to guard the page table entries in that page, instead of using the mm's single page_table_lock. (But even then, page_table_lock is still used to guard page table allocation, and anon_vma allocation.) In this implementation, the spinlock is tucked inside the struct page of the page table page: with a BUILD_BUG_ON in case it overflows - which it would in the case of 32-bit PA-RISC with spinlock debugging enabled. Splitting the lock is not quite for free: another cacheline access. Ideally, I suppose we would use split ptlock only for multi-threaded processes on multi-cpu machines; but deciding that dynamically would have its own costs. So for now enable it by config, at some number of cpus - since the Kconfig language doesn't support inequalities, let preprocessor compare that with NR_CPUS. But I don't think it's worth being user-configurable: for good testing of both split and unsplit configs, split now at 4 cpus, and perhaps change that to 8 later. There is a benefit even for singly threaded processes: kswapd can be attacking one part of the mm while another part is busy faulting. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mm/pgtable.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index dcdce2c..39c099f 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -188,19 +188,19 @@ static inline void pgd_list_add(pgd_t *pgd) struct page *page = virt_to_page(pgd); page->index = (unsigned long)pgd_list; if (pgd_list) - pgd_list->private = (unsigned long)&page->index; + set_page_private(pgd_list, (unsigned long)&page->index); pgd_list = page; - page->private = (unsigned long)&pgd_list; + set_page_private(page, (unsigned long)&pgd_list); } static inline void pgd_list_del(pgd_t *pgd) { struct page *next, **pprev, *page = virt_to_page(pgd); next = (struct page *)page->index; - pprev = (struct page **)page->private; + pprev = (struct page **)page_private(page); *pprev = next; if (next) - next->private = (unsigned long)pprev; + set_page_private(next, (unsigned long)pprev); } void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused) -- cgit v1.1 From 208d54e5513c0c02d85af0990901354c74364d5c Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sat, 29 Oct 2005 18:16:52 -0700 Subject: [PATCH] memory hotplug locking: node_size_lock pgdat->node_size_lock is basically only neeeded in one place in the normal code: show_mem(), which is the arch-specific sysrq-m printing function. Strictly speaking, the architectures not doing memory hotplug do no need this locking in show_mem(). However, they are all included for completeness. This should also make any future consolidation of all of the implementations a little more straightforward. This lock is also held in the sparsemem code during a memory removal, as sections are invalidated. This is the place there pfn_valid() is made false for a memory area that's being removed. The lock is only required when doing pfn_valid() operations on memory which the user does not already have a reference on the page, such as in show_mem(). Signed-off-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mm/pgtable.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index 39c099f..9db3242 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c @@ -31,11 +31,13 @@ void show_mem(void) pg_data_t *pgdat; unsigned long i; struct page_state ps; + unsigned long flags; printk(KERN_INFO "Mem-info:\n"); show_free_areas(); printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10)); for_each_pgdat(pgdat) { + pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { page = pgdat_page_nr(pgdat, i); total++; @@ -48,6 +50,7 @@ void show_mem(void) else if (page_count(page)) shared += page_count(page) - 1; } + pgdat_resize_unlock(pgdat, &flags); } printk(KERN_INFO "%d pages of RAM\n", total); printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); -- cgit v1.1 From 05039b926374212b2d861860cf54b9e839d4dd76 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sat, 29 Oct 2005 18:16:57 -0700 Subject: [PATCH] memory hotplug: i386 addition functions Adds the necessary for non-NUMA hot-add of highmem to an existing zone on i386. Signed-off-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mm/discontig.c | 4 ++-- arch/i386/mm/init.c | 62 ++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 59 insertions(+), 7 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/mm/discontig.c b/arch/i386/mm/discontig.c index 244d8ec..c4af963 100644 --- a/arch/i386/mm/discontig.c +++ b/arch/i386/mm/discontig.c @@ -98,7 +98,7 @@ unsigned long node_memmap_size_bytes(int nid, unsigned long start_pfn, extern unsigned long find_max_low_pfn(void); extern void find_max_pfn(void); -extern void one_highpage_init(struct page *, int, int); +extern void add_one_highpage_init(struct page *, int, int); extern struct e820map e820; extern unsigned long init_pg_tables_end; @@ -427,7 +427,7 @@ void __init set_highmem_pages_init(int bad_ppro) if (!pfn_valid(node_pfn)) continue; page = pfn_to_page(node_pfn); - one_highpage_init(page, node_pfn, bad_ppro); + add_one_highpage_init(page, node_pfn, bad_ppro); } } totalram_pages += totalhigh_pages; diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 2ebaf75..542d929 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -266,17 +267,46 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) pkmap_page_table = pte; } -void __init one_highpage_init(struct page *page, int pfn, int bad_ppro) +void __devinit free_new_highpage(struct page *page) +{ + set_page_count(page, 1); + __free_page(page); + totalhigh_pages++; +} + +void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro) { if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) { ClearPageReserved(page); - set_page_count(page, 1); - __free_page(page); - totalhigh_pages++; + free_new_highpage(page); } else SetPageReserved(page); } +static int add_one_highpage_hotplug(struct page *page, unsigned long pfn) +{ + free_new_highpage(page); + totalram_pages++; +#ifdef CONFIG_FLATMEM + max_mapnr = max(pfn, max_mapnr); +#endif + num_physpages++; + return 0; +} + +/* + * Not currently handling the NUMA case. + * Assuming single node and all memory that + * has been added dynamically that would be + * onlined here is in HIGHMEM + */ +void online_page(struct page *page) +{ + ClearPageReserved(page); + add_one_highpage_hotplug(page, page_to_pfn(page)); +} + + #ifdef CONFIG_NUMA extern void set_highmem_pages_init(int); #else @@ -284,7 +314,7 @@ static void __init set_highmem_pages_init(int bad_ppro) { int pfn; for (pfn = highstart_pfn; pfn < highend_pfn; pfn++) - one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); + add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); totalram_pages += totalhigh_pages; } #endif /* CONFIG_FLATMEM */ @@ -615,6 +645,28 @@ void __init mem_init(void) #endif } +/* + * this is for the non-NUMA, single node SMP system case. + * Specifically, in the case of x86, we will always add + * memory to the highmem for now. + */ +#ifndef CONFIG_NEED_MULTIPLE_NODES +int add_memory(u64 start, u64 size) +{ + struct pglist_data *pgdata = &contig_page_data; + struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1; + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + + return __add_pages(zone, start_pfn, nr_pages); +} + +int remove_memory(u64 start, u64 size) +{ + return -EINVAL; +} +#endif + kmem_cache_t *pgd_cache; kmem_cache_t *pmd_cache; -- cgit v1.1 From 0d078f6f96809c95c69b99d6605a502b0ac63d3d Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 30 Oct 2005 14:59:20 -0800 Subject: [PATCH] CONFIG_IA32 Add CONFIG_X86_32 for i386. This allows selecting options that only apply to 32-bit systems. (X86 && !X86_64) becomes X86_32 (X86 || X86_64) becomes X86 Signed-off-by: Brian Gerst Cc: Sam Ravnborg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index d2703cd..2e3e8db 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -5,7 +5,7 @@ mainmenu "Linux Kernel Configuration" -config X86 +config X86_32 bool default y help @@ -18,6 +18,10 @@ config SEMAPHORE_SLEEPERS bool default y +config X86 + bool + default y + config MMU bool default y -- cgit v1.1 From dacb16b1a034fa7a0b868ee30758119fbfd90bc1 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Sun, 30 Oct 2005 14:59:25 -0800 Subject: [PATCH] i386 and x86_64 TSC set_cyc2ns_scale imprecision I just found out that some precision is unnecessarily lost in the arch/i386/kernel/timers/timer_tsc.c:set_cyc2ns_scale function. It uses a cpu_mhz parameter when it could use a cpu_khz. In the specific case of an Intel P4 running at 3001.171 Mhz, the truncation to 3001 Mhz leads to an imprecision of 19 microseconds per second : this is very sad for a timer with nearly nanosecond accuracy. Fix the x86_64 architecture too. Cc: george anzinger Cc: john stultz Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/timers/timer_hpet.c | 17 +++++++++++------ arch/i386/kernel/timers/timer_tsc.c | 21 +++++++++++++-------- 2 files changed, 24 insertions(+), 14 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index d973a8b..be24272 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -30,23 +30,28 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; * basic equation: * ns = cycles / (freq / ns_per_sec) * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_mhz * 10^6)) - * ns = cycles * (10^3 / cpu_mhz) + * ns = cycles * (10^9 / (cpu_khz * 10^3)) + * ns = cycles * (10^6 / cpu_khz) * * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^3 * SC / cpu_mhz) / SC + * ns = cycles * (10^6 * SC / cpu_khz) / SC * ns = cycles * cyc2ns_scale / SC * * And since SC is a constant power of two, we can convert the div * into a shift. + * + * We can use khz divisor instead of mhz to keep a better percision, since + * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * (mathieu.desnoyers@polymtl.ca) + * * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ static unsigned long cyc2ns_scale; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ -static inline void set_cyc2ns_scale(unsigned long cpu_mhz) +static inline void set_cyc2ns_scale(unsigned long cpu_khz) { - cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; + cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; } static inline unsigned long long cycles_2_ns(unsigned long long cyc) @@ -163,7 +168,7 @@ static int __init init_hpet(char* override) printk("Detected %u.%03u MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); } - set_cyc2ns_scale(cpu_khz/1000); + set_cyc2ns_scale(cpu_khz); } /* set this only when cpu_has_tsc */ timer_hpet.read_timer = read_timer_tsc; diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index 6dd470c..d395e3b 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -49,23 +49,28 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; * basic equation: * ns = cycles / (freq / ns_per_sec) * ns = cycles * (ns_per_sec / freq) - * ns = cycles * (10^9 / (cpu_mhz * 10^6)) - * ns = cycles * (10^3 / cpu_mhz) + * ns = cycles * (10^9 / (cpu_khz * 10^3)) + * ns = cycles * (10^6 / cpu_khz) * * Then we use scaling math (suggested by george@mvista.com) to get: - * ns = cycles * (10^3 * SC / cpu_mhz) / SC + * ns = cycles * (10^6 * SC / cpu_khz) / SC * ns = cycles * cyc2ns_scale / SC * * And since SC is a constant power of two, we can convert the div - * into a shift. + * into a shift. + * + * We can use khz divisor instead of mhz to keep a better percision, since + * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. + * (mathieu.desnoyers@polymtl.ca) + * * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ static unsigned long cyc2ns_scale; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ -static inline void set_cyc2ns_scale(unsigned long cpu_mhz) +static inline void set_cyc2ns_scale(unsigned long cpu_khz) { - cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; + cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; } static inline unsigned long long cycles_2_ns(unsigned long long cyc) @@ -286,7 +291,7 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, if (use_tsc) { if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { fast_gettimeoffset_quotient = cpufreq_scale(fast_gettimeoffset_ref, freq->new, ref_freq); - set_cyc2ns_scale(cpu_khz/1000); + set_cyc2ns_scale(cpu_khz); } } #endif @@ -536,7 +541,7 @@ static int __init init_tsc(char* override) printk("Detected %u.%03u MHz processor.\n", cpu_khz / 1000, cpu_khz % 1000); } - set_cyc2ns_scale(cpu_khz/1000); + set_cyc2ns_scale(cpu_khz); return 0; } } -- cgit v1.1 From 8896fab35e62aa748a5ce62ac773508e51f10be1 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sun, 30 Oct 2005 14:59:27 -0800 Subject: [PATCH] x86: cmpxchg improvements This adjusts i386's cmpxchg patterns so that - for word and long cmpxchg-es the compiler can utilize all possible registers - cmpxchg8b gets disabled when the minimum specified hardware architectur doesn't support it (like was already happening for the byte, word, and long ones). Signed-off-by: Jan Beulich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 2e3e8db..35d3cff 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -424,6 +424,11 @@ config X86_POPAD_OK depends on !M386 default y +config X86_CMPXCHG64 + bool + depends on !M386 && !M486 + default y + config X86_ALIGNMENT_16 bool depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 -- cgit v1.1 From 08967f941ad897b2f7c2f99e886c75d6319e5087 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Sun, 30 Oct 2005 14:59:28 -0800 Subject: [PATCH] FPU context corrupted after resume mxcsr_feature_mask_init isn't needed in suspend/resume time (we can use boot time mask). And actually it's harmful, as it clear task's saved fxsave in resume. This bug is widely seen by users using zsh. (akpm: my eyes. Fixed some surrounding whitespace mess) Signed-off-by: Shaohua Li Cc: Pavel Machek Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/power/cpu.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c index b27c5ac..1f15726 100644 --- a/arch/i386/power/cpu.c +++ b/arch/i386/power/cpu.c @@ -51,16 +51,14 @@ void save_processor_state(void) __save_processor_state(&saved_context); } -static void -do_fpu_end(void) +static void do_fpu_end(void) { - /* restore FPU regs if necessary */ - /* Do it out of line so that gcc does not move cr0 load to some stupid place */ - kernel_fpu_end(); - mxcsr_feature_mask_init(); + /* + * Restore FPU regs if necessary. + */ + kernel_fpu_end(); } - static void fix_processor_context(void) { int cpu = smp_processor_id(); -- cgit v1.1 From d5cd4aadd3d220afac8e3e6d922e333592551f7d Mon Sep 17 00:00:00 2001 From: Bart Oldeman Date: Sun, 30 Oct 2005 14:59:29 -0800 Subject: [PATCH] x86: initialise tss->io_bitmap_owner to something There exists a field io_bitmap_owner in the TSS that is only checked, but never set to anything else but NULL. Signed-off-by: Bart Oldeman Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/traps.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 19e90bd..c34d1bf 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -488,6 +488,7 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs, tss->io_bitmap_max - thread->io_bitmap_max); tss->io_bitmap_max = thread->io_bitmap_max; tss->io_bitmap_base = IO_BITMAP_OFFSET; + tss->io_bitmap_owner = thread; put_cpu(); return; } -- cgit v1.1 From d16aafff2570abb557a5cb18c98027aabd602e22 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Sun, 30 Oct 2005 14:59:30 -0800 Subject: [PATCH] intel_cacheinfo: remove MAX_CACHE_LEAVES limit Initial internal version of Venki's cpuid(4) deterministic cache parameter identification patch used static arrays of size MAX_CACHE_LEAVES. Final patch which made to the base used dynamic array allocation, with this MAX_CACHE_LEAVES limit hunk still in place. cpuid(4) already has a mechanism to find out the number of cache levels implemented and there is no need for this hardcoded MAX_CACHE_LEAVES limit. So remove the MAX_CACHE_LEAVES limit from the routine which calculates the number of cache levels using cpuid(4) Signed-off-by: Suresh Siddha Cc: Venkatesh Pallipadi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/intel_cacheinfo.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 9e0d5f8..c802206 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -117,7 +117,6 @@ struct _cpuid4_info { cpumask_t shared_cpu_map; }; -#define MAX_CACHE_LEAVES 4 static unsigned short num_cache_leaves; static int __devinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) @@ -144,20 +143,15 @@ static int __init find_num_cache_leaves(void) { unsigned int eax, ebx, ecx, edx; union _cpuid4_leaf_eax cache_eax; - int i; - int retval; + int i = -1; - retval = MAX_CACHE_LEAVES; - /* Do cpuid(4) loop to find out num_cache_leaves */ - for (i = 0; i < MAX_CACHE_LEAVES; i++) { + do { + ++i; + /* Do cpuid(4) loop to find out num_cache_leaves */ cpuid_count(4, i, &eax, &ebx, &ecx, &edx); cache_eax.full = eax; - if (cache_eax.split.type == CACHE_TYPE_NULL) { - retval = i; - break; - } - } - return retval; + } while (cache_eax.split.type != CACHE_TYPE_NULL); + return i; } unsigned int __devinit init_intel_cacheinfo(struct cpuinfo_x86 *c) -- cgit v1.1 From 9f40a72a7e819789f66910c8cd60aab005cdb413 Mon Sep 17 00:00:00 2001 From: Natalie Protasevich Date: Sun, 30 Oct 2005 14:59:32 -0800 Subject: [PATCH] x86: hot plug CPU to support physical add of new processors The patch allows physical bring-up of new processors (not initially present in the configuration) from facilities such as driver/utility implemented on a platform. The actual method of making processors available is up to the platform implementation. Signed-off-by: Natalie Protasevich Cc: Shaohua Li Cc: Ashok Raj Cc: Zwane Mwaikambo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/irq.c | 8 ++++---- arch/i386/kernel/mpparse.c | 6 +++--- arch/i386/kernel/smpboot.c | 4 ++++ 3 files changed, 11 insertions(+), 7 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index ce66dcc..1a201a93 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -218,7 +218,7 @@ int show_interrupts(struct seq_file *p, void *v) if (i == 0) { seq_printf(p, " "); - for_each_cpu(j) + for_each_online_cpu(j) seq_printf(p, "CPU%d ",j); seq_putc(p, '\n'); } @@ -232,7 +232,7 @@ int show_interrupts(struct seq_file *p, void *v) #ifndef CONFIG_SMP seq_printf(p, "%10u ", kstat_irqs(i)); #else - for_each_cpu(j) + for_each_online_cpu(j) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif seq_printf(p, " %14s", irq_desc[i].handler->typename); @@ -246,12 +246,12 @@ skip: spin_unlock_irqrestore(&irq_desc[i].lock, flags); } else if (i == NR_IRQS) { seq_printf(p, "NMI: "); - for_each_cpu(j) + for_each_online_cpu(j) seq_printf(p, "%10u ", nmi_count(j)); seq_putc(p, '\n'); #ifdef CONFIG_X86_LOCAL_APIC seq_printf(p, "LOC: "); - for_each_cpu(j) + for_each_online_cpu(j) seq_printf(p, "%10u ", per_cpu(irq_stat,j).apic_timer_irqs); seq_putc(p, '\n'); diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 27aabfc..3751bb4 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -69,7 +69,7 @@ unsigned int def_to_bigsmp = 0; /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; /* Internal processor count */ -static unsigned int __initdata num_processors; +static unsigned int __devinitdata num_processors; /* Bitmask of physically existing CPUs */ physid_mask_t phys_cpu_present_map; @@ -119,7 +119,7 @@ static int MP_valid_apicid(int apicid, int version) } #endif -static void __init MP_processor_info (struct mpc_config_processor *m) +static void __devinit MP_processor_info (struct mpc_config_processor *m) { int ver, apicid; physid_mask_t phys_cpu; @@ -834,7 +834,7 @@ void __init mp_register_lapic_address ( } -void __init mp_register_lapic ( +void __devinit mp_register_lapic ( u8 id, u8 enabled) { diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 1fb26d0..01b618e 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -87,7 +87,11 @@ EXPORT_SYMBOL(cpu_online_map); cpumask_t cpu_callin_map; cpumask_t cpu_callout_map; EXPORT_SYMBOL(cpu_callout_map); +#ifdef CONFIG_HOTPLUG_CPU +cpumask_t cpu_possible_map = CPU_MASK_ALL; +#else cpumask_t cpu_possible_map; +#endif EXPORT_SYMBOL(cpu_possible_map); static cpumask_t smp_commenced_mask; -- cgit v1.1 From 72e12b76fe48d99d1deb417f177b10a9d99b2e74 Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Sun, 30 Oct 2005 14:59:33 -0800 Subject: [PATCH] x86: bogus tls from gdt The per-CPU initialization code is copying in bogus data into thread->tls_array. Note that it copies &per_cpu(cpu_gdt_table, cpu), not &per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_TLS_MIN). That is totally broken and unnecessary. Make the initialization explicitly NULL. Signed-off-by: Zachary Amsden Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/common.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 9ad43be..a162c03 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -607,12 +607,6 @@ void __devinit cpu_init(void) cpu_gdt_descr[cpu].address = (unsigned long)&per_cpu(cpu_gdt_table, cpu); - /* - * Set up the per-thread TLS descriptor cache: - */ - memcpy(thread->tls_array, &per_cpu(cpu_gdt_table, cpu), - GDT_ENTRY_TLS_ENTRIES * 8); - load_gdt(&cpu_gdt_descr[cpu]); load_idt(&idt_descr); -- cgit v1.1 From 251e6912df43df54570ed68aade703b329c6cd5b Mon Sep 17 00:00:00 2001 From: Zachary Amsden Date: Sun, 30 Oct 2005 14:59:34 -0800 Subject: [PATCH] x86: add an accessor function for getting the per-CPU gdt Add an accessor function for getting the per-CPU gdt. Callee must already have the CPU. Signed-off-by: Zachary Amsden Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/apm.c | 40 ++++++++++++++++++++++------------------ arch/i386/kernel/cpu/common.c | 9 ++++----- arch/i386/mm/fault.c | 2 +- 3 files changed, 27 insertions(+), 24 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index d7811c4..d2ef0c2 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -597,12 +597,14 @@ static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in, cpumask_t cpus; int cpu; struct desc_struct save_desc_40; + struct desc_struct *gdt; cpus = apm_save_cpus(); cpu = get_cpu(); - save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8]; - per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc; + gdt = get_cpu_gdt_table(cpu); + save_desc_40 = gdt[0x40 / 8]; + gdt[0x40 / 8] = bad_bios_desc; local_save_flags(flags); APM_DO_CLI; @@ -610,7 +612,7 @@ static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in, apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi); APM_DO_RESTORE_SEGS; local_irq_restore(flags); - per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = save_desc_40; + gdt[0x40 / 8] = save_desc_40; put_cpu(); apm_restore_cpus(cpus); @@ -639,13 +641,14 @@ static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax) cpumask_t cpus; int cpu; struct desc_struct save_desc_40; - + struct desc_struct *gdt; cpus = apm_save_cpus(); cpu = get_cpu(); - save_desc_40 = per_cpu(cpu_gdt_table, cpu)[0x40 / 8]; - per_cpu(cpu_gdt_table, cpu)[0x40 / 8] = bad_bios_desc; + gdt = get_cpu_gdt_table(cpu); + save_desc_40 = gdt[0x40 / 8]; + gdt[0x40 / 8] = bad_bios_desc; local_save_flags(flags); APM_DO_CLI; @@ -653,7 +656,7 @@ static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax) error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax); APM_DO_RESTORE_SEGS; local_irq_restore(flags); - __get_cpu_var(cpu_gdt_table)[0x40 / 8] = save_desc_40; + gdt[0x40 / 8] = save_desc_40; put_cpu(); apm_restore_cpus(cpus); return error; @@ -2295,35 +2298,36 @@ static int __init apm_init(void) apm_bios_entry.segment = APM_CS; for (i = 0; i < NR_CPUS; i++) { - set_base(per_cpu(cpu_gdt_table, i)[APM_CS >> 3], + struct desc_struct *gdt = get_cpu_gdt_table(i); + set_base(gdt[APM_CS >> 3], __va((unsigned long)apm_info.bios.cseg << 4)); - set_base(per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3], + set_base(gdt[APM_CS_16 >> 3], __va((unsigned long)apm_info.bios.cseg_16 << 4)); - set_base(per_cpu(cpu_gdt_table, i)[APM_DS >> 3], + set_base(gdt[APM_DS >> 3], __va((unsigned long)apm_info.bios.dseg << 4)); #ifndef APM_RELAX_SEGMENTS if (apm_info.bios.version == 0x100) { #endif /* For ASUS motherboard, Award BIOS rev 110 (and others?) */ - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], 64 * 1024 - 1); + _set_limit((char *)&gdt[APM_CS >> 3], 64 * 1024 - 1); /* For some unknown machine. */ - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3], 64 * 1024 - 1); + _set_limit((char *)&gdt[APM_CS_16 >> 3], 64 * 1024 - 1); /* For the DEC Hinote Ultra CT475 (and others?) */ - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_DS >> 3], 64 * 1024 - 1); + _set_limit((char *)&gdt[APM_DS >> 3], 64 * 1024 - 1); #ifndef APM_RELAX_SEGMENTS } else { - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], + _set_limit((char *)&gdt[APM_CS >> 3], (apm_info.bios.cseg_len - 1) & 0xffff); - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS_16 >> 3], + _set_limit((char *)&gdt[APM_CS_16 >> 3], (apm_info.bios.cseg_16_len - 1) & 0xffff); - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_DS >> 3], + _set_limit((char *)&gdt[APM_DS >> 3], (apm_info.bios.dseg_len - 1) & 0xffff); /* workaround for broken BIOSes */ if (apm_info.bios.cseg_len <= apm_info.bios.offset) - _set_limit((char *)&per_cpu(cpu_gdt_table, i)[APM_CS >> 3], 64 * 1024 -1); + _set_limit((char *)&gdt[APM_CS >> 3], 64 * 1024 -1); if (apm_info.bios.dseg_len <= 0x40) { /* 0x40 * 4kB == 64kB */ /* for the BIOS that assumes granularity = 1 */ - per_cpu(cpu_gdt_table, i)[APM_DS >> 3].b |= 0x800000; + gdt[APM_DS >> 3].b |= 0x800000; printk(KERN_NOTICE "apm: we set the granularity of dseg.\n"); } } diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index a162c03..74145a3 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -573,6 +573,7 @@ void __devinit cpu_init(void) int cpu = smp_processor_id(); struct tss_struct * t = &per_cpu(init_tss, cpu); struct thread_struct *thread = ¤t->thread; + struct desc_struct *gdt = get_cpu_gdt_table(cpu); __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu); if (cpu_test_and_set(cpu, cpu_initialized)) { @@ -594,18 +595,16 @@ void __devinit cpu_init(void) * Initialize the per-CPU GDT with the boot GDT, * and set up the GDT descriptor: */ - memcpy(&per_cpu(cpu_gdt_table, cpu), cpu_gdt_table, - GDT_SIZE); + memcpy(gdt, cpu_gdt_table, GDT_SIZE); /* Set up GDT entry for 16bit stack */ - *(__u64 *)&(per_cpu(cpu_gdt_table, cpu)[GDT_ENTRY_ESPFIX_SS]) |= + *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |= ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) | ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) | (CPU_16BIT_STACK_SIZE - 1); cpu_gdt_descr[cpu].size = GDT_SIZE - 1; - cpu_gdt_descr[cpu].address = - (unsigned long)&per_cpu(cpu_gdt_table, cpu); + cpu_gdt_descr[cpu].address = (unsigned long)gdt; load_gdt(&cpu_gdt_descr[cpu]); load_idt(&idt_descr); diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 9edd448..cf572d9 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -108,7 +108,7 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs, desc = (void *)desc + (seg & ~7); } else { /* Must disable preemption while reading the GDT. */ - desc = (u32 *)&per_cpu(cpu_gdt_table, get_cpu()); + desc = (u32 *)get_cpu_gdt_table(get_cpu()); desc = (void *)desc + (seg & ~7); } -- cgit v1.1 From 434440a2804639f11858d9d384c9505927feb186 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Sun, 30 Oct 2005 14:59:35 -0800 Subject: [PATCH] x86: bug fix in P6 Machine check initialization Make P6 MCA initialization code complaint with guidelines in IA-32 SDM Vol3. Bank 0 control register should not be set by OS and clear status registers on all banks on reset. This will prevent false MCE alarms on the systems that has some non-MCE information left-over in MC0_STATUS on reboot. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/mcheck/p6.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c index 3c035b8..979b18b 100644 --- a/arch/i386/kernel/cpu/mcheck/p6.c +++ b/arch/i386/kernel/cpu/mcheck/p6.c @@ -102,11 +102,16 @@ void __devinit intel_p6_mcheck_init(struct cpuinfo_x86 *c) wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); nr_mce_banks = l & 0xff; - /* Don't enable bank 0 on intel P6 cores, it goes bang quickly. */ - for (i=1; i Date: Sun, 30 Oct 2005 14:59:36 -0800 Subject: [PATCH] asus vt8235 router buggy bios workaround Hopefully fix http://bugzilla.kernel.org/show_bug.cgi?id=5235 Similar problem has been reported before here: http://groups.google.com/group/linux.kernel/browse_thread/thread/def4ca19dbc3cd4/5cffbf349f2c87a4?tvc=2&q=Aleksey+Gorelov&hl=en#5cffbf349f2c87a4 and was related to bug in BIOS reporting 82C686 router compatible to 586. I suspect BIOS on this board has similar issue: reports VT8235 router to be compatible with 586 one - which is obviously not true. Patch from the link above has already incorporated in both 2.6 & 2.4 series, but might not work in this particular case. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/pci/irq.c | 55 +++++++++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 19 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/pci/irq.c b/arch/i386/pci/irq.c index cddafe3..19e6f48 100644 --- a/arch/i386/pci/irq.c +++ b/arch/i386/pci/irq.c @@ -547,31 +547,48 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route return 0; } -static __init int via_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +static __init int via_router_probe(struct irq_router *r, + struct pci_dev *router, u16 device) { /* FIXME: We should move some of the quirk fixup stuff here */ - if (router->device == PCI_DEVICE_ID_VIA_82C686 && - device == PCI_DEVICE_ID_VIA_82C586_0) { - /* Asus k7m bios wrongly reports 82C686A as 586-compatible */ - device = PCI_DEVICE_ID_VIA_82C686; + /* + * work arounds for some buggy BIOSes + */ + if (device == PCI_DEVICE_ID_VIA_82C586_0) { + switch(router->device) { + case PCI_DEVICE_ID_VIA_82C686: + /* + * Asus k7m bios wrongly reports 82C686A + * as 586-compatible + */ + device = PCI_DEVICE_ID_VIA_82C686; + break; + case PCI_DEVICE_ID_VIA_8235: + /** + * Asus a7v-x bios wrongly reports 8235 + * as 586-compatible + */ + device = PCI_DEVICE_ID_VIA_8235; + break; + } } - switch(device) - { - case PCI_DEVICE_ID_VIA_82C586_0: - r->name = "VIA"; - r->get = pirq_via586_get; - r->set = pirq_via586_set; - return 1; - case PCI_DEVICE_ID_VIA_82C596: - case PCI_DEVICE_ID_VIA_82C686: - case PCI_DEVICE_ID_VIA_8231: + switch(device) { + case PCI_DEVICE_ID_VIA_82C586_0: + r->name = "VIA"; + r->get = pirq_via586_get; + r->set = pirq_via586_set; + return 1; + case PCI_DEVICE_ID_VIA_82C596: + case PCI_DEVICE_ID_VIA_82C686: + case PCI_DEVICE_ID_VIA_8231: + case PCI_DEVICE_ID_VIA_8235: /* FIXME: add new ones for 8233/5 */ - r->name = "VIA"; - r->get = pirq_via_get; - r->set = pirq_via_set; - return 1; + r->name = "VIA"; + r->get = pirq_via_get; + r->set = pirq_via_set; + return 1; } return 0; } -- cgit v1.1 From f014a556e714dfb02502e3be6146a39ca625f33c Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Sun, 30 Oct 2005 14:59:37 -0800 Subject: [PATCH] fixup bogus e820 entry with mem= This was reported because someone was getting oopses reading /proc/iomem. It was tracked down to a zero-sized 'struct resource' entry which was located right at 4GB. You need two conditions to hit this bug: a BIOS E820_RAM area starting at exactly the boundary where you specify mem= (to get a zero-sized entry), and for the legacy_init_iomem_resources() loop to skip that resource (which only happens at exactly 4G). I think the killing zero-sized e820 entry is the easiest way to fix this. Signed-off-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/setup.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 9b8c8a1..b48ac635 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -389,14 +389,24 @@ static void __init limit_regions(unsigned long long size) } } for (i = 0; i < e820.nr_map; i++) { - if (e820.map[i].type == E820_RAM) { - current_addr = e820.map[i].addr + e820.map[i].size; - if (current_addr >= size) { - e820.map[i].size -= current_addr-size; - e820.nr_map = i + 1; - return; - } + current_addr = e820.map[i].addr + e820.map[i].size; + if (current_addr < size) + continue; + + if (e820.map[i].type != E820_RAM) + continue; + + if (e820.map[i].addr >= size) { + /* + * This region starts past the end of the + * requested size, skip it completely. + */ + e820.nr_map = i; + } else { + e820.nr_map = i + 1; + e820.map[i].size -= current_addr - size; } + return; } } -- cgit v1.1 From 30037f66ce63b6b7ca1fbfb06605b831f4a60df6 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Sun, 30 Oct 2005 14:59:38 -0800 Subject: [PATCH] x86: when L3 is present show its size in /proc/cpuinfo The code that prints the cache size assumes that L3 always lives in chipset and is shared across CPUs. Which is not really true. I think all the cachesizes reported by cpuid are in the processor itself. The attached patch changes the code to reflect that. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/intel_cacheinfo.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index c802206..7cc84a4 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -278,13 +278,7 @@ unsigned int __devinit init_intel_cacheinfo(struct cpuinfo_x86 *c) if ( l3 ) printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); - /* - * This assumes the L3 cache is shared; it typically lives in - * the northbridge. The L1 caches are included by the L2 - * cache, and so should not be included for the purpose of - * SMP switching weights. - */ - c->x86_cache_size = l2 ? l2 : (l1i+l1d); + c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); } return l2; -- cgit v1.1 From 9338316c9323682d32270d83b106472a50ab6da4 Mon Sep 17 00:00:00 2001 From: "Natalie.Protasevich@unisys.com" Date: Sun, 30 Oct 2005 14:59:38 -0800 Subject: [PATCH] ES7000 platform update This is platform code update for ES7000: disables IRQ overrides for the recent ES7000 (Rascal/Zorro), cleans up the compile warning. The patch only affects the ES7000 subarch. Signed-off-by: Acked-by: Zwane Mwaikambo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mach-es7000/es7000.h | 11 ++++++++++- arch/i386/mach-es7000/es7000plat.c | 11 +++++++---- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/mach-es7000/es7000.h b/arch/i386/mach-es7000/es7000.h index 898ed90..f1e3204 100644 --- a/arch/i386/mach-es7000/es7000.h +++ b/arch/i386/mach-es7000/es7000.h @@ -24,6 +24,15 @@ * http://www.unisys.com */ +/* + * ES7000 chipsets + */ + +#define NON_UNISYS 0 +#define ES7000_CLASSIC 1 +#define ES7000_ZORRO 2 + + #define MIP_REG 1 #define MIP_PSAI_REG 4 @@ -106,6 +115,6 @@ struct mip_reg { extern int parse_unisys_oem (char *oemptr); extern int find_unisys_acpi_oem_table(unsigned long *oem_addr); -extern void setup_unisys (); +extern void setup_unisys(void); extern int es7000_start_cpu(int cpu, unsigned long eip); extern void es7000_sw_apic(void); diff --git a/arch/i386/mach-es7000/es7000plat.c b/arch/i386/mach-es7000/es7000plat.c index dc66605..a9ab064 100644 --- a/arch/i386/mach-es7000/es7000plat.c +++ b/arch/i386/mach-es7000/es7000plat.c @@ -62,6 +62,9 @@ static unsigned int base; static int es7000_rename_gsi(int ioapic, int gsi) { + if (es7000_plat == ES7000_ZORRO) + return gsi; + if (!base) { int i; for (i = 0; i < nr_ioapics; i++) @@ -76,7 +79,7 @@ es7000_rename_gsi(int ioapic, int gsi) #endif /* (CONFIG_X86_IO_APIC) && (CONFIG_ACPI) */ void __init -setup_unisys () +setup_unisys(void) { /* * Determine the generation of the ES7000 currently running. @@ -86,9 +89,9 @@ setup_unisys () * */ if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)) - es7000_plat = 2; + es7000_plat = ES7000_ZORRO; else - es7000_plat = 1; + es7000_plat = ES7000_CLASSIC; ioapic_renumber_irq = es7000_rename_gsi; } @@ -151,7 +154,7 @@ parse_unisys_oem (char *oemptr) } if (success < 2) { - es7000_plat = 0; + es7000_plat = NON_UNISYS; } else setup_unisys(); return es7000_plat; -- cgit v1.1 From fcfd636a728fe2b8fb8c8fd8c557302059580577 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 30 Oct 2005 14:59:39 -0800 Subject: [PATCH] i386 io_apic.c: Memorize at bootup where the i8259 is connected Currently we attempt to restore virtual wire mode on reboot, which only works if we can figure out where the i8259 is connected. This is very useful when we kexec another kernel and likely helpful when dealing with a BIOS that make assumptions about how the system is setup. Since the acpi MADT table does not provide the location where the i8259 is connected we have to look at the hardware to figure it out. Most systems have the i8259 connected the local apic of the cpu so won't be affected but people running Opteron and some serverworks chipsets should be able to use kexec now. In addition this patch removes the hard coded assumption that the io_apic that delivers isa interrups is always known to the kernel as io_apic 0. As there does not appear to be anything to guarantee that assumption is true. Signed-off-by: Eric W. Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/io_apic.c | 147 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 112 insertions(+), 35 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index fb3991e..cc5d7ac 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -46,6 +46,9 @@ int (*ioapic_renumber_irq)(int ioapic, int irq); atomic_t irq_mis_count; +/* Where if anywhere is the i8259 connect in external int mode */ +static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; + static DEFINE_SPINLOCK(ioapic_lock); /* @@ -738,7 +741,7 @@ static int find_irq_entry(int apic, int pin, int type) /* * Find the pin to which IRQ[irq] (ISA) is connected */ -static int find_isa_irq_pin(int irq, int type) +static int __init find_isa_irq_pin(int irq, int type) { int i; @@ -758,6 +761,33 @@ static int find_isa_irq_pin(int irq, int type) return -1; } +static int __init find_isa_irq_apic(int irq, int type) +{ + int i; + + for (i = 0; i < mp_irq_entries; i++) { + int lbus = mp_irqs[i].mpc_srcbus; + + if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || + mp_bus_id_to_type[lbus] == MP_BUS_EISA || + mp_bus_id_to_type[lbus] == MP_BUS_MCA || + mp_bus_id_to_type[lbus] == MP_BUS_NEC98 + ) && + (mp_irqs[i].mpc_irqtype == type) && + (mp_irqs[i].mpc_srcbusirq == irq)) + break; + } + if (i < mp_irq_entries) { + int apic; + for(apic = 0; apic < nr_ioapics; apic++) { + if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) + return apic; + } + } + + return -1; +} + /* * Find a specific PCI IRQ entry. * Not an __init, possibly needed by modules @@ -1253,7 +1283,7 @@ static void __init setup_IO_APIC_irqs(void) /* * Set up the 8259A-master output pin: */ -static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) +static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) { struct IO_APIC_route_entry entry; unsigned long flags; @@ -1287,8 +1317,8 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) * Add it to the IO-APIC irq-routing table: */ spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); + io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); + io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); spin_unlock_irqrestore(&ioapic_lock, flags); enable_8259A_irq(0); @@ -1595,7 +1625,8 @@ void /*__init*/ print_PIC(void) static void __init enable_IO_APIC(void) { union IO_APIC_reg_01 reg_01; - int i; + int i8259_apic, i8259_pin; + int i, apic; unsigned long flags; for (i = 0; i < PIN_MAP_SIZE; i++) { @@ -1609,11 +1640,52 @@ static void __init enable_IO_APIC(void) /* * The number of IO-APIC IRQ registers (== #pins): */ - for (i = 0; i < nr_ioapics; i++) { + for (apic = 0; apic < nr_ioapics; apic++) { spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(i, 1); + reg_01.raw = io_apic_read(apic, 1); spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[i] = reg_01.bits.entries+1; + nr_ioapic_registers[apic] = reg_01.bits.entries+1; + } + for(apic = 0; apic < nr_ioapics; apic++) { + int pin; + /* See if any of the pins is in ExtINT mode */ + for(pin = 0; pin < nr_ioapic_registers[i]; pin++) { + struct IO_APIC_route_entry entry; + spin_lock_irqsave(&ioapic_lock, flags); + *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); + *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); + spin_unlock_irqrestore(&ioapic_lock, flags); + + + /* If the interrupt line is enabled and in ExtInt mode + * I have found the pin where the i8259 is connected. + */ + if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { + ioapic_i8259.apic = apic; + ioapic_i8259.pin = pin; + goto found_i8259; + } + } + } + found_i8259: + /* Look to see what if the MP table has reported the ExtINT */ + /* If we could not find the appropriate pin by looking at the ioapic + * the i8259 probably is not connected the ioapic but give the + * mptable a chance anyway. + */ + i8259_pin = find_isa_irq_pin(0, mp_ExtINT); + i8259_apic = find_isa_irq_apic(0, mp_ExtINT); + /* Trust the MP table if nothing is setup in the hardware */ + if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { + printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); + ioapic_i8259.pin = i8259_pin; + ioapic_i8259.apic = i8259_apic; + } + /* Complain if the MP table and the hardware disagree */ + if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && + (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) + { + printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); } /* @@ -1627,7 +1699,6 @@ static void __init enable_IO_APIC(void) */ void disable_IO_APIC(void) { - int pin; /* * Clear the IO-APIC before rebooting: */ @@ -1638,8 +1709,7 @@ void disable_IO_APIC(void) * Put that IOAPIC in virtual wire mode * so legacy interrupts can be delivered. */ - pin = find_isa_irq_pin(0, mp_ExtINT); - if (pin != -1) { + if (ioapic_i8259.pin != -1) { struct IO_APIC_route_entry entry; unsigned long flags; @@ -1650,7 +1720,7 @@ void disable_IO_APIC(void) entry.polarity = 0; /* High */ entry.delivery_status = 0; entry.dest_mode = 0; /* Physical */ - entry.delivery_mode = 7; /* ExtInt */ + entry.delivery_mode = dest_ExtINT; /* ExtInt */ entry.vector = 0; entry.dest.physical.physical_dest = 0; @@ -1659,11 +1729,13 @@ void disable_IO_APIC(void) * Add it to the IO-APIC irq-routing table: */ spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); - io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); + io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin, + *(((int *)&entry)+1)); + io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin, + *(((int *)&entry)+0)); spin_unlock_irqrestore(&ioapic_lock, flags); } - disconnect_bsp_APIC(pin != -1); + disconnect_bsp_APIC(ioapic_i8259.pin != -1); } /* @@ -2113,20 +2185,21 @@ static void setup_nmi (void) */ static inline void unlock_ExtINT_logic(void) { - int pin, i; + int apic, pin, i; struct IO_APIC_route_entry entry0, entry1; unsigned char save_control, save_freq_select; unsigned long flags; - pin = find_isa_irq_pin(8, mp_INT); + pin = find_isa_irq_pin(8, mp_INT); + apic = find_isa_irq_apic(8, mp_INT); if (pin == -1) return; spin_lock_irqsave(&ioapic_lock, flags); - *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin); - *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin); + *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin); + *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin); spin_unlock_irqrestore(&ioapic_lock, flags); - clear_IO_APIC_pin(0, pin); + clear_IO_APIC_pin(apic, pin); memset(&entry1, 0, sizeof(entry1)); @@ -2139,8 +2212,8 @@ static inline void unlock_ExtINT_logic(void) entry1.vector = 0; spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); - io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); + io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); + io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); spin_unlock_irqrestore(&ioapic_lock, flags); save_control = CMOS_READ(RTC_CONTROL); @@ -2158,11 +2231,11 @@ static inline void unlock_ExtINT_logic(void) CMOS_WRITE(save_control, RTC_CONTROL); CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); - clear_IO_APIC_pin(0, pin); + clear_IO_APIC_pin(apic, pin); spin_lock_irqsave(&ioapic_lock, flags); - io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); - io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); + io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); + io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); spin_unlock_irqrestore(&ioapic_lock, flags); } @@ -2174,7 +2247,7 @@ static inline void unlock_ExtINT_logic(void) */ static inline void check_timer(void) { - int pin1, pin2; + int apic1, pin1, apic2, pin2; int vector; /* @@ -2196,10 +2269,13 @@ static inline void check_timer(void) timer_ack = 1; enable_8259A_irq(0); - pin1 = find_isa_irq_pin(0, mp_INT); - pin2 = find_isa_irq_pin(0, mp_ExtINT); + pin1 = find_isa_irq_pin(0, mp_INT); + apic1 = find_isa_irq_apic(0, mp_INT); + pin2 = ioapic_i8259.pin; + apic2 = ioapic_i8259.apic; - printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2); + printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", + vector, apic1, pin1, apic2, pin2); if (pin1 != -1) { /* @@ -2216,8 +2292,9 @@ static inline void check_timer(void) clear_IO_APIC_pin(0, pin1); return; } - clear_IO_APIC_pin(0, pin1); - printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); + clear_IO_APIC_pin(apic1, pin1); + printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to " + "IO-APIC\n"); } printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); @@ -2226,13 +2303,13 @@ static inline void check_timer(void) /* * legacy devices should be connected to IO APIC #0 */ - setup_ExtINT_IRQ0_pin(pin2, vector); + setup_ExtINT_IRQ0_pin(apic2, pin2, vector); if (timer_irq_works()) { printk("works.\n"); if (pin1 != -1) - replace_pin_at_irq(0, 0, pin1, 0, pin2); + replace_pin_at_irq(0, apic1, pin1, apic2, pin2); else - add_pin_to_irq(0, 0, pin2); + add_pin_to_irq(0, apic2, pin2); if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); } @@ -2241,7 +2318,7 @@ static inline void check_timer(void) /* * Cleanup, just in case ... */ - clear_IO_APIC_pin(0, pin2); + clear_IO_APIC_pin(apic2, pin2); } printk(" failed.\n"); -- cgit v1.1 From 29b70081f7cb094513d5189e82d3478b50777a28 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 30 Oct 2005 14:59:40 -0800 Subject: [PATCH] i386 nmi_watchdog: Merge check_nmi_watchdog fixes from x86_64 The per cpu nmi watchdog timer is based on an event counter. idle cpus don't generate events so the NMI watchdog doesn't fire and the test to see if the watchdog is working fails. - Add nmi_cpu_busy so idle cpus don't mess up the test. - kmalloc prev_nmi_count to keep kernel stack usage bounded. - Improve the error message on failure so there is enough information to debug problems. Signed-off-by: Eric W. Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/nmi.c | 39 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 72515b8..d661703 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -100,16 +100,44 @@ int nmi_active; (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) +#ifdef CONFIG_SMP +/* The performance counters used by NMI_LOCAL_APIC don't trigger when + * the CPU is idle. To make sure the NMI watchdog really ticks on all + * CPUs during the test make them busy. + */ +static __init void nmi_cpu_busy(void *data) +{ + volatile int *endflag = data; + local_irq_enable(); + /* Intentionally don't use cpu_relax here. This is + to make sure that the performance counter really ticks, + even if there is a simulator or similar that catches the + pause instruction. On a real HT machine this is fine because + all other CPUs are busy with "useless" delay loops and don't + care if they get somewhat less cycles. */ + while (*endflag == 0) + barrier(); +} +#endif + static int __init check_nmi_watchdog(void) { - unsigned int prev_nmi_count[NR_CPUS]; + volatile int endflag = 0; + unsigned int *prev_nmi_count; int cpu; if (nmi_watchdog == NMI_NONE) return 0; + prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); + if (!prev_nmi_count) + return -1; + printk(KERN_INFO "Testing NMI watchdog ... "); + if (nmi_watchdog == NMI_LOCAL_APIC) + smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); + for (cpu = 0; cpu < NR_CPUS; cpu++) prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; local_irq_enable(); @@ -123,12 +151,18 @@ static int __init check_nmi_watchdog(void) continue; #endif if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { - printk("CPU#%d: NMI appears to be stuck!\n", cpu); + endflag = 1; + printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", + cpu, + prev_nmi_count[cpu], + nmi_count(cpu)); nmi_active = 0; lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; + kfree(prev_nmi_count); return -1; } } + endflag = 1; printk("OK.\n"); /* now that we know it works we can reduce NMI frequency to @@ -136,6 +170,7 @@ static int __init check_nmi_watchdog(void) if (nmi_watchdog == NMI_LOCAL_APIC) nmi_hz = 1; + kfree(prev_nmi_count); return 0; } /* This needs to happen later in boot so counters are working */ -- cgit v1.1 From f2b36db692b7ff6972320ad9839ae656a3b0ee3e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 30 Oct 2005 14:59:41 -0800 Subject: [PATCH] i386: move apic init in init_IRQs All kinds of ugliness exists because we don't initialize the apics during init_IRQs. - We calibrate jiffies in non apic mode even when we are using apics. - We have to have special code to initialize the apics when non-smp. - The legacy i8259 must exist and be setup correctly, even when we won't use it past initialization. - The kexec on panic code must restore the state of the io_apics. - init/main.c needs a special case for !smp smp_init on x86 In addition to pure code movement I needed a couple of non-obvious changes: - Move setup_boot_APIC_clock into APIC_late_time_init for simplicity. - Use cpu_khz to generate a better approximation of loops_per_jiffies so I can verify the timer interrupt is working. - Call setup_apic_nmi_watchdog again after cpu_khz is initialized on the boot cpu. Signed-off-by: Eric W. Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/apic.c | 81 ++++++++++++++++++++++++++++++++++++---------- arch/i386/kernel/i8259.c | 4 +++ arch/i386/kernel/io_apic.c | 6 +++- arch/i386/kernel/smpboot.c | 68 +++++++++----------------------------- arch/i386/kernel/time.c | 12 ++++++- 5 files changed, 100 insertions(+), 71 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 5546dde..8d81b7b 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -803,6 +803,7 @@ no_apic: void __init init_apic_mappings(void) { + unsigned int orig_apicid; unsigned long apic_phys; /* @@ -824,8 +825,11 @@ void __init init_apic_mappings(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - if (boot_cpu_physical_apicid == -1U) - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); + orig_apicid = boot_cpu_physical_apicid; + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); + if ((orig_apicid != -1U) && (orig_apicid != boot_cpu_physical_apicid)) + printk(KERN_WARNING "Boot APIC ID in local APIC unexpected (%d vs %d)", + orig_apicid, boot_cpu_physical_apicid); #ifdef CONFIG_X86_IO_APIC { @@ -1046,9 +1050,11 @@ static unsigned int calibration_result; void __init setup_boot_APIC_clock(void) { + unsigned long flags; apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"); using_apic_timer = 1; + local_irq_save(flags); local_irq_disable(); calibration_result = calibrate_APIC_clock(); @@ -1057,7 +1063,7 @@ void __init setup_boot_APIC_clock(void) */ setup_APIC_timer(calibration_result); - local_irq_enable(); + local_irq_restore(flags); } void __devinit setup_secondary_APIC_clock(void) @@ -1254,40 +1260,81 @@ fastcall void smp_error_interrupt(struct pt_regs *regs) } /* - * This initializes the IO-APIC and APIC hardware if this is - * a UP kernel. + * This initializes the IO-APIC and APIC hardware. */ -int __init APIC_init_uniprocessor (void) +int __init APIC_init(void) { - if (enable_local_apic < 0) - clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); + if (enable_local_apic < 0) { + printk(KERN_INFO "APIC disabled\n"); + return -1; + } - if (!smp_found_config && !cpu_has_apic) + /* See if we have a SMP configuration or have forced enabled + * the local apic. + */ + if (!smp_found_config && !acpi_lapic && !cpu_has_apic) { + enable_local_apic = -1; return -1; + } /* - * Complain if the BIOS pretends there is one. + * Complain if the BIOS pretends there is an apic. + * Then get out because we don't have an a local apic. */ if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); + printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); + enable_local_apic = -1; return -1; } verify_local_APIC(); + /* + * Should not be necessary because the MP table should list the boot + * CPU too, but we do it for the sake of robustness anyway. + * Makes no sense to do this check in clustered apic mode, so skip it + */ + if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { + printk("weird, boot CPU (#%d) not listed by the BIOS.\n", + boot_cpu_physical_apicid); + physid_set(hard_smp_processor_id(), phys_cpu_present_map); + } + + /* + * Switch from PIC to APIC mode. + */ connect_bsp_APIC(); + setup_local_APIC(); - phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); +#ifdef CONFIG_X86_IO_APIC + /* + * Now start the IO-APICs + */ + if (smp_found_config && !skip_ioapic_setup && nr_ioapics) + setup_IO_APIC(); +#endif + return 0; +} - setup_local_APIC(); +void __init APIC_late_time_init(void) +{ + /* Improve our loops per jiffy estimate */ + loops_per_jiffy = ((1000 + HZ - 1)/HZ)*cpu_khz; + boot_cpu_data.loops_per_jiffy = loops_per_jiffy; + cpu_data[0].loops_per_jiffy = loops_per_jiffy; + + /* setup_apic_nmi_watchdog doesn't work properly before cpu_khz is + * initialized. So redo it here to ensure the boot cpu is setup + * properly. + */ + if (nmi_watchdog == NMI_LOCAL_APIC) + setup_apic_nmi_watchdog(); #ifdef CONFIG_X86_IO_APIC - if (smp_found_config) - if (!skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); + if (smp_found_config && !skip_ioapic_setup && nr_ioapics) + IO_APIC_late_time_init(); #endif setup_boot_APIC_clock(); - - return 0; } diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index 323ef8a..d86f249 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c @@ -435,4 +435,8 @@ void __init init_IRQ(void) setup_irq(FPU_IRQ, &fpu_irq); irq_ctx_init(smp_processor_id()); + +#ifdef CONFIG_X86_LOCAL_APIC + APIC_init(); +#endif } diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index cc5d7ac..5a77c52 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -2387,11 +2387,15 @@ void __init setup_IO_APIC(void) sync_Arb_IDs(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); - check_timer(); if (!acpi_ioapic) print_IO_APIC(); } +void __init IO_APIC_late_time_init(void) +{ + check_timer(); +} + /* * Called after all the initialization is done. If we didnt find any * APIC bugs then we can allow the modify fast path diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 01b618e..5a2bbe0 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -1078,6 +1078,16 @@ void *xquad_portio; EXPORT_SYMBOL(xquad_portio); #endif +/* + * Fall back to non SMP mode after errors. + * + */ +static __init void disable_smp(void) +{ + cpu_set(0, cpu_sibling_map[0]); + cpu_set(0, cpu_core_map[0]); +} + static void __init smp_boot_cpus(unsigned int max_cpus) { int apicid, cpu, bit, kicked; @@ -1090,7 +1100,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus) printk("CPU%d: ", 0); print_cpu_info(&cpu_data[0]); - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); boot_cpu_logical_apicid = logical_smp_processor_id(); x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; @@ -1102,68 +1111,27 @@ static void __init smp_boot_cpus(unsigned int max_cpus) cpus_clear(cpu_core_map[0]); cpu_set(0, cpu_core_map[0]); + map_cpu_to_logical_apicid(); + /* * If we couldn't find an SMP configuration at boot time, * get out of here now! */ if (!smp_found_config && !acpi_lapic) { printk(KERN_NOTICE "SMP motherboard not detected.\n"); - smpboot_clear_io_apic_irqs(); - phys_cpu_present_map = physid_mask_of_physid(0); - if (APIC_init_uniprocessor()) - printk(KERN_NOTICE "Local APIC not detected." - " Using dummy APIC emulation.\n"); - map_cpu_to_logical_apicid(); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); - return; - } - - /* - * Should not be necessary because the MP table should list the boot - * CPU too, but we do it for the sake of robustness anyway. - * Makes no sense to do this check in clustered apic mode, so skip it - */ - if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { - printk("weird, boot CPU (#%d) not listed by the BIOS.\n", - boot_cpu_physical_apicid); - physid_set(hard_smp_processor_id(), phys_cpu_present_map); - } - - /* - * If we couldn't find a local APIC, then get out of here now! - */ - if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) { - printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", - boot_cpu_physical_apicid); - printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); - smpboot_clear_io_apic_irqs(); - phys_cpu_present_map = physid_mask_of_physid(0); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + disable_smp(); return; } - verify_local_APIC(); - /* * If SMP should be disabled, then really disable it! */ - if (!max_cpus) { - smp_found_config = 0; - printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); - smpboot_clear_io_apic_irqs(); - phys_cpu_present_map = physid_mask_of_physid(0); - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); + if (!max_cpus || (enable_local_apic < 0)) { + printk(KERN_INFO "SMP mode deactivated.\n"); + disable_smp(); return; } - connect_bsp_APIC(); - setup_local_APIC(); - map_cpu_to_logical_apicid(); - - setup_portio_remap(); /* @@ -1244,10 +1212,6 @@ static void __init smp_boot_cpus(unsigned int max_cpus) cpu_set(0, cpu_sibling_map[0]); cpu_set(0, cpu_core_map[0]); - smpboot_setup_io_apic(); - - setup_boot_APIC_clock(); - /* * Synchronize the TSC with the AP */ diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 2883a4d..46c35ec 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -444,8 +444,8 @@ static int time_init_device(void) device_initcall(time_init_device); -#ifdef CONFIG_HPET_TIMER extern void (*late_time_init)(void); +#ifdef CONFIG_HPET_TIMER /* Duplicate of time_init() below, with hpet_enable part added */ static void __init hpet_time_init(void) { @@ -462,6 +462,11 @@ static void __init hpet_time_init(void) printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); time_init_hook(); + +#ifdef CONFIG_X86_LOCAL_APIC + if (enable_local_apic >= 0) + APIC_late_time_init(); +#endif } #endif @@ -486,4 +491,9 @@ void __init time_init(void) printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); time_init_hook(); + +#ifdef CONFIG_X86_LOCAL_APIC + if (enable_local_apic >= 0) + late_time_init = APIC_late_time_init; +#endif } -- cgit v1.1 From 3d1675b41b02d64bd1185903ea0d25a8c0bb6dea Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 30 Oct 2005 14:59:42 -0800 Subject: [PATCH] i386 kexec-on-panic: Don't shutdown the apics. It is dangerous to shutdown the apics in machine_crash_shutdown. With my previous patch to initialize apics in init_IRQ we should be able to boot a kernel without this. As long as we reinitialize the APICs we don't care what state they were in during bootup. This should make machine_crash_shutdown noticeably more reliable. Signed-off-by: Eric W. Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/crash.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 0248e08..af809cc 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -21,7 +21,6 @@ #include #include #include -#include #include @@ -148,7 +147,6 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) regs = &fixed_regs; } crash_save_this_cpu(regs, cpu); - disable_local_APIC(); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ halt(); @@ -188,7 +186,6 @@ static void nmi_shootdown_cpus(void) } /* Leave the nmi callback set */ - disable_local_APIC(); } #else static void nmi_shootdown_cpus(void) @@ -213,9 +210,5 @@ void machine_crash_shutdown(struct pt_regs *regs) /* Make a note of crashing cpu. Will be used in NMI callback.*/ crashing_cpu = smp_processor_id(); nmi_shootdown_cpus(); - lapic_shutdown(); -#if defined(CONFIG_X86_IO_APIC) - disable_IO_APIC(); -#endif crash_save_self(regs); } -- cgit v1.1 From daedb82d6b54e58a66ad1dce3509e699a5bd1b18 Mon Sep 17 00:00:00 2001 From: "Kamble, Nitin A" Date: Sun, 30 Oct 2005 14:59:43 -0800 Subject: [PATCH] x86: vmx cpu feature detection If VMX feature is available in the CPU, this patch will make it visible in the /proc/cpuinfo with the cpuid detection. Signed-Off-By: Nitin A Kamble Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 8bd77d9..41b871e 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -44,7 +44,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "est", + "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est", "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -- cgit v1.1 From c53117815771e1e84e6ba80a42fa1f8e330adb4d Mon Sep 17 00:00:00 2001 From: Brian Gerst Date: Sun, 30 Oct 2005 14:59:44 -0800 Subject: [PATCH] Clean up mtrr compat ioctl code Handle 32-bit mtrr ioctls in the mtrr driver instead of the ia32 compatability layer. Signed-off-by: Brian Gerst Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/mtrr/if.c | 119 +++++++++++++++++++++++++---------------- 1 file changed, 74 insertions(+), 45 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/mtrr/if.c b/arch/i386/kernel/cpu/mtrr/if.c index 1923e0a..cf39e20 100644 --- a/arch/i386/kernel/cpu/mtrr/if.c +++ b/arch/i386/kernel/cpu/mtrr/if.c @@ -149,60 +149,89 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) return -EINVAL; } -static int -mtrr_ioctl(struct inode *inode, struct file *file, - unsigned int cmd, unsigned long __arg) +static long +mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) { - int err; + int err = 0; mtrr_type type; struct mtrr_sentry sentry; struct mtrr_gentry gentry; void __user *arg = (void __user *) __arg; switch (cmd) { + case MTRRIOC_ADD_ENTRY: + case MTRRIOC_SET_ENTRY: + case MTRRIOC_DEL_ENTRY: + case MTRRIOC_KILL_ENTRY: + case MTRRIOC_ADD_PAGE_ENTRY: + case MTRRIOC_SET_PAGE_ENTRY: + case MTRRIOC_DEL_PAGE_ENTRY: + case MTRRIOC_KILL_PAGE_ENTRY: + if (copy_from_user(&sentry, arg, sizeof sentry)) + return -EFAULT; + break; + case MTRRIOC_GET_ENTRY: + case MTRRIOC_GET_PAGE_ENTRY: + if (copy_from_user(&gentry, arg, sizeof gentry)) + return -EFAULT; + break; +#ifdef CONFIG_COMPAT + case MTRRIOC32_ADD_ENTRY: + case MTRRIOC32_SET_ENTRY: + case MTRRIOC32_DEL_ENTRY: + case MTRRIOC32_KILL_ENTRY: + case MTRRIOC32_ADD_PAGE_ENTRY: + case MTRRIOC32_SET_PAGE_ENTRY: + case MTRRIOC32_DEL_PAGE_ENTRY: + case MTRRIOC32_KILL_PAGE_ENTRY: { + struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)__arg; + err = get_user(sentry.base, &s32->base); + err |= get_user(sentry.size, &s32->size); + err |= get_user(sentry.type, &s32->type); + if (err) + return err; + break; + } + case MTRRIOC32_GET_ENTRY: + case MTRRIOC32_GET_PAGE_ENTRY: { + struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; + err = get_user(gentry.regnum, &g32->regnum); + err |= get_user(gentry.base, &g32->base); + err |= get_user(gentry.size, &g32->size); + err |= get_user(gentry.type, &g32->type); + if (err) + return err; + break; + } +#endif + } + + switch (cmd) { default: return -ENOTTY; case MTRRIOC_ADD_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, file, 0); - if (err < 0) - return err; break; case MTRRIOC_SET_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_add(sentry.base, sentry.size, sentry.type, 0); - if (err < 0) - return err; break; case MTRRIOC_DEL_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_file_del(sentry.base, sentry.size, file, 0); - if (err < 0) - return err; break; case MTRRIOC_KILL_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_del(-1, sentry.base, sentry.size); - if (err < 0) - return err; break; case MTRRIOC_GET_ENTRY: - if (copy_from_user(&gentry, arg, sizeof gentry)) - return -EFAULT; if (gentry.regnum >= num_var_ranges) return -EINVAL; mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size, &type); @@ -217,60 +246,59 @@ mtrr_ioctl(struct inode *inode, struct file *file, gentry.type = type; } - if (copy_to_user(arg, &gentry, sizeof gentry)) - return -EFAULT; break; case MTRRIOC_ADD_PAGE_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_file_add(sentry.base, sentry.size, sentry.type, 1, file, 1); - if (err < 0) - return err; break; case MTRRIOC_SET_PAGE_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_add_page(sentry.base, sentry.size, sentry.type, 0); - if (err < 0) - return err; break; case MTRRIOC_DEL_PAGE_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_file_del(sentry.base, sentry.size, file, 1); - if (err < 0) - return err; break; case MTRRIOC_KILL_PAGE_ENTRY: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&sentry, arg, sizeof sentry)) - return -EFAULT; err = mtrr_del_page(-1, sentry.base, sentry.size); - if (err < 0) - return err; break; case MTRRIOC_GET_PAGE_ENTRY: - if (copy_from_user(&gentry, arg, sizeof gentry)) - return -EFAULT; if (gentry.regnum >= num_var_ranges) return -EINVAL; mtrr_if->get(gentry.regnum, &gentry.base, &gentry.size, &type); gentry.type = type; + break; + } + + if (err) + return err; + switch(cmd) { + case MTRRIOC_GET_ENTRY: + case MTRRIOC_GET_PAGE_ENTRY: if (copy_to_user(arg, &gentry, sizeof gentry)) - return -EFAULT; + err = -EFAULT; + break; +#ifdef CONFIG_COMPAT + case MTRRIOC32_GET_ENTRY: + case MTRRIOC32_GET_PAGE_ENTRY: { + struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; + err = put_user(gentry.base, &g32->base); + err |= put_user(gentry.size, &g32->size); + err |= put_user(gentry.regnum, &g32->regnum); + err |= put_user(gentry.type, &g32->type); break; } - return 0; +#endif + } + return err; } static int @@ -310,7 +338,8 @@ static struct file_operations mtrr_fops = { .read = seq_read, .llseek = seq_lseek, .write = mtrr_write, - .ioctl = mtrr_ioctl, + .unlocked_ioctl = mtrr_ioctl, + .compat_ioctl = mtrr_ioctl, .release = mtrr_close, }; -- cgit v1.1 From 009b29d90f575a83eba185950a7182ab05e7741a Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Sun, 30 Oct 2005 14:59:46 -0800 Subject: [PATCH] kdump/i386: apic verification failure fix o Removes the unnecessary call to local_irq_disable(). o Kdump was failing while second kernel was coming up. Check for presence of boot cpu apic id was failing in (apic_id_registered), hence hitting BUG(). o This should not have failed because before calling setup_local_APIC(), it is ensured that even if BIOS has not reported boot cpu, then hard set the prence of it. Problem happens because of usage of hard_smp_processor_id() which is hardcoded to zero in case of non SMP kernel. In kdump case second kernel can boot on a cpu whose boot cpu id is not zero. o Using boot_cpu_physical_apicid instead to hard set the presence of boot cpu. Signed-off-by: Vivek Goyal Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/apic.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 8d81b7b..9204be6 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -1055,7 +1055,6 @@ void __init setup_boot_APIC_clock(void) using_apic_timer = 1; local_irq_save(flags); - local_irq_disable(); calibration_result = calibrate_APIC_clock(); /* @@ -1299,7 +1298,7 @@ int __init APIC_init(void) if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { printk("weird, boot CPU (#%d) not listed by the BIOS.\n", boot_cpu_physical_apicid); - physid_set(hard_smp_processor_id(), phys_cpu_present_map); + physid_set(boot_cpu_physical_apicid, phys_cpu_present_map); } /* -- cgit v1.1 From 6c180d94abdcfb77a2fe4275bd03687fa159acd7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 30 Oct 2005 14:59:47 -0800 Subject: [PATCH] i386 mpparse: Only ignore lapic information we can't store After staring at mpparse.c for a little longer I noticed that when we hit our limit of num_processors we are filtering out information about other processors that we can still store. This patch just reorders the code so we store everything we can. This should avoid the incorrect warning about our boot CPU not being listed by the BIOS that we are now getting in the kexec on panic case, and it should allow us to detect all apicid conflicts even when our physical number of cpus exceeds maxcpus. Signed-off-by: Eric W. Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/mpparse.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 3751bb4..8f767d9 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -182,17 +182,6 @@ static void __devinit MP_processor_info (struct mpc_config_processor *m) boot_cpu_physical_apicid = m->mpc_apicid; } - if (num_processors >= NR_CPUS) { - printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." - " Processor ignored.\n", NR_CPUS); - return; - } - - if (num_processors >= maxcpus) { - printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." - " Processor ignored.\n", maxcpus); - return; - } ver = m->mpc_apicver; if (!MP_valid_apicid(apicid, ver)) { @@ -201,11 +190,6 @@ static void __devinit MP_processor_info (struct mpc_config_processor *m) return; } - cpu_set(num_processors, cpu_possible_map); - num_processors++; - phys_cpu = apicid_to_cpu_present(apicid); - physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); - /* * Validate version */ @@ -216,6 +200,25 @@ static void __devinit MP_processor_info (struct mpc_config_processor *m) ver = 0x10; } apic_version[m->mpc_apicid] = ver; + + phys_cpu = apicid_to_cpu_present(apicid); + physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu); + + if (num_processors >= NR_CPUS) { + printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." + " Processor ignored.\n", NR_CPUS); + return; + } + + if (num_processors >= maxcpus) { + printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." + " Processor ignored.\n", maxcpus); + return; + } + + cpu_set(num_processors, cpu_possible_map); + num_processors++; + if ((num_processors > 8) && APIC_XAPIC(ver) && (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) -- cgit v1.1 From 5d35704028d09a183448daceab5dcb94f21a3645 Mon Sep 17 00:00:00 2001 From: Magnus Damm Date: Sun, 30 Oct 2005 14:59:48 -0800 Subject: [PATCH] i386: srat on non-acpi hw fix This patch adds a check for the return value of acpi_find_root_pointer(). Without this patch systems without ACPI support such as QEMU crashes when booting a NUMA kernel with CONFIG_ACPI_SRAT=y. Signed-off-by: Magnus Damm Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/srat.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c index 516bf56..8de658d 100644 --- a/arch/i386/kernel/srat.c +++ b/arch/i386/kernel/srat.c @@ -327,7 +327,12 @@ int __init get_memcfg_from_srat(void) int tables = 0; int i = 0; - acpi_find_root_pointer(ACPI_PHYSICAL_ADDRESSING, rsdp_address); + if (ACPI_FAILURE(acpi_find_root_pointer(ACPI_PHYSICAL_ADDRESSING, + rsdp_address))) { + printk("%s: System description tables not found\n", + __FUNCTION__); + goto out_err; + } if (rsdp_address->pointer_type == ACPI_PHYSICAL_POINTER) { printk("%s: assigning address to rsdp\n", __FUNCTION__); -- cgit v1.1 From 1aa1a9f98ffd06e288be4d85ed814c6cdbccce82 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Sun, 30 Oct 2005 14:59:50 -0800 Subject: [PATCH] create and destroy cache sysfs entries based on cpu notifiers cpu cache entries should be populated only when cpu is online and removed when they are logically offlined. Without which entries are not removed when cpu is offlined, or dont appear when we boot with maxcpus=1 and then kick the rest of the cpus via echo 1 to the sysfs online file. - Changed __devinit to __cpuinit for consistency. - Changed sysfs_driver_register to register_cpu_notifier. Signed-off-by: Ashok Raj Signed-off-by: Venkatesh Pallipadi Cc: Dave Jones Cc: Zwane Mwaikambo Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/intel_cacheinfo.c | 60 ++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 17 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 7cc84a4..f083933 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -3,6 +3,7 @@ * * Changes: * Venkatesh Pallipadi : Adding cache identification through cpuid(4) + * Ashok Raj : Work with CPU hotplug infrastructure. */ #include @@ -28,7 +29,7 @@ struct _cache_table }; /* all the cache descriptor types we care about (no TLB or trace cache entries) */ -static struct _cache_table cache_table[] __devinitdata = +static struct _cache_table cache_table[] __cpuinitdata = { { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ @@ -119,7 +120,7 @@ struct _cpuid4_info { static unsigned short num_cache_leaves; -static int __devinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) +static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) { unsigned int eax, ebx, ecx, edx; union _cpuid4_leaf_eax cache_eax; @@ -154,7 +155,7 @@ static int __init find_num_cache_leaves(void) return i; } -unsigned int __devinit init_intel_cacheinfo(struct cpuinfo_x86 *c) +unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) { unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ @@ -289,7 +290,7 @@ static struct _cpuid4_info *cpuid4_info[NR_CPUS]; #define CPUID4_INFO_IDX(x,y) (&((cpuid4_info[x])[y])) #ifdef CONFIG_SMP -static void __devinit cache_shared_cpu_map_setup(unsigned int cpu, int index) +static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) { struct _cpuid4_info *this_leaf; unsigned long num_threads_sharing; @@ -322,7 +323,7 @@ static void free_cache_attributes(unsigned int cpu) cpuid4_info[cpu] = NULL; } -static int __devinit detect_cache_attributes(unsigned int cpu) +static int __cpuinit detect_cache_attributes(unsigned int cpu) { struct _cpuid4_info *this_leaf; unsigned long j; @@ -499,7 +500,7 @@ static void cpuid4_cache_sysfs_exit(unsigned int cpu) free_cache_attributes(cpu); } -static int __devinit cpuid4_cache_sysfs_init(unsigned int cpu) +static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) { if (num_cache_leaves == 0) @@ -530,7 +531,7 @@ err_out: } /* Add/Remove cache interface for CPU device */ -static int __devinit cache_add_dev(struct sys_device * sys_dev) +static int __cpuinit cache_add_dev(struct sys_device * sys_dev) { unsigned int cpu = sys_dev->id; unsigned long i, j; @@ -567,7 +568,7 @@ static int __devinit cache_add_dev(struct sys_device * sys_dev) return retval; } -static int __devexit cache_remove_dev(struct sys_device * sys_dev) +static void __cpuexit cache_remove_dev(struct sys_device * sys_dev) { unsigned int cpu = sys_dev->id; unsigned long i; @@ -576,24 +577,49 @@ static int __devexit cache_remove_dev(struct sys_device * sys_dev) kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); kobject_unregister(cache_kobject[cpu]); cpuid4_cache_sysfs_exit(cpu); - return 0; + return; +} + +static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + switch (action) { + case CPU_ONLINE: + cache_add_dev(sys_dev); + break; + case CPU_DEAD: + cache_remove_dev(sys_dev); + break; + } + return NOTIFY_OK; } -static struct sysdev_driver cache_sysdev_driver = { - .add = cache_add_dev, - .remove = __devexit_p(cache_remove_dev), +static struct notifier_block cacheinfo_cpu_notifier = +{ + .notifier_call = cacheinfo_cpu_callback, }; -/* Register/Unregister the cpu_cache driver */ -static int __devinit cache_register_driver(void) +static int __cpuinit cache_sysfs_init(void) { + int i; + if (num_cache_leaves == 0) return 0; - return sysdev_driver_register(&cpu_sysdev_class,&cache_sysdev_driver); + register_cpu_notifier(&cacheinfo_cpu_notifier); + + for_each_online_cpu(i) { + cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE, + (void *)(long)i); + } + + return 0; } -device_initcall(cache_register_driver); +device_initcall(cache_sysfs_init); #endif - -- cgit v1.1 From 96d55b882b85b26711a06d8fb2c901df9d52a48b Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Sun, 30 Oct 2005 15:00:07 -0800 Subject: [PATCH] uml: reuse i386 cpu-specific tuning Make UML share the underlying cpu-specific tuning done on i386. Actually, for now many config options aren't used a lot - but that can be done later. Also, UML relies on GCC optimization for things like memcpy and such more than i386, so specifying the correct -march and -mtune should be enough. Later, we may want to correct some other stuff. For instance, since FPU context switching, for us, is done (at least partially, i.e. between our kernelspace and userspace) by the host, we may allow usage of FPU operations by GCC. This doesn't hold for kernelspace vs. kernelspace, but we don't support preemption. Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 309 +------------------------------------------------ arch/i386/Kconfig.cpu | 309 +++++++++++++++++++++++++++++++++++++++++++++++++ arch/i386/Makefile | 31 +---- arch/i386/Makefile.cpu | 33 ++++++ 4 files changed, 345 insertions(+), 337 deletions(-) create mode 100644 arch/i386/Kconfig.cpu create mode 100644 arch/i386/Makefile.cpu (limited to 'arch/i386') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 35d3cff..5383e5e 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -155,309 +155,7 @@ config ES7000_CLUSTERED_APIC default y depends on SMP && X86_ES7000 && MPENTIUMIII -if !X86_ELAN - -choice - prompt "Processor family" - default M686 - -config M386 - bool "386" - ---help--- - This is the processor type of your CPU. This information is used for - optimizing purposes. In order to compile a kernel that can run on - all x86 CPU types (albeit not optimally fast), you can specify - "386" here. - - The kernel will not necessarily run on earlier architectures than - the one you have chosen, e.g. a Pentium optimized kernel will run on - a PPro, but not necessarily on a i486. - - Here are the settings recommended for greatest speed: - - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI - 486DLC/DLC2, UMC 486SX-S and NexGen Nx586. Only "386" kernels - will run on a 386 class machine. - - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or - SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S. - - "586" for generic Pentium CPUs lacking the TSC - (time stamp counter) register. - - "Pentium-Classic" for the Intel Pentium. - - "Pentium-MMX" for the Intel Pentium MMX. - - "Pentium-Pro" for the Intel Pentium Pro. - - "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron. - - "Pentium-III" for the Intel Pentium III or Coppermine Celeron. - - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron. - - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D). - - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird). - - "Crusoe" for the Transmeta Crusoe series. - - "Efficeon" for the Transmeta Efficeon series. - - "Winchip-C6" for original IDT Winchip. - - "Winchip-2" for IDT Winchip 2. - - "Winchip-2A" for IDT Winchips with 3dNow! capabilities. - - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). - - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. - - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above). - - If you don't know what to do, choose "386". - -config M486 - bool "486" - help - Select this for a 486 series processor, either Intel or one of the - compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX, - DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or - U5S. - -config M586 - bool "586/K5/5x86/6x86/6x86MX" - help - Select this for an 586 or 686 series processor such as the AMD K5, - the Cyrix 5x86, 6x86 and 6x86MX. This choice does not - assume the RDTSC (Read Time Stamp Counter) instruction. - -config M586TSC - bool "Pentium-Classic" - help - Select this for a Pentium Classic processor with the RDTSC (Read - Time Stamp Counter) instruction for benchmarking. - -config M586MMX - bool "Pentium-MMX" - help - Select this for a Pentium with the MMX graphics/multimedia - extended instructions. - -config M686 - bool "Pentium-Pro" - help - Select this for Intel Pentium Pro chips. This enables the use of - Pentium Pro extended instructions, and disables the init-time guard - against the f00f bug found in earlier Pentiums. - -config MPENTIUMII - bool "Pentium-II/Celeron(pre-Coppermine)" - help - Select this for Intel chips based on the Pentium-II and - pre-Coppermine Celeron core. This option enables an unaligned - copy optimization, compiles the kernel with optimization flags - tailored for the chip, and applies any applicable Pentium Pro - optimizations. - -config MPENTIUMIII - bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon" - help - Select this for Intel chips based on the Pentium-III and - Celeron-Coppermine core. This option enables use of some - extended prefetch instructions in addition to the Pentium II - extensions. - -config MPENTIUMM - bool "Pentium M" - help - Select this for Intel Pentium M (not Pentium-4 M) - notebook chips. - -config MPENTIUM4 - bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon" - help - Select this for Intel Pentium 4 chips. This includes the - Pentium 4, P4-based Celeron and Xeon, and Pentium-4 M - (not Pentium M) chips. This option enables compile flags - optimized for the chip, uses the correct cache shift, and - applies any applicable Pentium III optimizations. - -config MK6 - bool "K6/K6-II/K6-III" - help - Select this for an AMD K6-family processor. Enables use of - some extended instructions, and passes appropriate optimization - flags to GCC. - -config MK7 - bool "Athlon/Duron/K7" - help - Select this for an AMD Athlon K7-family processor. Enables use of - some extended instructions, and passes appropriate optimization - flags to GCC. - -config MK8 - bool "Opteron/Athlon64/Hammer/K8" - help - Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables - use of some extended instructions, and passes appropriate optimization - flags to GCC. - -config MCRUSOE - bool "Crusoe" - help - Select this for a Transmeta Crusoe processor. Treats the processor - like a 586 with TSC, and sets some GCC optimization flags (like a - Pentium Pro with no alignment requirements). - -config MEFFICEON - bool "Efficeon" - help - Select this for a Transmeta Efficeon processor. - -config MWINCHIPC6 - bool "Winchip-C6" - help - Select this for an IDT Winchip C6 chip. Linux and GCC - treat this chip as a 586TSC with some extended instructions - and alignment requirements. - -config MWINCHIP2 - bool "Winchip-2" - help - Select this for an IDT Winchip-2. Linux and GCC - treat this chip as a 586TSC with some extended instructions - and alignment requirements. - -config MWINCHIP3D - bool "Winchip-2A/Winchip-3" - help - Select this for an IDT Winchip-2A or 3. Linux and GCC - treat this chip as a 586TSC with some extended instructions - and alignment reqirements. Also enable out of order memory - stores for this CPU, which can increase performance of some - operations. - -config MGEODEGX1 - bool "GeodeGX1" - help - Select this for a Geode GX1 (Cyrix MediaGX) chip. - -config MCYRIXIII - bool "CyrixIII/VIA-C3" - help - Select this for a Cyrix III or C3 chip. Presently Linux and GCC - treat this chip as a generic 586. Whilst the CPU is 686 class, - it lacks the cmov extension which gcc assumes is present when - generating 686 code. - Note that Nehemiah (Model 9) and above will not boot with this - kernel due to them lacking the 3DNow! instructions used in earlier - incarnations of the CPU. - -config MVIAC3_2 - bool "VIA C3-2 (Nehemiah)" - help - Select this for a VIA C3 "Nehemiah". Selecting this enables usage - of SSE and tells gcc to treat the CPU as a 686. - Note, this kernel will not boot on older (pre model 9) C3s. - -endchoice - -config X86_GENERIC - bool "Generic x86 support" - help - Instead of just including optimizations for the selected - x86 variant (e.g. PII, Crusoe or Athlon), include some more - generic optimizations as well. This will make the kernel - perform better on x86 CPUs other than that selected. - - This is really intended for distributors who need more - generic optimizations. - -endif - -# -# Define implied options from the CPU selection here -# -config X86_CMPXCHG - bool - depends on !M386 - default y - -config X86_XADD - bool - depends on !M386 - default y - -config X86_L1_CACHE_SHIFT - int - default "7" if MPENTIUM4 || X86_GENERIC - default "4" if X86_ELAN || M486 || M386 - default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1 - default "6" if MK7 || MK8 || MPENTIUMM - -config RWSEM_GENERIC_SPINLOCK - bool - depends on M386 - default y - -config RWSEM_XCHGADD_ALGORITHM - bool - depends on !M386 - default y - -config GENERIC_CALIBRATE_DELAY - bool - default y - -config X86_PPRO_FENCE - bool - depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1 - default y - -config X86_F00F_BUG - bool - depends on M586MMX || M586TSC || M586 || M486 || M386 - default y - -config X86_WP_WORKS_OK - bool - depends on !M386 - default y - -config X86_INVLPG - bool - depends on !M386 - default y - -config X86_BSWAP - bool - depends on !M386 - default y - -config X86_POPAD_OK - bool - depends on !M386 - default y - -config X86_CMPXCHG64 - bool - depends on !M386 && !M486 - default y - -config X86_ALIGNMENT_16 - bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 - default y - -config X86_GOOD_APIC - bool - depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON - default y - -config X86_INTEL_USERCOPY - bool - depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON - default y - -config X86_USE_PPRO_CHECKSUM - bool - depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON - default y - -config X86_USE_3DNOW - bool - depends on MCYRIXIII || MK7 - default y - -config X86_OOSTORE - bool - depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR - default y +source "arch/i386/Kconfig.cpu" config HPET_TIMER bool "HPET Timer Support" @@ -570,11 +268,6 @@ config X86_VISWS_APIC depends on X86_VISWS default y -config X86_TSC - bool - depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ - default y - config X86_MCE bool "Machine Check Exception" depends on !X86_VOYAGER diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu new file mode 100644 index 0000000..53bbb3c --- /dev/null +++ b/arch/i386/Kconfig.cpu @@ -0,0 +1,309 @@ +# Put here option for CPU selection and depending optimization +if !X86_ELAN + +choice + prompt "Processor family" + default M686 + +config M386 + bool "386" + ---help--- + This is the processor type of your CPU. This information is used for + optimizing purposes. In order to compile a kernel that can run on + all x86 CPU types (albeit not optimally fast), you can specify + "386" here. + + The kernel will not necessarily run on earlier architectures than + the one you have chosen, e.g. a Pentium optimized kernel will run on + a PPro, but not necessarily on a i486. + + Here are the settings recommended for greatest speed: + - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI + 486DLC/DLC2, UMC 486SX-S and NexGen Nx586. Only "386" kernels + will run on a 386 class machine. + - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or + SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S. + - "586" for generic Pentium CPUs lacking the TSC + (time stamp counter) register. + - "Pentium-Classic" for the Intel Pentium. + - "Pentium-MMX" for the Intel Pentium MMX. + - "Pentium-Pro" for the Intel Pentium Pro. + - "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron. + - "Pentium-III" for the Intel Pentium III or Coppermine Celeron. + - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron. + - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D). + - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird). + - "Crusoe" for the Transmeta Crusoe series. + - "Efficeon" for the Transmeta Efficeon series. + - "Winchip-C6" for original IDT Winchip. + - "Winchip-2" for IDT Winchip 2. + - "Winchip-2A" for IDT Winchips with 3dNow! capabilities. + - "GeodeGX1" for Geode GX1 (Cyrix MediaGX). + - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. + - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above). + + If you don't know what to do, choose "386". + +config M486 + bool "486" + help + Select this for a 486 series processor, either Intel or one of the + compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX, + DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or + U5S. + +config M586 + bool "586/K5/5x86/6x86/6x86MX" + help + Select this for an 586 or 686 series processor such as the AMD K5, + the Cyrix 5x86, 6x86 and 6x86MX. This choice does not + assume the RDTSC (Read Time Stamp Counter) instruction. + +config M586TSC + bool "Pentium-Classic" + help + Select this for a Pentium Classic processor with the RDTSC (Read + Time Stamp Counter) instruction for benchmarking. + +config M586MMX + bool "Pentium-MMX" + help + Select this for a Pentium with the MMX graphics/multimedia + extended instructions. + +config M686 + bool "Pentium-Pro" + help + Select this for Intel Pentium Pro chips. This enables the use of + Pentium Pro extended instructions, and disables the init-time guard + against the f00f bug found in earlier Pentiums. + +config MPENTIUMII + bool "Pentium-II/Celeron(pre-Coppermine)" + help + Select this for Intel chips based on the Pentium-II and + pre-Coppermine Celeron core. This option enables an unaligned + copy optimization, compiles the kernel with optimization flags + tailored for the chip, and applies any applicable Pentium Pro + optimizations. + +config MPENTIUMIII + bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon" + help + Select this for Intel chips based on the Pentium-III and + Celeron-Coppermine core. This option enables use of some + extended prefetch instructions in addition to the Pentium II + extensions. + +config MPENTIUMM + bool "Pentium M" + help + Select this for Intel Pentium M (not Pentium-4 M) + notebook chips. + +config MPENTIUM4 + bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon" + help + Select this for Intel Pentium 4 chips. This includes the + Pentium 4, P4-based Celeron and Xeon, and Pentium-4 M + (not Pentium M) chips. This option enables compile flags + optimized for the chip, uses the correct cache shift, and + applies any applicable Pentium III optimizations. + +config MK6 + bool "K6/K6-II/K6-III" + help + Select this for an AMD K6-family processor. Enables use of + some extended instructions, and passes appropriate optimization + flags to GCC. + +config MK7 + bool "Athlon/Duron/K7" + help + Select this for an AMD Athlon K7-family processor. Enables use of + some extended instructions, and passes appropriate optimization + flags to GCC. + +config MK8 + bool "Opteron/Athlon64/Hammer/K8" + help + Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables + use of some extended instructions, and passes appropriate optimization + flags to GCC. + +config MCRUSOE + bool "Crusoe" + help + Select this for a Transmeta Crusoe processor. Treats the processor + like a 586 with TSC, and sets some GCC optimization flags (like a + Pentium Pro with no alignment requirements). + +config MEFFICEON + bool "Efficeon" + help + Select this for a Transmeta Efficeon processor. + +config MWINCHIPC6 + bool "Winchip-C6" + help + Select this for an IDT Winchip C6 chip. Linux and GCC + treat this chip as a 586TSC with some extended instructions + and alignment requirements. + +config MWINCHIP2 + bool "Winchip-2" + help + Select this for an IDT Winchip-2. Linux and GCC + treat this chip as a 586TSC with some extended instructions + and alignment requirements. + +config MWINCHIP3D + bool "Winchip-2A/Winchip-3" + help + Select this for an IDT Winchip-2A or 3. Linux and GCC + treat this chip as a 586TSC with some extended instructions + and alignment reqirements. Also enable out of order memory + stores for this CPU, which can increase performance of some + operations. + +config MGEODEGX1 + bool "GeodeGX1" + help + Select this for a Geode GX1 (Cyrix MediaGX) chip. + +config MCYRIXIII + bool "CyrixIII/VIA-C3" + help + Select this for a Cyrix III or C3 chip. Presently Linux and GCC + treat this chip as a generic 586. Whilst the CPU is 686 class, + it lacks the cmov extension which gcc assumes is present when + generating 686 code. + Note that Nehemiah (Model 9) and above will not boot with this + kernel due to them lacking the 3DNow! instructions used in earlier + incarnations of the CPU. + +config MVIAC3_2 + bool "VIA C3-2 (Nehemiah)" + help + Select this for a VIA C3 "Nehemiah". Selecting this enables usage + of SSE and tells gcc to treat the CPU as a 686. + Note, this kernel will not boot on older (pre model 9) C3s. + +endchoice + +config X86_GENERIC + bool "Generic x86 support" + help + Instead of just including optimizations for the selected + x86 variant (e.g. PII, Crusoe or Athlon), include some more + generic optimizations as well. This will make the kernel + perform better on x86 CPUs other than that selected. + + This is really intended for distributors who need more + generic optimizations. + +endif + +# +# Define implied options from the CPU selection here +# +config X86_CMPXCHG + bool + depends on !M386 + default y + +config X86_XADD + bool + depends on !M386 + default y + +config X86_L1_CACHE_SHIFT + int + default "7" if MPENTIUM4 || X86_GENERIC + default "4" if X86_ELAN || M486 || M386 + default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1 + default "6" if MK7 || MK8 || MPENTIUMM + +config RWSEM_GENERIC_SPINLOCK + bool + depends on M386 + default y + +config RWSEM_XCHGADD_ALGORITHM + bool + depends on !M386 + default y + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config X86_PPRO_FENCE + bool + depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1 + default y + +config X86_F00F_BUG + bool + depends on M586MMX || M586TSC || M586 || M486 || M386 + default y + +config X86_WP_WORKS_OK + bool + depends on !M386 + default y + +config X86_INVLPG + bool + depends on !M386 + default y + +config X86_BSWAP + bool + depends on !M386 + default y + +config X86_POPAD_OK + bool + depends on !M386 + default y + +config X86_CMPXCHG64 + bool + depends on !M386 && !M486 + default y + +config X86_ALIGNMENT_16 + bool + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 + default y + +config X86_GOOD_APIC + bool + depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON + default y + +config X86_INTEL_USERCOPY + bool + depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON + default y + +config X86_USE_PPRO_CHECKSUM + bool + depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON + default y + +config X86_USE_3DNOW + bool + depends on MCYRIXIII || MK7 + default y + +config X86_OOSTORE + bool + depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR + default y + +config X86_TSC + bool + depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1) && !X86_NUMAQ + default y diff --git a/arch/i386/Makefile b/arch/i386/Makefile index 0995199..d121ea1 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile @@ -34,35 +34,8 @@ CFLAGS += -pipe -msoft-float # prevent gcc from keeping the stack 16 byte aligned CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) -align := $(cc-option-align) -cflags-$(CONFIG_M386) += -march=i386 -cflags-$(CONFIG_M486) += -march=i486 -cflags-$(CONFIG_M586) += -march=i586 -cflags-$(CONFIG_M586TSC) += -march=i586 -cflags-$(CONFIG_M586MMX) += $(call cc-option,-march=pentium-mmx,-march=i586) -cflags-$(CONFIG_M686) += -march=i686 -cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call cc-option,-mtune=pentium2) -cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call cc-option,-mtune=pentium3) -cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call cc-option,-mtune=pentium3) -cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call cc-option,-mtune=pentium4) -cflags-$(CONFIG_MK6) += -march=k6 -# Please note, that patches that add -march=athlon-xp and friends are pointless. -# They make zero difference whatsosever to performance at this time. -cflags-$(CONFIG_MK7) += $(call cc-option,-march=athlon,-march=i686 $(align)-functions=4) -cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,$(call cc-option,-march=athlon,-march=i686 $(align)-functions=4)) -cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 -cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call cc-option,-mtune=pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 -cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) -cflags-$(CONFIG_MWINCHIP2) += $(call cc-option,-march=winchip2,-march=i586) -cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) -cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 -cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) - -# AMD Elan support -cflags-$(CONFIG_X86_ELAN) += -march=i486 - -# Geode GX1 support -cflags-$(CONFIG_MGEODEGX1) += $(call cc-option,-march=pentium-mmx,-march=i486) +# CPU-specific tuning. Anything which can be shared with UML should go here. +include $(srctree)/arch/i386/Makefile.cpu # -mregparm=3 works ok on gcc-3.0 and later # diff --git a/arch/i386/Makefile.cpu b/arch/i386/Makefile.cpu new file mode 100644 index 0000000..86c7bb1 --- /dev/null +++ b/arch/i386/Makefile.cpu @@ -0,0 +1,33 @@ +# CPU tuning section - shared with UML. +# Must change only cflags-y (or [yn]), not CFLAGS! That makes a difference for UML. + +align := $(cc-option-align) +cflags-$(CONFIG_M386) += -march=i386 +cflags-$(CONFIG_M486) += -march=i486 +cflags-$(CONFIG_M586) += -march=i586 +cflags-$(CONFIG_M586TSC) += -march=i586 +cflags-$(CONFIG_M586MMX) += $(call cc-option,-march=pentium-mmx,-march=i586) +cflags-$(CONFIG_M686) += -march=i686 +cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call cc-option,-mtune=pentium2) +cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call cc-option,-mtune=pentium3) +cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call cc-option,-mtune=pentium3) +cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call cc-option,-mtune=pentium4) +cflags-$(CONFIG_MK6) += -march=k6 +# Please note, that patches that add -march=athlon-xp and friends are pointless. +# They make zero difference whatsosever to performance at this time. +cflags-$(CONFIG_MK7) += $(call cc-option,-march=athlon,-march=i686 $(align)-functions=4) +cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,$(call cc-option,-march=athlon,-march=i686 $(align)-functions=4)) +cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 +cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call cc-option,-mtune=pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 +cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) +cflags-$(CONFIG_MWINCHIP2) += $(call cc-option,-march=winchip2,-march=i586) +cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) +cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 +cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) + +# AMD Elan support +cflags-$(CONFIG_X86_ELAN) += -march=i486 + +# Geode GX1 support +cflags-$(CONFIG_MGEODEGX1) += $(call cc-option,-march=pentium-mmx,-march=i486) + -- cgit v1.1 From d89ea9b8bb4c4ad63122cd2d2ee5110a52da51b8 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso Date: Sun, 30 Oct 2005 15:00:09 -0800 Subject: [PATCH] i386: use -mcpu, not -mtune, for GCCs older than 3.4 I just noted that -mtune is used, which is only supported on recent GCCs; by reading http://gcc.gnu.org/gcc-3.4/changes.html, you see "-mcpu has been renamed to -mtune.", so for GCC < 3.4 we're not using any specific tuning in the appropriate cases. However -mcpu is deprecated, so use -mtune when possible. This was introduced by commit e9d4dce954a60dc23dd1d967766ca2347b780e54 of the old tree (between 2.6.10-rc3 and 2.6.10) by Linus Torvalds, to remove the use of -march, since that could trigger gcc using SSE on its own. But no attention was used about using -mcpu vs. -mtune. And btw, the old 2.6.4 code (for instance) was: cflags-$(CONFIG_MPENTIUMII) += $(call check_gcc,-march=pentium2,-march=i686) cflags-$(CONFIG_MPENTIUMIII) += $(call check_gcc,-march=pentium3,-march=i686) cflags-$(CONFIG_MPENTIUMM) += $(call check_gcc,-march=pentium3,-march=i686) cflags-$(CONFIG_MPENTIUM4) += $(call check_gcc,-march=pentium4,-march=i686) Signed-off-by: Paolo 'Blaisorblade' Giarrusso Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/Makefile.cpu | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/Makefile.cpu b/arch/i386/Makefile.cpu index 86c7bb1..8e51456 100644 --- a/arch/i386/Makefile.cpu +++ b/arch/i386/Makefile.cpu @@ -1,6 +1,14 @@ # CPU tuning section - shared with UML. # Must change only cflags-y (or [yn]), not CFLAGS! That makes a difference for UML. +#-mtune exists since gcc 3.4, and some -mcpu flavors didn't exist in gcc 2.95. +HAS_MTUNE := $(call cc-option-yn, -mtune=i386) +ifeq ($(HAS_MTUNE),y) +tune = $(call cc-option,-mtune=$(1),) +else +tune = $(call cc-option,-mcpu=$(1),) +endif + align := $(cc-option-align) cflags-$(CONFIG_M386) += -march=i386 cflags-$(CONFIG_M486) += -march=i486 @@ -8,17 +16,17 @@ cflags-$(CONFIG_M586) += -march=i586 cflags-$(CONFIG_M586TSC) += -march=i586 cflags-$(CONFIG_M586MMX) += $(call cc-option,-march=pentium-mmx,-march=i586) cflags-$(CONFIG_M686) += -march=i686 -cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call cc-option,-mtune=pentium2) -cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call cc-option,-mtune=pentium3) -cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call cc-option,-mtune=pentium3) -cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call cc-option,-mtune=pentium4) +cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call tune,pentium2) +cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call tune,pentium3) +cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call tune,pentium3) +cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call tune,pentium4) cflags-$(CONFIG_MK6) += -march=k6 # Please note, that patches that add -march=athlon-xp and friends are pointless. # They make zero difference whatsosever to performance at this time. cflags-$(CONFIG_MK7) += $(call cc-option,-march=athlon,-march=i686 $(align)-functions=4) cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,$(call cc-option,-march=athlon,-march=i686 $(align)-functions=4)) cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 -cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call cc-option,-mtune=pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 +cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call tune,pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586) cflags-$(CONFIG_MWINCHIP2) += $(call cc-option,-march=winchip2,-march=i586) cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) -- cgit v1.1 From dfb7dac3af623a68262536437af008ed6aba4d88 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 30 Oct 2005 15:02:22 -0800 Subject: [PATCH] unify sys_ptrace prototype Make sure we always return, as all syscalls should. Also move the common prototype to Signed-off-by: Christoph Hellwig Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/ptrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 7b6368b..efd11f0 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -354,7 +354,7 @@ ptrace_set_thread_area(struct task_struct *child, return 0; } -asmlinkage int sys_ptrace(long request, long pid, long addr, long data) +asmlinkage long sys_ptrace(long request, long pid, long addr, long data) { struct task_struct *child; struct user * dummy = NULL; -- cgit v1.1 From ecea8d19c9f0ebd62ddaa07fc919ff4e4b820d99 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 30 Oct 2005 15:03:00 -0800 Subject: [PATCH] jiffies_64 cleanup Define jiffies_64 in kernel/timer.c rather than having 24 duplicated defines in each architecture. Signed-off-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/time.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 46c35ec..07471bb 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -74,10 +74,6 @@ int pit_latch_buggy; /* extern */ #include "do_timer.h" -u64 jiffies_64 = INITIAL_JIFFIES; - -EXPORT_SYMBOL(jiffies_64); - unsigned int cpu_khz; /* Detected as we calibrate the TSC */ EXPORT_SYMBOL(cpu_khz); -- cgit v1.1 From 874ec33ff9ccf3651590697a2c2923b911bf31d0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 30 Oct 2005 15:03:29 -0800 Subject: [PATCH] sparse cleanups: NULL pointers, C99 struct init. Convert most of the remaining "Using plain integer as NULL pointer" sparse warnings to use NULL. (Not duplicating patches that are already in -mm, -bird, or -kj.) Convert isdn driver struct initializer to use C99 syntax. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/reboot_fixups.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/reboot_fixups.c b/arch/i386/kernel/reboot_fixups.c index 1b183b3..c9b8733 100644 --- a/arch/i386/kernel/reboot_fixups.c +++ b/arch/i386/kernel/reboot_fixups.c @@ -44,7 +44,7 @@ void mach_reboot_fixups(void) for (i=0; i < (sizeof(fixups_table)/sizeof(fixups_table[0])); i++) { cur = &(fixups_table[i]); - dev = pci_get_device(cur->vendor, cur->device, 0); + dev = pci_get_device(cur->vendor, cur->device, NULL); if (!dev) continue; -- cgit v1.1 From f00c96f313b07d2eb2845305b9a3395e14385767 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Sun, 30 Oct 2005 15:03:35 -0800 Subject: [PATCH] hpet-RTC: disable interrupt when no longer needed When the emulated RTC interrupt is no longer needed, we better disable it; otherwise, we get a spurious interrupt whenever the timer has rolled over and reaches the same comparator value. Having a superfluous interrupt every five minutes doesn't hurt much, but it's bad style anyway. ;-) Signed-off-by: Clemens Ladisch Acked-by: "Pallipadi, Venkatesh" Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/time_hpet.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c index 658c062..23e81fc 100644 --- a/arch/i386/kernel/time_hpet.c +++ b/arch/i386/kernel/time_hpet.c @@ -319,8 +319,12 @@ static void hpet_rtc_timer_reinit(void) { unsigned int cfg, cnt; - if (!(PIE_on | AIE_on | UIE_on)) + if (unlikely(!(PIE_on | AIE_on | UIE_on))) { + cfg = hpet_readl(HPET_T1_CFG); + cfg &= ~HPET_TN_ENABLE; + hpet_writel(cfg, HPET_T1_CFG); return; + } if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ)) hpet_rtc_int_freq = PIE_freq; -- cgit v1.1 From 5f819949ee4e5a06c2e0054cbb42f3f0d170d779 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Sun, 30 Oct 2005 15:03:36 -0800 Subject: [PATCH] hpet-RTC: fix timer config register accesses Make sure that the RTC timer is in non-periodic mode; some stupid BIOS might have initialized it to periodic mode. Furthermore, don't set the SETVAL bit in the config register. This wouldn't have any effect unless the timer was in period mode (which it isn't), and then the actual timer frequency would be half that of the desired one because incrementing the comparator in the interrupt handler would be done after the hardware has already incremented it itself. Signed-off-by: Clemens Ladisch Acked-by: Venkatesh Pallipadi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/time_hpet.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c index 23e81fc..cb1f313 100644 --- a/arch/i386/kernel/time_hpet.c +++ b/arch/i386/kernel/time_hpet.c @@ -309,7 +309,8 @@ int hpet_rtc_timer_init(void) local_irq_restore(flags); cfg = hpet_readl(HPET_T1_CFG); - cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT; + cfg &= ~HPET_TN_PERIODIC; + cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; hpet_writel(cfg, HPET_T1_CFG); return 1; @@ -335,12 +336,6 @@ static void hpet_rtc_timer_reinit(void) cnt = hpet_readl(HPET_T1_CMP); cnt += hpet_tick*HZ/hpet_rtc_int_freq; hpet_writel(cnt, HPET_T1_CMP); - - cfg = hpet_readl(HPET_T1_CFG); - cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT; - hpet_writel(cfg, HPET_T1_CFG); - - return; } /* -- cgit v1.1 From 7811fb8f400a3dbfa027d86bb583a31c66fddfc3 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Sun, 30 Oct 2005 15:03:36 -0800 Subject: [PATCH] hpet-RTC: cache the comparator register Reads from an HPET register require a round trip to the south bridge and are almost as slow as PCI reads. By caching the last value we've written to the comparator register, we can eliminate all HPET reads from the fast path in the emulated RTC interrupt handler. Signed-off-by: Clemens Ladisch Acked-by: Venkatesh Pallipadi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/time_hpet.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/time_hpet.c b/arch/i386/kernel/time_hpet.c index cb1f313..9caeaa3 100644 --- a/arch/i386/kernel/time_hpet.c +++ b/arch/i386/kernel/time_hpet.c @@ -275,6 +275,7 @@ static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ; static unsigned long PIE_count; static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */ +static unsigned int hpet_t1_cmp; /* cached comparator register */ /* * Timer 1 for RTC, we do not use periodic interrupt feature, @@ -306,6 +307,7 @@ int hpet_rtc_timer_init(void) cnt = hpet_readl(HPET_COUNTER); cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); hpet_writel(cnt, HPET_T1_CMP); + hpet_t1_cmp = cnt; local_irq_restore(flags); cfg = hpet_readl(HPET_T1_CFG); @@ -333,9 +335,10 @@ static void hpet_rtc_timer_reinit(void) hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; /* It is more accurate to use the comparator value than current count.*/ - cnt = hpet_readl(HPET_T1_CMP); + cnt = hpet_t1_cmp; cnt += hpet_tick*HZ/hpet_rtc_int_freq; hpet_writel(cnt, HPET_T1_CMP); + hpet_t1_cmp = cnt; } /* -- cgit v1.1 From 4e57b6817880946a3a78d5d8cad1ace363f7e449 Mon Sep 17 00:00:00 2001 From: Tim Schmielau Date: Sun, 30 Oct 2005 15:03:48 -0800 Subject: [PATCH] fix missing includes I recently picked up my older work to remove unnecessary #includes of sched.h, starting from a patch by Dave Jones to not include sched.h from module.h. This reduces the number of indirect includes of sched.h by ~300. Another ~400 pointless direct includes can be removed after this disentangling (patch to follow later). However, quite a few indirect includes need to be fixed up for this. In order to feed the patches through -mm with as little disturbance as possible, I've split out the fixes I accumulated up to now (complete for i386 and x86_64, more archs to follow later) and post them before the real patch. This way this large part of the patch is kept simple with only adding #includes, and all hunks are independent of each other. So if any hunk rejects or gets in the way of other patches, just drop it. My scripts will pick it up again in the next round. Signed-off-by: Tim Schmielau Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c | 1 + arch/i386/kernel/cpu/cpufreq/p4-clockmod.c | 1 + arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 1 + arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c | 1 + arch/i386/kernel/cpu/intel_cacheinfo.c | 1 + 5 files changed, 5 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 822c8ce..caa9f77 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -32,6 +32,7 @@ #include #include #include +#include /* current */ #include #include #include diff --git a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c index aa622d5..270f218 100644 --- a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c @@ -28,6 +28,7 @@ #include #include #include +#include /* current / set_cpus_allowed() */ #include #include diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index 58ca98f..2d5c9ad 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -32,6 +32,7 @@ #include #include #include +#include /* for current / set_cpus_allowed() */ #include #include diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index c397b62..14659742 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -22,6 +22,7 @@ #include #include #include +#include /* current */ #include #include diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index f083933..4dc42a1 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include -- cgit v1.1 From f2c84c0e84bfa637a7161eac10157cf3b05b4a73 Mon Sep 17 00:00:00 2001 From: Arthur Othieno Date: Sun, 30 Oct 2005 23:04:05 -0500 Subject: [PATCH] i386: CONFIG_PC removal CONFIG_PC is left-over cruft after the introduction of CONFIG_X86_PC with the subarch split. Remove it, and fixup the remaining users to depend on CONFIG_X86_PC instead. Signed-off-by: Arthur Othieno Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 5383e5e..bac0da7 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -1042,8 +1042,3 @@ config X86_TRAMPOLINE bool depends on X86_SMP || (X86_VOYAGER && SMP) default y - -config PC - bool - depends on X86 && !EMBEDDED - default y -- cgit v1.1 From 1e4c85f97fe26fbd70da12148b3992c0e00361fd Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 31 Oct 2005 19:16:17 -0800 Subject: Revert "i386: move apic init in init_IRQs" Commit f2b36db692b7ff6972320ad9839ae656a3b0ee3e causes a bootup hang on at least one machine. Revert for now until we understand why. The old code may be ugly, but it works. Signed-off-by: Linus Torvalds --- arch/i386/kernel/apic.c | 77 ++++++++++------------------------------------ arch/i386/kernel/i8259.c | 4 --- arch/i386/kernel/io_apic.c | 6 +--- arch/i386/kernel/smpboot.c | 68 ++++++++++++++++++++++++++++++---------- arch/i386/kernel/time.c | 12 +------- 5 files changed, 70 insertions(+), 97 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 9204be6..7c724ff 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -803,7 +803,6 @@ no_apic: void __init init_apic_mappings(void) { - unsigned int orig_apicid; unsigned long apic_phys; /* @@ -825,11 +824,8 @@ void __init init_apic_mappings(void) * Fetch the APIC ID of the BSP in case we have a * default configuration (or the MP table is broken). */ - orig_apicid = boot_cpu_physical_apicid; - boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); - if ((orig_apicid != -1U) && (orig_apicid != boot_cpu_physical_apicid)) - printk(KERN_WARNING "Boot APIC ID in local APIC unexpected (%d vs %d)", - orig_apicid, boot_cpu_physical_apicid); + if (boot_cpu_physical_apicid == -1U) + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); #ifdef CONFIG_X86_IO_APIC { @@ -1259,81 +1255,40 @@ fastcall void smp_error_interrupt(struct pt_regs *regs) } /* - * This initializes the IO-APIC and APIC hardware. + * This initializes the IO-APIC and APIC hardware if this is + * a UP kernel. */ -int __init APIC_init(void) +int __init APIC_init_uniprocessor (void) { - if (enable_local_apic < 0) { - printk(KERN_INFO "APIC disabled\n"); - return -1; - } + if (enable_local_apic < 0) + clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); - /* See if we have a SMP configuration or have forced enabled - * the local apic. - */ - if (!smp_found_config && !acpi_lapic && !cpu_has_apic) { - enable_local_apic = -1; + if (!smp_found_config && !cpu_has_apic) return -1; - } /* - * Complain if the BIOS pretends there is an apic. - * Then get out because we don't have an a local apic. + * Complain if the BIOS pretends there is one. */ if (!cpu_has_apic && APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", boot_cpu_physical_apicid); - printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); - enable_local_apic = -1; return -1; } verify_local_APIC(); - /* - * Should not be necessary because the MP table should list the boot - * CPU too, but we do it for the sake of robustness anyway. - * Makes no sense to do this check in clustered apic mode, so skip it - */ - if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { - printk("weird, boot CPU (#%d) not listed by the BIOS.\n", - boot_cpu_physical_apicid); - physid_set(boot_cpu_physical_apicid, phys_cpu_present_map); - } - - /* - * Switch from PIC to APIC mode. - */ connect_bsp_APIC(); - setup_local_APIC(); -#ifdef CONFIG_X86_IO_APIC - /* - * Now start the IO-APICs - */ - if (smp_found_config && !skip_ioapic_setup && nr_ioapics) - setup_IO_APIC(); -#endif - return 0; -} + phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); -void __init APIC_late_time_init(void) -{ - /* Improve our loops per jiffy estimate */ - loops_per_jiffy = ((1000 + HZ - 1)/HZ)*cpu_khz; - boot_cpu_data.loops_per_jiffy = loops_per_jiffy; - cpu_data[0].loops_per_jiffy = loops_per_jiffy; - - /* setup_apic_nmi_watchdog doesn't work properly before cpu_khz is - * initialized. So redo it here to ensure the boot cpu is setup - * properly. - */ - if (nmi_watchdog == NMI_LOCAL_APIC) - setup_apic_nmi_watchdog(); + setup_local_APIC(); #ifdef CONFIG_X86_IO_APIC - if (smp_found_config && !skip_ioapic_setup && nr_ioapics) - IO_APIC_late_time_init(); + if (smp_found_config) + if (!skip_ioapic_setup && nr_ioapics) + setup_IO_APIC(); #endif setup_boot_APIC_clock(); + + return 0; } diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index d86f249..323ef8a 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c @@ -435,8 +435,4 @@ void __init init_IRQ(void) setup_irq(FPU_IRQ, &fpu_irq); irq_ctx_init(smp_processor_id()); - -#ifdef CONFIG_X86_LOCAL_APIC - APIC_init(); -#endif } diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 5a77c52..cc5d7ac 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -2387,15 +2387,11 @@ void __init setup_IO_APIC(void) sync_Arb_IDs(); setup_IO_APIC_irqs(); init_IO_APIC_traps(); + check_timer(); if (!acpi_ioapic) print_IO_APIC(); } -void __init IO_APIC_late_time_init(void) -{ - check_timer(); -} - /* * Called after all the initialization is done. If we didnt find any * APIC bugs then we can allow the modify fast path diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 5a2bbe0..01b618e 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -1078,16 +1078,6 @@ void *xquad_portio; EXPORT_SYMBOL(xquad_portio); #endif -/* - * Fall back to non SMP mode after errors. - * - */ -static __init void disable_smp(void) -{ - cpu_set(0, cpu_sibling_map[0]); - cpu_set(0, cpu_core_map[0]); -} - static void __init smp_boot_cpus(unsigned int max_cpus) { int apicid, cpu, bit, kicked; @@ -1100,6 +1090,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) printk("CPU%d: ", 0); print_cpu_info(&cpu_data[0]); + boot_cpu_physical_apicid = GET_APIC_ID(apic_read(APIC_ID)); boot_cpu_logical_apicid = logical_smp_processor_id(); x86_cpu_to_apicid[0] = boot_cpu_physical_apicid; @@ -1111,27 +1102,68 @@ static void __init smp_boot_cpus(unsigned int max_cpus) cpus_clear(cpu_core_map[0]); cpu_set(0, cpu_core_map[0]); - map_cpu_to_logical_apicid(); - /* * If we couldn't find an SMP configuration at boot time, * get out of here now! */ if (!smp_found_config && !acpi_lapic) { printk(KERN_NOTICE "SMP motherboard not detected.\n"); - disable_smp(); + smpboot_clear_io_apic_irqs(); + phys_cpu_present_map = physid_mask_of_physid(0); + if (APIC_init_uniprocessor()) + printk(KERN_NOTICE "Local APIC not detected." + " Using dummy APIC emulation.\n"); + map_cpu_to_logical_apicid(); + cpu_set(0, cpu_sibling_map[0]); + cpu_set(0, cpu_core_map[0]); + return; + } + + /* + * Should not be necessary because the MP table should list the boot + * CPU too, but we do it for the sake of robustness anyway. + * Makes no sense to do this check in clustered apic mode, so skip it + */ + if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { + printk("weird, boot CPU (#%d) not listed by the BIOS.\n", + boot_cpu_physical_apicid); + physid_set(hard_smp_processor_id(), phys_cpu_present_map); + } + + /* + * If we couldn't find a local APIC, then get out of here now! + */ + if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && !cpu_has_apic) { + printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", + boot_cpu_physical_apicid); + printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); + smpboot_clear_io_apic_irqs(); + phys_cpu_present_map = physid_mask_of_physid(0); + cpu_set(0, cpu_sibling_map[0]); + cpu_set(0, cpu_core_map[0]); return; } + verify_local_APIC(); + /* * If SMP should be disabled, then really disable it! */ - if (!max_cpus || (enable_local_apic < 0)) { - printk(KERN_INFO "SMP mode deactivated.\n"); - disable_smp(); + if (!max_cpus) { + smp_found_config = 0; + printk(KERN_INFO "SMP mode deactivated, forcing use of dummy APIC emulation.\n"); + smpboot_clear_io_apic_irqs(); + phys_cpu_present_map = physid_mask_of_physid(0); + cpu_set(0, cpu_sibling_map[0]); + cpu_set(0, cpu_core_map[0]); return; } + connect_bsp_APIC(); + setup_local_APIC(); + map_cpu_to_logical_apicid(); + + setup_portio_remap(); /* @@ -1212,6 +1244,10 @@ static void __init smp_boot_cpus(unsigned int max_cpus) cpu_set(0, cpu_sibling_map[0]); cpu_set(0, cpu_core_map[0]); + smpboot_setup_io_apic(); + + setup_boot_APIC_clock(); + /* * Synchronize the TSC with the AP */ diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 07471bb..41c5b2d 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -440,8 +440,8 @@ static int time_init_device(void) device_initcall(time_init_device); -extern void (*late_time_init)(void); #ifdef CONFIG_HPET_TIMER +extern void (*late_time_init)(void); /* Duplicate of time_init() below, with hpet_enable part added */ static void __init hpet_time_init(void) { @@ -458,11 +458,6 @@ static void __init hpet_time_init(void) printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); time_init_hook(); - -#ifdef CONFIG_X86_LOCAL_APIC - if (enable_local_apic >= 0) - APIC_late_time_init(); -#endif } #endif @@ -487,9 +482,4 @@ void __init time_init(void) printk(KERN_INFO "Using %s for high-res timesource\n",cur_timer->name); time_init_hook(); - -#ifdef CONFIG_X86_LOCAL_APIC - if (enable_local_apic >= 0) - late_time_init = APIC_late_time_init; -#endif } -- cgit v1.1 From 1d373741976985bc665e75aeb2b3bf7a524e36cc Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 28 Oct 2005 21:50:35 -0700 Subject: [PATCH] toshiba_ohci1394_dmi_table should be __devinitdata, not __devinit I don't really understand why gcc gives the error it does, but without this patch, when building with CONFIG_HOTPLUG=n, I get errors like: CC arch/x86_64/pci/../../i386/pci/fixup.o arch/x86_64/pci/../../i386/pci/fixup.c: In function `pci_fixup_i450nx': arch/x86_64/pci/../../i386/pci/fixup.c:13: error: pci_fixup_i450nx causes a section type conflict The change is obviously correct: an array should be declared __devinitdata rather that __devinit. Signed-off-by: Roland Dreier Acked-by: Martin J. Bligh Signed-off-by: Linus Torvalds --- arch/i386/pci/fixup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c index 330fd2b..3984226 100644 --- a/arch/i386/pci/fixup.c +++ b/arch/i386/pci/fixup.c @@ -398,7 +398,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_video); */ static u16 toshiba_line_size; -static struct dmi_system_id __devinit toshiba_ohci1394_dmi_table[] = { +static struct dmi_system_id __devinitdata toshiba_ohci1394_dmi_table[] = { { .ident = "Toshiba PS5 based laptop", .matches = { -- cgit v1.1 From f912696ab330bf539231d1f8032320f2a08b850f Mon Sep 17 00:00:00 2001 From: Bart Oldeman Date: Sun, 6 Nov 2005 12:54:07 +1300 Subject: [PATCH] reset tss->io_bitmap_owner in sys_ioperm() my patch "x86: initialise tss->io_bitmap_owner to something" (commit ID d5cd4aadd3d220afac8e3e6d922e333592551f7d) introduced a problem with a program (DOSEMU) that called ioperm after already doing some port i/o. The problem is that a process switch return causes tss->io_bitmap_base to be set to IO_BITMAP_OFFSET so that the fault (that *really* sets the io bitmap) never triggers. This fixes that regression. Signed-off-by: Bart Oldeman Signed-off-by: Linus Torvalds --- arch/i386/kernel/ioport.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c index f2b3765..b59a34d 100644 --- a/arch/i386/kernel/ioport.c +++ b/arch/i386/kernel/ioport.c @@ -108,8 +108,11 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) /* * Sets the lazy trigger so that the next I/O operation will * reload the correct bitmap. + * Reset the owner so that a process switch will not set + * tss->io_bitmap_base to IO_BITMAP_OFFSET. */ tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; + tss->io_bitmap_owner = NULL; put_cpu(); -- cgit v1.1 From 38e548ee1a79c8da7b3d9e26f2adce9b61413f84 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Mon, 7 Nov 2005 00:58:31 -0800 Subject: [PATCH] arch/i386: Use ARRAY_SIZE macro Use ARRAY_SIZE macro instead of sizeof(x)/sizeof(x[0]) Signed-off-by: Tobias Klauser Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/apm.c | 5 ++--- arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c | 2 +- arch/i386/kernel/mca.c | 2 +- arch/i386/kernel/reboot_fixups.c | 2 +- arch/i386/kernel/smpboot.c | 2 +- 5 files changed, 6 insertions(+), 7 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index d2ef0c2..86e80c5 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -447,8 +447,7 @@ static char * apm_event_name[] = { "system standby resume", "capabilities change" }; -#define NR_APM_EVENT_NAME \ - (sizeof(apm_event_name) / sizeof(apm_event_name[0])) +#define NR_APM_EVENT_NAME ARRAY_SIZE(apm_event_name) typedef struct lookup_t { int key; @@ -479,7 +478,7 @@ static const lookup_t error_table[] = { { APM_NO_ERROR, "BIOS did not set a return code" }, { APM_NOT_PRESENT, "No APM present" } }; -#define ERROR_COUNT (sizeof(error_table)/sizeof(lookup_t)) +#define ERROR_COUNT ARRAY_SIZE(error_table) /** * apm_error - display an APM error diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index 14659742..0ea010a 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -67,7 +67,7 @@ static const struct cpu_id cpu_ids[] = { [CPU_MP4HT_D0] = {15, 3, 4 }, [CPU_MP4HT_E0] = {15, 4, 1 }, }; -#define N_IDS (sizeof(cpu_ids)/sizeof(cpu_ids[0])) +#define N_IDS ARRAY_SIZE(cpu_ids) struct cpu_model { diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c index 8600fae..558bb20 100644 --- a/arch/i386/kernel/mca.c +++ b/arch/i386/kernel/mca.c @@ -132,7 +132,7 @@ static struct resource mca_standard_resources[] = { { .start = 0x100, .end = 0x107, .name = "POS (MCA)" } }; -#define MCA_STANDARD_RESOURCES (sizeof(mca_standard_resources)/sizeof(struct resource)) +#define MCA_STANDARD_RESOURCES ARRAY_SIZE(mca_standard_resources) /** * mca_read_and_store_pos - read the POS registers into a memory buffer diff --git a/arch/i386/kernel/reboot_fixups.c b/arch/i386/kernel/reboot_fixups.c index c9b8733..6f73c9e 100644 --- a/arch/i386/kernel/reboot_fixups.c +++ b/arch/i386/kernel/reboot_fixups.c @@ -42,7 +42,7 @@ void mach_reboot_fixups(void) struct pci_dev *dev; int i; - for (i=0; i < (sizeof(fixups_table)/sizeof(fixups_table[0])); i++) { + for (i=0; i < ARRAY_SIZE(fixups_table); i++) { cur = &(fixups_table[i]); dev = pci_get_device(cur->vendor, cur->device, NULL); if (!dev) diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 01b618e..e6488ff 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -612,7 +612,7 @@ static inline void __inquire_remote_apic(int apicid) printk("Inquiring remote APIC #%d...\n", apicid); - for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) { + for (i = 0; i < ARRAY_SIZE(regs); i++) { printk("... APIC #%d %s: ", apicid, names[i]); /* -- cgit v1.1 From 77f72b192fd4624ad639dbf60c48be787c8aea59 Mon Sep 17 00:00:00 2001 From: Zwane Mwaikambo Date: Mon, 7 Nov 2005 00:58:33 -0800 Subject: [PATCH] i386: LVT entries remaining unmasked on reboot Excerpt from bugzilla entry http://bugzilla.kernel.org/show_bug.cgi?id=5518 "i386 version of Reboot-through-BIOS is unsafe: it forgets to mask APIC LVT interrupts before jumping to a BIOS entry point. As a result, BIOS ends up bombarded with interrupts early on boot. The BIOS does not expect it since following a "normal" hardware cpu reset, all APIC LVT registers have the Mask bit (16) set and can't generate interrupts. For example, the version of Phoenix BIOS used by VMware enables interrupts for the first time before masking/clearing APIC LVT. The APIC Timer LVT register is still set up for a timer interrupt delivery with a high vector from the previous Linux incarnation (0xef in our case). The BIOS has not fully initialized its IDT at this point and the real mode gate for 0xef remains all zeros. Vector 0xef dispatches BIOS to address 0:0, BIOS takes a #GP and eventually hangs. machine_shutdown() does attempt to shut down APIC before jumping to BIOS, but it is ineffective" Signed-off-by: Zwane Mwaikambo Cc: "Seth, Rohit" Cc: Zachary Amsden Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/apic.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 7c724ff..496a2c9 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -559,14 +559,20 @@ void __devinit setup_local_APIC(void) * If Linux enabled the LAPIC against the BIOS default * disable it down before re-entering the BIOS on shutdown. * Otherwise the BIOS may get confused and not power-off. + * Additionally clear all LVT entries before disable_local_APIC + * for the case where Linux didn't enable the LAPIC. */ void lapic_shutdown(void) { - if (!cpu_has_apic || !enabled_via_apicbase) + if (!cpu_has_apic) return; local_irq_disable(); - disable_local_APIC(); + clear_local_APIC(); + + if (enabled_via_apicbase) + disable_local_APIC(); + local_irq_enable(); } -- cgit v1.1 From 8d1ed6366b9f3cb54eb5aef5dae79b39b8d5ce43 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 7 Nov 2005 00:58:35 -0800 Subject: [PATCH] arch/i386/kernel/ldt.c should #include Every file should #include the header files containing the prototypes of its global functions Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/ldt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/ldt.c b/arch/i386/kernel/ldt.c index fe1ffa5..983f957 100644 --- a/arch/i386/kernel/ldt.c +++ b/arch/i386/kernel/ldt.c @@ -18,6 +18,7 @@ #include #include #include +#include #ifdef CONFIG_SMP /* avoids "defined but not used" warnig */ static void flush_ldt(void *null) -- cgit v1.1 From 5cc6135af73ad0c7897d1d00ff361e510ac23ccb Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 7 Nov 2005 00:58:36 -0800 Subject: [PATCH] arch/i386/kernel/reboot_fixups.c should #include Every file should #include the header files containing the prototypes of its global functions Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/reboot_fixups.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/reboot_fixups.c b/arch/i386/kernel/reboot_fixups.c index 6f73c9e..10e21a4 100644 --- a/arch/i386/kernel/reboot_fixups.c +++ b/arch/i386/kernel/reboot_fixups.c @@ -10,6 +10,7 @@ #include #include +#include static void cs5530a_warm_reset(struct pci_dev *dev) { -- cgit v1.1 From cc658cfe3c66a6124b5a8db90cdcdd440201b1dc Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 7 Nov 2005 00:58:37 -0800 Subject: [PATCH] arch/i386/kernel/scx200.c should #include Every file should #include the header files containing the prototypes of its global functions Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/scx200.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/scx200.c b/arch/i386/kernel/scx200.c index 69e203a..9c968ae 100644 --- a/arch/i386/kernel/scx200.c +++ b/arch/i386/kernel/scx200.c @@ -12,6 +12,7 @@ #include #include +#include /* Verify that the configuration block really is there */ #define scx200_cb_probe(base) (inw((base) + SCx200_CBA) == (base)) -- cgit v1.1 From 31ab269a0307d8725737dfbbdeb5dcde7b41bc36 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 7 Nov 2005 00:58:42 -0800 Subject: [PATCH] x86: add MCE resume It's widely seen a MCE non-fatal error reported after resume. It seems MCE resume is lacked under ia32. This patch tries to fix the gap. Signed-off-by: Shaohua Li Acked-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/common.c | 5 +---- arch/i386/kernel/cpu/mcheck/k7.c | 2 +- arch/i386/kernel/cpu/mcheck/mce.c | 4 ++-- arch/i386/kernel/cpu/mcheck/p4.c | 4 ++-- arch/i386/kernel/cpu/mcheck/p5.c | 2 +- arch/i386/kernel/cpu/mcheck/p6.c | 2 +- arch/i386/kernel/cpu/mcheck/winchip.c | 2 +- arch/i386/power/cpu.c | 1 + 8 files changed, 10 insertions(+), 12 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 74145a3..c145fb3 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -30,8 +30,6 @@ static int disable_x86_serial_nr __devinitdata = 1; struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; -extern void mcheck_init(struct cpuinfo_x86 *c); - extern int disable_pse; static void default_init(struct cpuinfo_x86 * c) @@ -429,9 +427,8 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) } /* Init Machine Check Exception if available. */ -#ifdef CONFIG_X86_MCE mcheck_init(c); -#endif + if (c == &boot_cpu_data) sysenter_setup(); enable_sep_cpu(); diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c index 7c6b9c7..fc5d521 100644 --- a/arch/i386/kernel/cpu/mcheck/k7.c +++ b/arch/i386/kernel/cpu/mcheck/k7.c @@ -68,7 +68,7 @@ static fastcall void k7_machine_check(struct pt_regs * regs, long error_code) /* AMD K7 machine check is Intel like */ -void __devinit amd_mcheck_init(struct cpuinfo_x86 *c) +void amd_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c index 2cf25d2..6170af3 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.c +++ b/arch/i386/kernel/cpu/mcheck/mce.c @@ -16,7 +16,7 @@ #include "mce.h" -int mce_disabled __devinitdata = 0; +int mce_disabled = 0; int nr_mce_banks; EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ @@ -31,7 +31,7 @@ static fastcall void unexpected_machine_check(struct pt_regs * regs, long error_ void fastcall (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; /* This has to be run for each processor */ -void __devinit mcheck_init(struct cpuinfo_x86 *c) +void mcheck_init(struct cpuinfo_x86 *c) { if (mce_disabled==1) return; diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c index 1d1e885..fd2c459 100644 --- a/arch/i386/kernel/cpu/mcheck/p4.c +++ b/arch/i386/kernel/cpu/mcheck/p4.c @@ -77,7 +77,7 @@ fastcall void smp_thermal_interrupt(struct pt_regs *regs) } /* P4/Xeon Thermal regulation detect and init */ -static void __devinit intel_init_thermal(struct cpuinfo_x86 *c) +static void intel_init_thermal(struct cpuinfo_x86 *c) { u32 l, h; unsigned int cpu = smp_processor_id(); @@ -231,7 +231,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) } -void __devinit intel_p4_mcheck_init(struct cpuinfo_x86 *c) +void intel_p4_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; diff --git a/arch/i386/kernel/cpu/mcheck/p5.c b/arch/i386/kernel/cpu/mcheck/p5.c index 3a2e24b..94bc43d 100644 --- a/arch/i386/kernel/cpu/mcheck/p5.c +++ b/arch/i386/kernel/cpu/mcheck/p5.c @@ -28,7 +28,7 @@ static fastcall void pentium_machine_check(struct pt_regs * regs, long error_cod } /* Set up machine check reporting for processors with Intel style MCE */ -void __devinit intel_p5_mcheck_init(struct cpuinfo_x86 *c) +void intel_p5_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; diff --git a/arch/i386/kernel/cpu/mcheck/p6.c b/arch/i386/kernel/cpu/mcheck/p6.c index 979b18b..deeae42 100644 --- a/arch/i386/kernel/cpu/mcheck/p6.c +++ b/arch/i386/kernel/cpu/mcheck/p6.c @@ -79,7 +79,7 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) } /* Set up machine check reporting for processors with Intel style MCE */ -void __devinit intel_p6_mcheck_init(struct cpuinfo_x86 *c) +void intel_p6_mcheck_init(struct cpuinfo_x86 *c) { u32 l, h; int i; diff --git a/arch/i386/kernel/cpu/mcheck/winchip.c b/arch/i386/kernel/cpu/mcheck/winchip.c index 5b9d2dd..9e424b6 100644 --- a/arch/i386/kernel/cpu/mcheck/winchip.c +++ b/arch/i386/kernel/cpu/mcheck/winchip.c @@ -22,7 +22,7 @@ static fastcall void winchip_machine_check(struct pt_regs * regs, long error_cod } /* Set up machine check reporting on the Winchip C6 series */ -void __devinit winchip_mcheck_init(struct cpuinfo_x86 *c) +void winchip_mcheck_init(struct cpuinfo_x86 *c) { u32 lo, hi; machine_check_vector = winchip_machine_check; diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c index 1f15726..50a0bef 100644 --- a/arch/i386/power/cpu.c +++ b/arch/i386/power/cpu.c @@ -118,6 +118,7 @@ void __restore_processor_state(struct saved_context *ctxt) fix_processor_context(); do_fpu_end(); mtrr_ap_init(); + mcheck_init(&boot_cpu_data); } void restore_processor_state(void) -- cgit v1.1 From cd6b0762a04978baf48412456a687842de97e381 Mon Sep 17 00:00:00 2001 From: Prasanna S Panchamukhi Date: Mon, 7 Nov 2005 00:59:14 -0800 Subject: [PATCH] Move Kprobes and Oprofile to "Instrumentation Support" menu Andrew Morton suggested to move kprobes from kernel hacking menu, since kernel hacking menu is in-appropriate for the Kprobes. This patch moves Kprobes and Oprofile under instrumentation menu. (akpm: it's not a natural fit, but things like djprobes and the s390 guys' statistics library need a home) Signed-of-by: Prasanna S Panchamukhi Cc: Philippe Elie Cc: John Levon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 13 +++++++++++++ arch/i386/Kconfig.debug | 10 ---------- arch/i386/oprofile/Kconfig | 6 ------ 3 files changed, 13 insertions(+), 16 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index bac0da7..dbf90ad 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -997,8 +997,21 @@ source "drivers/Kconfig" source "fs/Kconfig" +menu "Instrumentation Support" + depends on EXPERIMENTAL + source "arch/i386/oprofile/Kconfig" +config KPROBES + bool "Kprobes (EXPERIMENTAL)" + help + Kprobes allows you to trap at almost any kernel address and + execute a callback function. register_kprobe() establishes + a probepoint and specifies the callback. Kprobes is useful + for kernel debugging, non-intrusive instrumentation and testing. + If in doubt, say "N". +endmenu + source "arch/i386/Kconfig.debug" source "security/Kconfig" diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug index 5228c40..c48b424 100644 --- a/arch/i386/Kconfig.debug +++ b/arch/i386/Kconfig.debug @@ -22,16 +22,6 @@ config DEBUG_STACKOVERFLOW This option will cause messages to be printed if free stack space drops below a certain limit. -config KPROBES - bool "Kprobes" - depends on DEBUG_KERNEL - help - Kprobes allows you to trap at almost any kernel address and - execute a callback function. register_kprobe() establishes - a probepoint and specifies the callback. Kprobes is useful - for kernel debugging, non-intrusive instrumentation and testing. - If in doubt, say "N". - config DEBUG_STACK_USAGE bool "Stack utilization instrumentation" depends on DEBUG_KERNEL diff --git a/arch/i386/oprofile/Kconfig b/arch/i386/oprofile/Kconfig index 5ade198..d8a8408 100644 --- a/arch/i386/oprofile/Kconfig +++ b/arch/i386/oprofile/Kconfig @@ -1,7 +1,3 @@ - -menu "Profiling support" - depends on EXPERIMENTAL - config PROFILING bool "Profiling support (EXPERIMENTAL)" help @@ -19,5 +15,3 @@ config OPROFILE If unsure, say N. -endmenu - -- cgit v1.1 From 481bed454247538e9f57d4ea37b153ccba24ba7b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Nov 2005 00:59:47 -0800 Subject: [PATCH] consolidate sys_ptrace() The sys_ptrace boilerplate code (everything outside the big switch statement for the arch-specific requests) is shared by most architectures. This patch moves it to kernel/ptrace.c and leaves the arch-specific code as arch_ptrace. Some architectures have a too different ptrace so we have to exclude them. They continue to keep their implementations. For sh64 I had to add a sh64_ptrace wrapper because it does some initialization on the first call. For um I removed an ifdefed SUBARCH_PTRACE_SPECIAL block, but SUBARCH_PTRACE_SPECIAL isn't defined anywhere in the tree. Signed-off-by: Christoph Hellwig Acked-by: Paul Mackerras Acked-by: Ralf Baechle Acked-By: David Howells Acked-by: Russell King Acked-by: Paul Mundt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/ptrace.c | 44 ++------------------------------------------ 1 file changed, 2 insertions(+), 42 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index efd11f0..5ffbb4b 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -354,49 +354,12 @@ ptrace_set_thread_area(struct task_struct *child, return 0; } -asmlinkage long sys_ptrace(long request, long pid, long addr, long data) +long arch_ptrace(struct task_struct *child, long request, long addr, long data) { - struct task_struct *child; struct user * dummy = NULL; int i, ret; unsigned long __user *datap = (unsigned long __user *)data; - lock_kernel(); - ret = -EPERM; - if (request == PTRACE_TRACEME) { - /* are we already being traced? */ - if (current->ptrace & PT_PTRACED) - goto out; - ret = security_ptrace(current->parent, current); - if (ret) - goto out; - /* set the ptrace bit in the process flags. */ - current->ptrace |= PT_PTRACED; - ret = 0; - goto out; - } - ret = -ESRCH; - read_lock(&tasklist_lock); - child = find_task_by_pid(pid); - if (child) - get_task_struct(child); - read_unlock(&tasklist_lock); - if (!child) - goto out; - - ret = -EPERM; - if (pid == 1) /* you may not mess with init */ - goto out_tsk; - - if (request == PTRACE_ATTACH) { - ret = ptrace_attach(child); - goto out_tsk; - } - - ret = ptrace_check_attach(child, request == PTRACE_KILL); - if (ret < 0) - goto out_tsk; - switch (request) { /* when I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ @@ -663,10 +626,7 @@ asmlinkage long sys_ptrace(long request, long pid, long addr, long data) ret = ptrace_request(child, request, addr, data); break; } -out_tsk: - put_task_struct(child); -out: - unlock_kernel(); + out_tsk: return ret; } -- cgit v1.1 From 66ff2d0691e00e1e7bfdf398a970310c9a0fe671 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Mon, 7 Nov 2005 01:00:07 -0800 Subject: [PATCH] Kprobes: rearrange preempt_disable/enable() calls The following set of patches are aimed at improving kprobes scalability. We currently serialize kprobe registration, unregistration and handler execution using a single spinlock - kprobe_lock. With these changes, kprobe handlers can run without any locks held. It also allows for simultaneous kprobe handler executions on different processors as we now track kprobe execution on a per processor basis. It is now necessary that the handlers be re-entrant since handlers can run concurrently on multiple processors. All changes have been tested on i386, ia64, ppc64 and x86_64, while sparc64 has been compile tested only. The patches can be viewed as 3 logical chunks: patch 1: Reorder preempt_(dis/en)able calls patches 2-7: Introduce per_cpu data areas to track kprobe execution patches 8-9: Use RCU to synchronize kprobe (un)registration and handler execution. Thanks to Maneesh Soni, James Keniston and Anil Keshavamurthy for their review and suggestions. Thanks again to Anil, Hien Nguyen and Kevin Stafford for testing the patches. This patch: Reorder preempt_disable/enable() calls in arch kprobes files in preparation to introduce locking changes. No functional changes introduced by this patch. Signed-off-by: Ananth N Mavinakayahanalli Signed-off-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 6345b43..fd35039 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -158,8 +158,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) kprobe_opcode_t *addr = NULL; unsigned long *lp; - /* We're in an interrupt, but this is clear and BUG()-safe. */ - preempt_disable(); /* Check if the application is using LDT entry for its code segment and * calculate the address by reading the base address from the LDT entry. */ @@ -232,6 +230,11 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) goto no_kprobe; } + /* + * This preempt_disable() matches the preempt_enable_no_resched() + * in post_kprobe_handler() + */ + preempt_disable(); kprobe_status = KPROBE_HIT_ACTIVE; set_current_kprobe(p, regs); @@ -245,7 +248,6 @@ ss_probe: return 1; no_kprobe: - preempt_enable_no_resched(); return ret; } @@ -313,11 +315,11 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) unlock_kprobes(); preempt_enable_no_resched(); - /* - * By returning a non-zero value, we are telling - * kprobe_handler() that we have handled unlocking - * and re-enabling preemption. - */ + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we have handled unlocking + * and re-enabling preemption + */ return 1; } @@ -453,29 +455,29 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data) { struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + preempt_disable(); switch (val) { case DIE_INT3: if (kprobe_handler(args->regs)) - return NOTIFY_STOP; + ret = NOTIFY_STOP; break; case DIE_DEBUG: if (post_kprobe_handler(args->regs)) - return NOTIFY_STOP; + ret = NOTIFY_STOP; break; case DIE_GPF: - if (kprobe_running() && - kprobe_fault_handler(args->regs, args->trapnr)) - return NOTIFY_STOP; - break; case DIE_PAGE_FAULT: if (kprobe_running() && kprobe_fault_handler(args->regs, args->trapnr)) - return NOTIFY_STOP; + ret = NOTIFY_STOP; break; default: break; } - return NOTIFY_DONE; + preempt_enable(); + return ret; } int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) @@ -502,7 +504,6 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) void __kprobes jprobe_return(void) { - preempt_enable_no_resched(); asm volatile (" xchgl %%ebx,%%esp \n" " int3 \n" " .globl jprobe_return_end \n" -- cgit v1.1 From 9a0e3a86837ac7542e601c18346102c9d9e65fa5 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Mon, 7 Nov 2005 01:00:08 -0800 Subject: [PATCH] Kprobes: Track kprobe on a per_cpu basis - i386 changes I386 changes to track kprobe execution on a per-cpu basis. We now track the kprobe state machine independently on each cpu, using an arch specific kprobe control block. Signed-off-by: Ananth N Mavinakayanahalli Signed-off-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 126 +++++++++++++++++++++++++-------------------- 1 file changed, 69 insertions(+), 57 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index fd35039..99565a66 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -37,16 +37,11 @@ #include #include -static struct kprobe *current_kprobe; -static unsigned long kprobe_status, kprobe_old_eflags, kprobe_saved_eflags; -static struct kprobe *kprobe_prev; -static unsigned long kprobe_status_prev, kprobe_old_eflags_prev, kprobe_saved_eflags_prev; -static struct pt_regs jprobe_saved_regs; -static long *jprobe_saved_esp; -/* copy of the kernel stack at the probe fire time */ -static kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; void jprobe_return_end(void); +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + /* * returns non-zero if opcode modifies the interrupt flag. */ @@ -91,29 +86,30 @@ void __kprobes arch_remove_kprobe(struct kprobe *p) { } -static inline void save_previous_kprobe(void) +static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) { - kprobe_prev = current_kprobe; - kprobe_status_prev = kprobe_status; - kprobe_old_eflags_prev = kprobe_old_eflags; - kprobe_saved_eflags_prev = kprobe_saved_eflags; + kcb->prev_kprobe.kp = kprobe_running(); + kcb->prev_kprobe.status = kcb->kprobe_status; + kcb->prev_kprobe.old_eflags = kcb->kprobe_old_eflags; + kcb->prev_kprobe.saved_eflags = kcb->kprobe_saved_eflags; } -static inline void restore_previous_kprobe(void) +static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - current_kprobe = kprobe_prev; - kprobe_status = kprobe_status_prev; - kprobe_old_eflags = kprobe_old_eflags_prev; - kprobe_saved_eflags = kprobe_saved_eflags_prev; + __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + kcb->kprobe_status = kcb->prev_kprobe.status; + kcb->kprobe_old_eflags = kcb->prev_kprobe.old_eflags; + kcb->kprobe_saved_eflags = kcb->prev_kprobe.saved_eflags; } -static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) +static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, + struct kprobe_ctlblk *kcb) { - current_kprobe = p; - kprobe_saved_eflags = kprobe_old_eflags + __get_cpu_var(current_kprobe) = p; + kcb->kprobe_saved_eflags = kcb->kprobe_old_eflags = (regs->eflags & (TF_MASK | IF_MASK)); if (is_IF_modifier(p->opcode)) - kprobe_saved_eflags &= ~IF_MASK; + kcb->kprobe_saved_eflags &= ~IF_MASK; } static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) @@ -157,6 +153,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) int ret = 0; kprobe_opcode_t *addr = NULL; unsigned long *lp; + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); /* Check if the application is using LDT entry for its code segment and * calculate the address by reading the base address from the LDT entry. @@ -175,10 +172,10 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) Disarm the probe we just hit, and ignore it. */ p = get_kprobe(addr); if (p) { - if (kprobe_status == KPROBE_HIT_SS && + if (kcb->kprobe_status == KPROBE_HIT_SS && *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { regs->eflags &= ~TF_MASK; - regs->eflags |= kprobe_saved_eflags; + regs->eflags |= kcb->kprobe_saved_eflags; unlock_kprobes(); goto no_kprobe; } @@ -188,14 +185,14 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) * just single step on the instruction of the new probe * without calling any user handlers. */ - save_previous_kprobe(); - set_current_kprobe(p, regs); + save_previous_kprobe(kcb); + set_current_kprobe(p, regs, kcb); p->nmissed++; prepare_singlestep(p, regs); - kprobe_status = KPROBE_REENTER; + kcb->kprobe_status = KPROBE_REENTER; return 1; } else { - p = current_kprobe; + p = __get_cpu_var(current_kprobe); if (p->break_handler && p->break_handler(p, regs)) { goto ss_probe; } @@ -235,8 +232,8 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) * in post_kprobe_handler() */ preempt_disable(); - kprobe_status = KPROBE_HIT_ACTIVE; - set_current_kprobe(p, regs); + set_current_kprobe(p, regs, kcb); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; if (p->pre_handler && p->pre_handler(p, regs)) /* handler has already set things up, so skip ss setup */ @@ -244,7 +241,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) ss_probe: prepare_singlestep(p, regs); - kprobe_status = KPROBE_HIT_SS; + kcb->kprobe_status = KPROBE_HIT_SS; return 1; no_kprobe: @@ -312,6 +309,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); regs->eip = orig_ret_address; + reset_current_kprobe(); unlock_kprobes(); preempt_enable_no_resched(); @@ -345,7 +343,8 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * that is atop the stack is the address following the copied instruction. * We need to make it the address following the original instruction. */ -static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) +static void __kprobes resume_execution(struct kprobe *p, + struct pt_regs *regs, struct kprobe_ctlblk *kcb) { unsigned long *tos = (unsigned long *)®s->esp; unsigned long next_eip = 0; @@ -355,7 +354,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) switch (p->ainsn.insn[0]) { case 0x9c: /* pushfl */ *tos &= ~(TF_MASK | IF_MASK); - *tos |= kprobe_old_eflags; + *tos |= kcb->kprobe_old_eflags; break; case 0xc3: /* ret/lret */ case 0xcb: @@ -400,22 +399,26 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) */ static inline int post_kprobe_handler(struct pt_regs *regs) { - if (!kprobe_running()) + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) return 0; - if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { - kprobe_status = KPROBE_HIT_SSDONE; - current_kprobe->post_handler(current_kprobe, regs, 0); + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); } - resume_execution(current_kprobe, regs); - regs->eflags |= kprobe_saved_eflags; + resume_execution(cur, regs, kcb); + regs->eflags |= kcb->kprobe_saved_eflags; /*Restore back the original saved kprobes variables and continue. */ - if (kprobe_status == KPROBE_REENTER) { - restore_previous_kprobe(); + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); goto out; } + reset_current_kprobe(); unlock_kprobes(); out: preempt_enable_no_resched(); @@ -434,14 +437,17 @@ out: /* Interrupts disabled, kprobe_lock held. */ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { - if (current_kprobe->fault_handler - && current_kprobe->fault_handler(current_kprobe, regs, trapnr)) + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) return 1; - if (kprobe_status & KPROBE_HIT_SS) { - resume_execution(current_kprobe, regs); - regs->eflags |= kprobe_old_eflags; + if (kcb->kprobe_status & KPROBE_HIT_SS) { + resume_execution(cur, regs, kcb); + regs->eflags |= kcb->kprobe_old_eflags; + reset_current_kprobe(); unlock_kprobes(); preempt_enable_no_resched(); } @@ -484,10 +490,11 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) { struct jprobe *jp = container_of(p, struct jprobe, kp); unsigned long addr; + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - jprobe_saved_regs = *regs; - jprobe_saved_esp = ®s->esp; - addr = (unsigned long)jprobe_saved_esp; + kcb->jprobe_saved_regs = *regs; + kcb->jprobe_saved_esp = ®s->esp; + addr = (unsigned long)(kcb->jprobe_saved_esp); /* * TBD: As Linus pointed out, gcc assumes that the callee @@ -496,7 +503,8 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) * we also save and restore enough stack bytes to cover * the argument area. */ - memcpy(jprobes_stack, (kprobe_opcode_t *) addr, MIN_STACK_SIZE(addr)); + memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, + MIN_STACK_SIZE(addr)); regs->eflags &= ~IF_MASK; regs->eip = (unsigned long)(jp->entry); return 1; @@ -504,34 +512,38 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) void __kprobes jprobe_return(void) { + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + asm volatile (" xchgl %%ebx,%%esp \n" " int3 \n" " .globl jprobe_return_end \n" " jprobe_return_end: \n" " nop \n"::"b" - (jprobe_saved_esp):"memory"); + (kcb->jprobe_saved_esp):"memory"); } int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) { + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); u8 *addr = (u8 *) (regs->eip - 1); - unsigned long stack_addr = (unsigned long)jprobe_saved_esp; + unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_esp); struct jprobe *jp = container_of(p, struct jprobe, kp); if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { - if (®s->esp != jprobe_saved_esp) { + if (®s->esp != kcb->jprobe_saved_esp) { struct pt_regs *saved_regs = - container_of(jprobe_saved_esp, struct pt_regs, esp); + container_of(kcb->jprobe_saved_esp, + struct pt_regs, esp); printk("current esp %p does not match saved esp %p\n", - ®s->esp, jprobe_saved_esp); + ®s->esp, kcb->jprobe_saved_esp); printk("Saved registers for jprobe %p\n", jp); show_registers(saved_regs); printk("Current registers\n"); show_registers(regs); BUG(); } - *regs = jprobe_saved_regs; - memcpy((kprobe_opcode_t *) stack_addr, jprobes_stack, + *regs = kcb->jprobe_saved_regs; + memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, MIN_STACK_SIZE(stack_addr)); return 1; } -- cgit v1.1 From 991a51d83a3d9bebfafdd1e692cf310899d60791 Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Mon, 7 Nov 2005 01:00:14 -0800 Subject: [PATCH] Kprobes: Use RCU for (un)register synchronization - arch changes Changes to the arch kprobes infrastructure to take advantage of the locking changes introduced by usage of RCU for synchronization. All handlers are now run without any locks held, so they have to be re-entrant or provide their own synchronization. Signed-off-by: Ananth N Mavinakayanahalli Signed-off-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 99565a66..ad46929 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include @@ -123,6 +122,7 @@ static inline void prepare_singlestep(struct kprobe *p, struct pt_regs *regs) regs->eip = (unsigned long)&p->ainsn.insn; } +/* Called with kretprobe_lock held */ void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, struct pt_regs *regs) { @@ -168,15 +168,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) } /* Check we're not actually recursing */ if (kprobe_running()) { - /* We *are* holding lock here, so this is safe. - Disarm the probe we just hit, and ignore it. */ p = get_kprobe(addr); if (p) { if (kcb->kprobe_status == KPROBE_HIT_SS && *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { regs->eflags &= ~TF_MASK; regs->eflags |= kcb->kprobe_saved_eflags; - unlock_kprobes(); goto no_kprobe; } /* We have reentered the kprobe_handler(), since @@ -197,14 +194,11 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) goto ss_probe; } } - /* If it's not ours, can't be delete race, (we hold lock). */ goto no_kprobe; } - lock_kprobes(); p = get_kprobe(addr); if (!p) { - unlock_kprobes(); if (regs->eflags & VM_MASK) { /* We are in virtual-8086 mode. Return 0 */ goto no_kprobe; @@ -268,9 +262,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) struct kretprobe_instance *ri = NULL; struct hlist_head *head; struct hlist_node *node, *tmp; - unsigned long orig_ret_address = 0; + unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; + spin_lock_irqsave(&kretprobe_lock, flags); head = kretprobe_inst_table_head(current); /* @@ -310,7 +305,7 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) regs->eip = orig_ret_address; reset_current_kprobe(); - unlock_kprobes(); + spin_unlock_irqrestore(&kretprobe_lock, flags); preempt_enable_no_resched(); /* @@ -395,7 +390,7 @@ static void __kprobes resume_execution(struct kprobe *p, /* * Interrupts are disabled on entry as trap1 is an interrupt gate and they - * remain disabled thoroughout this function. And we hold kprobe lock. + * remain disabled thoroughout this function. */ static inline int post_kprobe_handler(struct pt_regs *regs) { @@ -419,7 +414,6 @@ static inline int post_kprobe_handler(struct pt_regs *regs) goto out; } reset_current_kprobe(); - unlock_kprobes(); out: preempt_enable_no_resched(); @@ -434,7 +428,6 @@ out: return 1; } -/* Interrupts disabled, kprobe_lock held. */ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) { struct kprobe *cur = kprobe_running(); @@ -448,7 +441,6 @@ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) regs->eflags |= kcb->kprobe_old_eflags; reset_current_kprobe(); - unlock_kprobes(); preempt_enable_no_resched(); } return 0; @@ -463,7 +455,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; - preempt_disable(); + rcu_read_lock(); switch (val) { case DIE_INT3: if (kprobe_handler(args->regs)) @@ -482,7 +474,7 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, default: break; } - preempt_enable(); + rcu_read_unlock(); return ret; } -- cgit v1.1 From d217d5450f11d8c907c0458d175b0dc999b4d06d Mon Sep 17 00:00:00 2001 From: Ananth N Mavinakayanahalli Date: Mon, 7 Nov 2005 01:00:14 -0800 Subject: [PATCH] Kprobes: preempt_disable/enable() simplification Reorganize the preempt_disable/enable calls to eliminate the extra preempt depth. Changes based on Paul McKenney's review suggestions for the kprobes RCU changeset. Signed-off-by: Ananth N Mavinakayanahalli Signed-off-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index ad46929..32b0c24 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -153,7 +153,14 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) int ret = 0; kprobe_opcode_t *addr = NULL; unsigned long *lp; - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe_ctlblk *kcb; + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); /* Check if the application is using LDT entry for its code segment and * calculate the address by reading the base address from the LDT entry. @@ -221,11 +228,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) goto no_kprobe; } - /* - * This preempt_disable() matches the preempt_enable_no_resched() - * in post_kprobe_handler() - */ - preempt_disable(); set_current_kprobe(p, regs, kcb); kcb->kprobe_status = KPROBE_HIT_ACTIVE; @@ -239,6 +241,7 @@ ss_probe: return 1; no_kprobe: + preempt_enable_no_resched(); return ret; } @@ -310,8 +313,8 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) /* * By returning a non-zero value, we are telling - * kprobe_handler() that we have handled unlocking - * and re-enabling preemption + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) */ return 1; } @@ -455,7 +458,6 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; - rcu_read_lock(); switch (val) { case DIE_INT3: if (kprobe_handler(args->regs)) @@ -467,14 +469,16 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, break; case DIE_GPF: case DIE_PAGE_FAULT: + /* kprobe_running() needs smp_processor_id() */ + preempt_disable(); if (kprobe_running() && kprobe_fault_handler(args->regs, args->trapnr)) ret = NOTIFY_STOP; + preempt_enable(); break; default: break; } - rcu_read_unlock(); return ret; } @@ -537,6 +541,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) *regs = kcb->jprobe_saved_regs; memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, MIN_STACK_SIZE(stack_addr)); + preempt_enable_no_resched(); return 1; } return 0; -- cgit v1.1 From 5fed0578be842dd7d24e5240a75b02bbc748501f Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 7 Nov 2005 01:01:46 -0800 Subject: [PATCH] unexport phys_proc_id and cpu_core_id EXPORT_SYMBOL's for phys_proc_id and cpu_core_id were added this year but never used. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/smpboot.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index e6488ff..47ec767 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -68,11 +68,9 @@ EXPORT_SYMBOL(smp_num_siblings); /* Package ID of each logical CPU */ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; -EXPORT_SYMBOL(phys_proc_id); /* Core ID of each logical CPU */ int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; -EXPORT_SYMBOL(cpu_core_id); cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); -- cgit v1.1 From 5bfb5d690f36d316a5f3b4f7775fda996faa6b12 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 8 Nov 2005 21:39:01 -0800 Subject: [PATCH] sched: disable preempt in idle tasks Run idle threads with preempt disabled. Also corrected a bugs in arm26's cpu_idle (make it actually call schedule()). How did it ever work before? Might fix the CPU hotplugging hang which Nigel Cunningham noted. We think the bug hits if the idle thread is preempted after checking need_resched() and before going to sleep, then the CPU offlined. After calling stop_machine_run, the CPU eventually returns from preemption and into the idle thread and goes to sleep. The CPU will continue executing previous idle and have no chance to call play_dead. By disabling preemption until we are ready to explicitly schedule, this bug is fixed and the idle threads generally become more robust. From: alexs PPC build fix From: Yoichi Yuasa MIPS build fix Signed-off-by: Nick Piggin Signed-off-by: Yoichi Yuasa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/process.c | 4 +++- arch/i386/kernel/smpboot.c | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 7a14fdf..5296e28 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -179,7 +179,7 @@ static inline void play_dead(void) */ void cpu_idle(void) { - int cpu = raw_smp_processor_id(); + int cpu = smp_processor_id(); /* endless idle loop with no priority at all */ while (1) { @@ -201,7 +201,9 @@ void cpu_idle(void) __get_cpu_var(irq_stat).idle_timestamp = jiffies; idle(); } + preempt_enable_no_resched(); schedule(); + preempt_disable(); } } diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 47ec767..bc5a9d9 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -485,6 +485,7 @@ static void __devinit start_secondary(void *unused) * things done here to the most necessary things. */ cpu_init(); + preempt_disable(); smp_callin(); while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) rep_nop(); -- cgit v1.1 From 64c7c8f88559624abdbe12b5da6502e8879f8d28 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Tue, 8 Nov 2005 21:39:04 -0800 Subject: [PATCH] sched: resched and cpu_idle rework Make some changes to the NEED_RESCHED and POLLING_NRFLAG to reduce confusion, and make their semantics rigid. Improves efficiency of resched_task and some cpu_idle routines. * In resched_task: - TIF_NEED_RESCHED is only cleared with the task's runqueue lock held, and as we hold it during resched_task, then there is no need for an atomic test and set there. The only other time this should be set is when the task's quantum expires, in the timer interrupt - this is protected against because the rq lock is irq-safe. - If TIF_NEED_RESCHED is set, then we don't need to do anything. It won't get unset until the task get's schedule()d off. - If we are running on the same CPU as the task we resched, then set TIF_NEED_RESCHED and no further action is required. - If we are running on another CPU, and TIF_POLLING_NRFLAG is *not* set after TIF_NEED_RESCHED has been set, then we need to send an IPI. Using these rules, we are able to remove the test and set operation in resched_task, and make clear the previously vague semantics of POLLING_NRFLAG. * In idle routines: - Enter cpu_idle with preempt disabled. When the need_resched() condition becomes true, explicitly call schedule(). This makes things a bit clearer (IMO), but haven't updated all architectures yet. - Many do a test and clear of TIF_NEED_RESCHED for some reason. According to the resched_task rules, this isn't needed (and actually breaks the assumption that TIF_NEED_RESCHED is only cleared with the runqueue lock held). So remove that. Generally one less locked memory op when switching to the idle thread. - Many idle routines clear TIF_POLLING_NRFLAG, and only set it in the inner most polling idle loops. The above resched_task semantics allow it to be set until before the last time need_resched() is checked before going into a halt requiring interrupt wakeup. Many idle routines simply never enter such a halt, and so POLLING_NRFLAG can be always left set, completely eliminating resched IPIs when rescheduling the idle task. POLLING_NRFLAG width can be increased, to reduce the chance of resched IPIs. Signed-off-by: Nick Piggin Cc: Ingo Molnar Cc: Con Kolivas Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/apm.c | 20 ++++++++++++++- arch/i386/kernel/process.c | 64 ++++++++++++++++++++-------------------------- 2 files changed, 47 insertions(+), 37 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 86e80c5..003548b 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -769,8 +769,26 @@ static int set_system_power_state(u_short state) static int apm_do_idle(void) { u32 eax; + u8 ret = 0; + int idled = 0; + int polling; + + polling = test_thread_flag(TIF_POLLING_NRFLAG); + if (polling) { + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb__after_clear_bit(); + } + if (!need_resched()) { + idled = 1; + ret = apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax); + } + if (polling) + set_thread_flag(TIF_POLLING_NRFLAG); + + if (!idled) + return 0; - if (apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax)) { + if (ret) { static unsigned long t; /* This always fails on some SMP boards running UP kernels. diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 5296e28..1cb261f 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -99,14 +99,22 @@ EXPORT_SYMBOL(enable_hlt); */ void default_idle(void) { + local_irq_enable(); + if (!hlt_counter && boot_cpu_data.hlt_works_ok) { - local_irq_disable(); - if (!need_resched()) - safe_halt(); - else - local_irq_enable(); + clear_thread_flag(TIF_POLLING_NRFLAG); + smp_mb__after_clear_bit(); + while (!need_resched()) { + local_irq_disable(); + if (!need_resched()) + safe_halt(); + else + local_irq_enable(); + } + set_thread_flag(TIF_POLLING_NRFLAG); } else { - cpu_relax(); + while (!need_resched()) + cpu_relax(); } } #ifdef CONFIG_APM_MODULE @@ -120,29 +128,14 @@ EXPORT_SYMBOL(default_idle); */ static void poll_idle (void) { - int oldval; - local_irq_enable(); - /* - * Deal with another CPU just having chosen a thread to - * run here: - */ - oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); - - if (!oldval) { - set_thread_flag(TIF_POLLING_NRFLAG); - asm volatile( - "2:" - "testl %0, %1;" - "rep; nop;" - "je 2b;" - : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); - - clear_thread_flag(TIF_POLLING_NRFLAG); - } else { - set_need_resched(); - } + asm volatile( + "2:" + "testl %0, %1;" + "rep; nop;" + "je 2b;" + : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags)); } #ifdef CONFIG_HOTPLUG_CPU @@ -181,6 +174,8 @@ void cpu_idle(void) { int cpu = smp_processor_id(); + set_thread_flag(TIF_POLLING_NRFLAG); + /* endless idle loop with no priority at all */ while (1) { while (!need_resched()) { @@ -246,15 +241,12 @@ static void mwait_idle(void) { local_irq_enable(); - if (!need_resched()) { - set_thread_flag(TIF_POLLING_NRFLAG); - do { - __monitor((void *)¤t_thread_info()->flags, 0, 0); - if (need_resched()) - break; - __mwait(0, 0); - } while (!need_resched()); - clear_thread_flag(TIF_POLLING_NRFLAG); + while (!need_resched()) { + __monitor((void *)¤t_thread_info()->flags, 0, 0); + smp_mb(); + if (need_resched()) + break; + __mwait(0, 0); } } -- cgit v1.1 From 78ac151c83219bd3de591330c1efc4dc64dd9294 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 8 Nov 2005 21:39:21 -0800 Subject: [PATCH] i386: EXPORT_SYMBOL(screen_info) even #ifndef CONFIG_VT The folllowing modules require screen_info but don't depend on CONFIG_VT: - vga16fb.ko - intelfb.ko Signed-off-by: Adrian Bunk Acked-by: "Antonino A. Daplas" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/setup.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index b48ac635..fdfcb0c 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -129,9 +129,7 @@ struct drive_info_struct { char dummy[32]; } drive_info; EXPORT_SYMBOL(drive_info); #endif struct screen_info screen_info; -#ifdef CONFIG_VT EXPORT_SYMBOL(screen_info); -#endif struct apm_info apm_info; EXPORT_SYMBOL(apm_info); struct sys_desc_table_struct { -- cgit v1.1 From 6e6ece5dc6022e8086c565498d23511bbceda811 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Tue, 8 Nov 2005 20:13:02 -0800 Subject: [PATCH] PCI: fix for Toshiba ohci1394 quirk After much testing and agony, I've discovered that my previous ohci1394 quirk for Toshiba laptops is not 100% reliable. It apparently fails to do the interrupt line change either correctly or in time, since in about 2 out of 5 boots, the kernel's irqdebug code will *still* disable irq 11 when the ohci1394 driver is loaded (at pci_enable_device time I think). This patch switches things around a little in the workaround. First, it removes the mdelay. I didn't see a need for it and my testing has shown that it's not necessary for the quirk to work. Secondly, instead of trying to change the interrupt line to what ACPI tells us it should be, this patch makes the quirk use the value in the PCI_INTERRUPT_LINE register. On this laptop at least, that seems to be the right thing to do, though additional testing on other laptops and/or with actual firewire devices would be appreciated. Signed-off-by: Jesse Barnes Signed-off-by: Greg Kroah-Hartman --- arch/i386/pci/fixup.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/pci/fixup.c b/arch/i386/pci/fixup.c index 3984226..eeb1b1f2d 100644 --- a/arch/i386/pci/fixup.c +++ b/arch/i386/pci/fixup.c @@ -433,9 +433,8 @@ static void __devinit pci_post_fixup_toshiba_ohci1394(struct pci_dev *dev) return; /* only applies to certain Toshibas (so far) */ /* Restore config space on Toshiba laptops */ - mdelay(10); pci_write_config_word(dev, PCI_CACHE_LINE_SIZE, toshiba_line_size); - pci_write_config_word(dev, PCI_INTERRUPT_LINE, dev->irq); + pci_read_config_byte(dev, PCI_INTERRUPT_LINE, (u8 *)&dev->irq); pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, pci_resource_start(dev, 0)); pci_write_config_dword(dev, PCI_BASE_ADDRESS_1, -- cgit v1.1 From bca73e4bf8563d83f7856164caa44d5f42e44cca Mon Sep 17 00:00:00 2001 From: Jeff Garzik Date: Sun, 13 Nov 2005 16:06:25 -0800 Subject: [PATCH] move pm_register/etc. to CONFIG_PM_LEGACY, pm_legacy.h Since few people need the support anymore, this moves the legacy pm_xxx functions to CONFIG_PM_LEGACY, and include/linux/pm_legacy.h. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/Kconfig | 2 +- arch/i386/kernel/apm.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index dbf90ad..6004bb0 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -699,7 +699,7 @@ depends on PM && !X86_VISWS config APM tristate "APM (Advanced Power Management) BIOS support" - depends on PM + depends on PM && PM_LEGACY ---help--- APM is a BIOS specification for saving power using several different techniques. This is mostly useful for battery powered laptops with diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 003548b..1e60acb 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -218,6 +218,7 @@ #include #include #include +#include #include #include #include -- cgit v1.1 From 27d99f7ead8cd6d2231798bff0d4c38814afea22 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Sun, 13 Nov 2005 16:06:51 -0800 Subject: [PATCH] arch/i386/mm/init.c: small cleanups This patch contains the following cleanups: - make a needlessly global function static - every file should include the headers containing the prototypes for it's global functions Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mm/init.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 542d929..06e26f0 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -267,7 +268,7 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base) pkmap_page_table = pte; } -void __devinit free_new_highpage(struct page *page) +static void __devinit free_new_highpage(struct page *page) { set_page_count(page, 1); __free_page(page); -- cgit v1.1 From e27182088e607880713d9c286a3d92d861c280e4 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sun, 13 Nov 2005 16:06:52 -0800 Subject: [PATCH] i386: NMI pointer comparison fix Instruction pointer comparisons for the NMI on debug stack check/fixup were incorrect. From: Jan Beulich Cc: "Eric W. Biederman" Cc: Zwane Mwaikambo Acked-by: "Seth, Rohit" Cc: Zachary Amsden Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/entry.S | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 9e24f7b2..e50b9315 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -560,11 +560,10 @@ nmi_stack_fixup: nmi_debug_stack_check: cmpw $__KERNEL_CS,16(%esp) jne nmi_stack_correct - cmpl $debug - 1,(%esp) - jle nmi_stack_correct + cmpl $debug,(%esp) + jb nmi_stack_correct cmpl $debug_esp_fix_insn,(%esp) - jle nmi_debug_stack_fixup -nmi_debug_stack_fixup: + ja nmi_stack_correct FIX_STACK(24,nmi_stack_correct, 1) jmp nmi_stack_correct -- cgit v1.1 From 7feacd53347c04aee789ba5d632eda0c3fc421c4 Mon Sep 17 00:00:00 2001 From: Tim Mann Date: Sun, 13 Nov 2005 16:06:54 -0800 Subject: [PATCH] x86: fix cpu_khz with clock=pit Fix http://bugzilla.kernel.org/show_bug.cgi?id=5546 The cpu_khz global is not initialized and remains 0 if you boot with clock=pit, even if the processor does have a TSC. This may have bad ramifications since the variable is used in various places scattered around the kernel, though I didn't check them all to see if they can tolerate cpu_khz = 0. You can observe the problem by doing "cat /proc/cpuinfo"; the cpu MHz line says 0.000. The fix is trivial; call init_cpu_khz() from init_pit(), just as it's called from the timers/timer_foo.c:init_foo() for other values of foo. Cc: john stultz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/timers/timer_pit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/timers/timer_pit.c b/arch/i386/kernel/timers/timer_pit.c index e42e46d3..b9b6bd5 100644 --- a/arch/i386/kernel/timers/timer_pit.c +++ b/arch/i386/kernel/timers/timer_pit.c @@ -25,8 +25,9 @@ static int __init init_pit(char* override) { /* check clock override */ if (override[0] && strncmp(override,"pit",3)) - printk(KERN_ERR "Warning: clock= override failed. Defaulting to PIT\n"); - + printk(KERN_ERR "Warning: clock= override failed. Defaulting " + "to PIT\n"); + init_cpu_khz(); count_p = LATCH; return 0; } -- cgit v1.1 From 53e86b91b7ae66d4c2757195cbd42e00d9199cf2 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sun, 13 Nov 2005 16:07:23 -0800 Subject: [PATCH] i386: generic cmpxchg - Make cmpxchg generally available on the i386 platform. - Provide emulation of cmpxchg suitable for uniprocessor if built and run on 386. From: Christoph Lameter - Cut down patch and small style changes. Signed-off-by: Nick Piggin Signed-off-by: Christoph Lameter Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/intel.c | 48 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 43601de..c28d26f 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -264,5 +265,52 @@ __init int intel_cpu_init(void) return 0; } +#ifndef CONFIG_X86_CMPXCHG +unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new) +{ + u8 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u8 *)ptr; + if (prev == old) + *(u8 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u8); + +unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new) +{ + u16 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u16 *)ptr; + if (prev == old) + *(u16 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u16); + +unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new) +{ + u32 prev; + unsigned long flags; + + /* Poor man's cmpxchg for 386. Unsuitable for SMP */ + local_irq_save(flags); + prev = *(u32 *)ptr; + if (prev == old) + *(u32 *)ptr = new; + local_irq_restore(flags); + return prev; +} +EXPORT_SYMBOL(cmpxchg_386_u32); +#endif + // arch_initcall(intel_cpu_init); -- cgit v1.1 From fed644132f8ec4bf05b63f79c507c0acaa692c37 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 5 Nov 2005 17:25:53 +0100 Subject: [PATCH] x86_64: Make i386 compile again with fourth DMA32 zone The code should deal with an additional empty zone, so fix up the #error. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/srat.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/srat.c b/arch/i386/kernel/srat.c index 8de658d..52b3ed5 100644 --- a/arch/i386/kernel/srat.c +++ b/arch/i386/kernel/srat.c @@ -137,8 +137,8 @@ static void __init parse_memory_affinity_structure (char *sratp) "enabled and removable" : "enabled" ) ); } -#if MAX_NR_ZONES != 3 -#error "MAX_NR_ZONES != 3, chunk_to_zone requires review" +#if MAX_NR_ZONES != 4 +#error "MAX_NR_ZONES != 4, chunk_to_zone requires review" #endif /* Take a chunk of pages from page frame cstart to cend and count the number * of pages in each zone, returned via zones[]. -- cgit v1.1 From 6004e1b7effcbb385a6b7c790e4b8008682cf679 Mon Sep 17 00:00:00 2001 From: James Cleverdon Date: Sat, 5 Nov 2005 17:25:53 +0100 Subject: [PATCH] i386/x86-64: Share interrupt vectors when there is a large number of interrupt sources Here's a patch that builds on Natalie Protasevich's IRQ compression patch and tries to work for MPS boots as well as ACPI. It is meant for a 4-node IBM x460 NUMA box, which was dying because it had interrupt pins with GSI numbers > NR_IRQS and thus overflowed irq_desc. The problem is that this system has 270 GSIs (which are 1:1 mapped with I/O APIC RTEs) and an 8-node box would have 540. This is much bigger than NR_IRQS (224 for both i386 and x86_64). Also, there aren't enough vectors to go around. There are about 190 usable vectors, not counting the reserved ones and the unused vectors at 0x20 to 0x2F. So, my patch attempts to compress the GSI range and share vectors by sharing IRQs. Cc: "Protasevich, Natalie" Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/acpi/boot.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index b66c13c..82754bb 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -39,17 +39,14 @@ #ifdef CONFIG_X86_64 -static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id) -{ -} extern void __init clustered_apic_check(void); -static inline int ioapic_setup_disabled(void) -{ - return 0; -} +extern int gsi_irq_sharing(int gsi); #include +static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } + + #else /* X86 */ #ifdef CONFIG_X86_LOCAL_APIC @@ -57,6 +54,8 @@ static inline int ioapic_setup_disabled(void) #include #endif /* CONFIG_X86_LOCAL_APIC */ +static inline int gsi_irq_sharing(int gsi) { return gsi; } + #endif /* X86 */ #define BAD_MADT_ENTRY(entry, end) ( \ @@ -459,7 +458,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) *irq = IO_APIC_VECTOR(gsi); else #endif - *irq = gsi; + *irq = gsi_irq_sharing(gsi); return 0; } -- cgit v1.1 From f5f786d0455c359c554b8f74783f887c0a2c9fac Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Sat, 5 Nov 2005 17:25:53 +0100 Subject: [PATCH] x86-64/i386: Fix CPU model for family 6 According to cpuid instruction in IA32 SDM-Vol2, when computing cpu model, we need to consider extended model ID for family 0x6 also. AK: Also added fixes/simplifcation from Petr Vandrovec Signed-off-by: Suresh Siddha Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 74145a3..35a67da 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -233,10 +233,10 @@ static void __init early_cpu_detect(void) cpuid(0x00000001, &tfms, &misc, &junk, &cap0); c->x86 = (tfms >> 8) & 15; c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) { + if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; - } c->x86_mask = tfms & 15; if (cap0 & (1<<19)) c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; -- cgit v1.1 From f6c2e3330d3fdd5474bc3756da46fca889a30e33 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Sat, 5 Nov 2005 17:25:53 +0100 Subject: [PATCH] x86_64: Unmap NULL during early bootup We should zap the low mappings, as soon as possible, so that we can catch kernel bugs more effectively. Previously early boot had NULL mapped and didn't trap on NULL references. This patch introduces boot_level4_pgt, which will always have low identity addresses mapped. Druing boot, all the processors will use this as their level4 pgt. On BP, we will switch to init_level4_pgt as soon as we enter C code and zap the low mappings as soon as we are done with the usage of identity low mapped addresses. On AP's we will zap the low mappings as soon as we jump to C code. Signed-off-by: Suresh Siddha Signed-off-by: Ashok Raj Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/acpi/boot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 82754bb..f366772 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -542,7 +542,7 @@ acpi_scan_rsdp(unsigned long start, unsigned long length) * RSDP signature. */ for (offset = 0; offset < length; offset += 16) { - if (strncmp((char *)(start + offset), "RSD PTR ", sig_len)) + if (strncmp((char *)(phys_to_virt(start) + offset), "RSD PTR ", sig_len)) continue; return (start + offset); } -- cgit v1.1 From af9c142de94ecf724a18700273bbba390873e072 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Sat, 5 Nov 2005 17:25:54 +0100 Subject: [PATCH] x86_64: Force correct address space size for MTRR on some 64bit Intel Xeons They report 40bit, but only have 36bits of physical address space. This caused problems with setting up the correct masks for MTRR. CPUID workaround for steppings 0F33h(supporting x86) and 0F34h(supporting x86 and EM64T). Detail info can be found at: http://download.intel.com/design/Xeon/specupdt/30240216.pdf http://download.intel.com/design/Pentium4/specupdt/30235221.pdf Signed-off-by: Shaohua Li Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/mtrr/main.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index dd4ebd6..1e9db19 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -626,6 +626,14 @@ void __init mtrr_bp_init(void) if (cpuid_eax(0x80000000) >= 0x80000008) { u32 phys_addr; phys_addr = cpuid_eax(0x80000008) & 0xff; + /* CPUID workaround for Intel 0F33/0F34 CPU */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 0xF && + boot_cpu_data.x86_model == 0x3 && + (boot_cpu_data.x86_mask == 0x3 || + boot_cpu_data.x86_mask == 0x4)) + phys_addr = 36; + size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1); size_and_mask = ~size_or_mask & 0xfff00000; } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && -- cgit v1.1 From 94605eff572b727aaad9b4b29bc358b919096503 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Sat, 5 Nov 2005 17:25:54 +0100 Subject: [PATCH] x86-64/i386: Intel HT, Multi core detection fixes Fields obtained through cpuid vector 0x1(ebx[16:23]) and vector 0x4(eax[14:25], eax[26:31]) indicate the maximum values and might not always be the same as what is available and what OS sees. So make sure "siblings" and "cpu cores" values in /proc/cpuinfo reflect the values as seen by OS instead of what cpuid instruction says. This will also fix the buggy BIOS cases (for example where cpuid on a single core cpu says there are "2" siblings, even when HT is disabled in the BIOS. http://bugzilla.kernel.org/show_bug.cgi?id=4359) Signed-off-by: Suresh Siddha Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/amd.c | 12 +++--- arch/i386/kernel/cpu/common.c | 36 +++++++---------- arch/i386/kernel/cpu/intel.c | 2 +- arch/i386/kernel/cpu/intel_cacheinfo.c | 2 +- arch/i386/kernel/cpu/proc.c | 7 ++-- arch/i386/kernel/smpboot.c | 73 ++++++++++++++++++++++++---------- 6 files changed, 78 insertions(+), 54 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 53a1681..e344ef88 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c @@ -206,9 +206,9 @@ static void __init init_amd(struct cpuinfo_x86 *c) display_cacheinfo(c); if (cpuid_eax(0x80000000) >= 0x80000008) { - c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; - if (c->x86_num_cores & (c->x86_num_cores - 1)) - c->x86_num_cores = 1; + c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; + if (c->x86_max_cores & (c->x86_max_cores - 1)) + c->x86_max_cores = 1; } #ifdef CONFIG_X86_HT @@ -217,15 +217,15 @@ static void __init init_amd(struct cpuinfo_x86 *c) * distingush the cores. Assumes number of cores is a power * of two. */ - if (c->x86_num_cores > 1) { + if (c->x86_max_cores > 1) { int cpu = smp_processor_id(); unsigned bits = 0; - while ((1 << bits) < c->x86_num_cores) + while ((1 << bits) < c->x86_max_cores) bits++; cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<>= bits; printk(KERN_INFO "CPU %d(%d) -> Core %d\n", - cpu, c->x86_num_cores, cpu_core_id[cpu]); + cpu, c->x86_max_cores, cpu_core_id[cpu]); } #endif } diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 35a67da..4e9c2e9 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -335,7 +335,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_num_cores = 1; + c->x86_max_cores = 1; memset(&c->x86_capability, 0, sizeof c->x86_capability); if (!have_cpuid_p()) { @@ -446,52 +446,44 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) void __devinit detect_ht(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; - int index_msb, tmp; + int index_msb, core_bits; int cpu = smp_processor_id(); + cpuid(1, &eax, &ebx, &ecx, &edx); + + c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); + if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; - cpuid(1, &eax, &ebx, &ecx, &edx); smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); } else if (smp_num_siblings > 1 ) { - index_msb = 31; if (smp_num_siblings > NR_CPUS) { printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); smp_num_siblings = 1; return; } - tmp = smp_num_siblings; - while ((tmp & 0x80000000 ) == 0) { - tmp <<=1 ; - index_msb--; - } - if (smp_num_siblings & (smp_num_siblings - 1)) - index_msb++; + + index_msb = get_count_order(smp_num_siblings); phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); printk(KERN_INFO "CPU: Physical Processor ID: %d\n", phys_proc_id[cpu]); - smp_num_siblings = smp_num_siblings / c->x86_num_cores; + smp_num_siblings = smp_num_siblings / c->x86_max_cores; - tmp = smp_num_siblings; - index_msb = 31; - while ((tmp & 0x80000000) == 0) { - tmp <<=1 ; - index_msb--; - } + index_msb = get_count_order(smp_num_siblings) ; - if (smp_num_siblings & (smp_num_siblings - 1)) - index_msb++; + core_bits = get_count_order(c->x86_max_cores); - cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); + cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & + ((1 << core_bits) - 1); - if (c->x86_num_cores > 1) + if (c->x86_max_cores > 1) printk(KERN_INFO "CPU: Processor Core ID: %d\n", cpu_core_id[cpu]); } diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 43601de..8d603ba 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -157,7 +157,7 @@ static void __devinit init_intel(struct cpuinfo_x86 *c) if ( p ) strcpy(c->x86_model_id, p); - c->x86_num_cores = num_cpu_cores(c); + c->x86_max_cores = num_cpu_cores(c); detect_ht(c); diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 4dc42a1..e66d140 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -307,7 +307,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) #ifdef CONFIG_X86_HT else if (num_threads_sharing == smp_num_siblings) this_leaf->shared_cpu_map = cpu_sibling_map[cpu]; - else if (num_threads_sharing == (c->x86_num_cores * smp_num_siblings)) + else if (num_threads_sharing == (c->x86_max_cores * smp_num_siblings)) this_leaf->shared_cpu_map = cpu_core_map[cpu]; else printk(KERN_DEBUG "Number of CPUs sharing cache didn't match " diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 41b871e..e792131 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -94,12 +94,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) if (c->x86_cache_size >= 0) seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); #ifdef CONFIG_X86_HT - if (c->x86_num_cores * smp_num_siblings > 1) { + if (c->x86_max_cores * smp_num_siblings > 1) { seq_printf(m, "physical id\t: %d\n", phys_proc_id[n]); - seq_printf(m, "siblings\t: %d\n", - c->x86_num_cores * smp_num_siblings); + seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[n])); seq_printf(m, "core id\t\t: %d\n", cpu_core_id[n]); - seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); + seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } #endif diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 01b618e..0a9c646 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -74,9 +74,11 @@ EXPORT_SYMBOL(phys_proc_id); int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; EXPORT_SYMBOL(cpu_core_id); +/* representing HT siblings of each logical CPU */ cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); +/* representing HT and core siblings of each logical CPU */ cpumask_t cpu_core_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_core_map); @@ -444,35 +446,60 @@ static void __devinit smp_callin(void) static int cpucount; +/* representing cpus for which sibling maps can be computed */ +static cpumask_t cpu_sibling_setup_map; + static inline void set_cpu_sibling_map(int cpu) { int i; + struct cpuinfo_x86 *c = cpu_data; + + cpu_set(cpu, cpu_sibling_setup_map); if (smp_num_siblings > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (cpu_core_id[cpu] == cpu_core_id[i]) { + for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (phys_proc_id[cpu] == phys_proc_id[i] && + cpu_core_id[cpu] == cpu_core_id[i]) { cpu_set(i, cpu_sibling_map[cpu]); cpu_set(cpu, cpu_sibling_map[i]); + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); } } } else { cpu_set(cpu, cpu_sibling_map[cpu]); } - if (current_cpu_data.x86_num_cores > 1) { - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_isset(i, cpu_callout_map)) - continue; - if (phys_proc_id[cpu] == phys_proc_id[i]) { - cpu_set(i, cpu_core_map[cpu]); - cpu_set(cpu, cpu_core_map[i]); - } - } - } else { + if (current_cpu_data.x86_max_cores == 1) { cpu_core_map[cpu] = cpu_sibling_map[cpu]; + c[cpu].booted_cores = 1; + return; + } + + for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (phys_proc_id[cpu] == phys_proc_id[i]) { + cpu_set(i, cpu_core_map[cpu]); + cpu_set(cpu, cpu_core_map[i]); + /* + * Does this new cpu bringup a new core? + */ + if (cpus_weight(cpu_sibling_map[cpu]) == 1) { + /* + * for each core in package, increment + * the booted_cores for this new cpu + */ + if (first_cpu(cpu_sibling_map[i]) == i) + c[cpu].booted_cores++; + /* + * increment the core count for all + * the other cpus in this package + */ + if (i != cpu) + c[i].booted_cores++; + } else if (i != cpu && !c[cpu].booted_cores) + c[cpu].booted_cores = c[i].booted_cores; + } } } @@ -1096,11 +1123,8 @@ static void __init smp_boot_cpus(unsigned int max_cpus) current_thread_info()->cpu = 0; smp_tune_scheduling(); - cpus_clear(cpu_sibling_map[0]); - cpu_set(0, cpu_sibling_map[0]); - cpus_clear(cpu_core_map[0]); - cpu_set(0, cpu_core_map[0]); + set_cpu_sibling_map(0); /* * If we couldn't find an SMP configuration at boot time, @@ -1279,15 +1303,24 @@ static void remove_siblinginfo(int cpu) { int sibling; + struct cpuinfo_x86 *c = cpu_data; + for_each_cpu_mask(sibling, cpu_core_map[cpu]) { + cpu_clear(cpu, cpu_core_map[sibling]); + /* + * last thread sibling in this cpu core going down + */ + if (cpus_weight(cpu_sibling_map[cpu]) == 1) + c[sibling].booted_cores--; + } + for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) cpu_clear(cpu, cpu_sibling_map[sibling]); - for_each_cpu_mask(sibling, cpu_core_map[cpu]) - cpu_clear(cpu, cpu_core_map[sibling]); cpus_clear(cpu_sibling_map[cpu]); cpus_clear(cpu_core_map[cpu]); phys_proc_id[cpu] = BAD_APICID; cpu_core_id[cpu] = BAD_APICID; + cpu_clear(cpu, cpu_sibling_setup_map); } int __cpu_disable(void) -- cgit v1.1 From 2b0918758dd68d6b8d01318a5200b65b9209760d Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Sat, 5 Nov 2005 17:25:54 +0100 Subject: [PATCH] x86_64: x86_64/i386 fix Intel cache detection code assumption about threads sharing Fix the Intel cache detection code assumption that number of threads sharing the cache will either be equal to number of HT or core siblings. This also cleans up the code in general a bit. Signed-off-by: Suresh Siddha Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/intel_cacheinfo.c | 46 +++++++++++++++++++++++----------- 1 file changed, 32 insertions(+), 14 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index e66d140..fbfd374 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -293,29 +293,45 @@ static struct _cpuid4_info *cpuid4_info[NR_CPUS]; #ifdef CONFIG_SMP static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) { - struct _cpuid4_info *this_leaf; + struct _cpuid4_info *this_leaf, *sibling_leaf; unsigned long num_threads_sharing; -#ifdef CONFIG_X86_HT - struct cpuinfo_x86 *c = cpu_data + cpu; -#endif + int index_msb, i; + struct cpuinfo_x86 *c = cpu_data; this_leaf = CPUID4_INFO_IDX(cpu, index); num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; if (num_threads_sharing == 1) cpu_set(cpu, this_leaf->shared_cpu_map); -#ifdef CONFIG_X86_HT - else if (num_threads_sharing == smp_num_siblings) - this_leaf->shared_cpu_map = cpu_sibling_map[cpu]; - else if (num_threads_sharing == (c->x86_max_cores * smp_num_siblings)) - this_leaf->shared_cpu_map = cpu_core_map[cpu]; - else - printk(KERN_DEBUG "Number of CPUs sharing cache didn't match " - "any known set of CPUs\n"); -#endif + else { + index_msb = get_count_order(num_threads_sharing); + + for_each_online_cpu(i) { + if (c[i].apicid >> index_msb == + c[cpu].apicid >> index_msb) { + cpu_set(i, this_leaf->shared_cpu_map); + if (i != cpu && cpuid4_info[i]) { + sibling_leaf = CPUID4_INFO_IDX(i, index); + cpu_set(cpu, sibling_leaf->shared_cpu_map); + } + } + } + } +} +static void __devinit cache_remove_shared_cpu_map(unsigned int cpu, int index) +{ + struct _cpuid4_info *this_leaf, *sibling_leaf; + int sibling; + + this_leaf = CPUID4_INFO_IDX(cpu, index); + for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { + sibling_leaf = CPUID4_INFO_IDX(sibling, index); + cpu_clear(cpu, sibling_leaf->shared_cpu_map); + } } #else static void __init cache_shared_cpu_map_setup(unsigned int cpu, int index) {} +static void __init cache_remove_shared_cpu_map(unsigned int cpu, int index) {} #endif static void free_cache_attributes(unsigned int cpu) @@ -574,8 +590,10 @@ static void __cpuexit cache_remove_dev(struct sys_device * sys_dev) unsigned int cpu = sys_dev->id; unsigned long i; - for (i = 0; i < num_cache_leaves; i++) + for (i = 0; i < num_cache_leaves; i++) { + cache_remove_shared_cpu_map(cpu, i); kobject_unregister(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); + } kobject_unregister(cache_kobject[cpu]); cpuid4_cache_sysfs_exit(cpu); return; -- cgit v1.1 From 19842d67340e4a8f616552d344e97fc7452aa37a Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Tue, 15 Nov 2005 00:09:04 -0800 Subject: [PATCH] drop "[PATCH] i386 kexec-on-panic: Don't shutdown the apics" A patch by Eric was merged (f2b36db692b7ff6972320ad9839ae656a3b0ee3e) and later on reverted back (1e4c85f97fe26fbd70da12148b3992c0e00361fd). Along with above patch, another patch was posted and has been merged (3d1675b41b02d64bd1185903ea0d25a8c0bb6dea). That patch was dependent on the above patch and now it should also be reverted. Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/crash.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index af809cc..0248e08 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -21,6 +21,7 @@ #include #include #include +#include #include @@ -147,6 +148,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) regs = &fixed_regs; } crash_save_this_cpu(regs, cpu); + disable_local_APIC(); atomic_dec(&waiting_for_crash_ipi); /* Assume hlt works */ halt(); @@ -186,6 +188,7 @@ static void nmi_shootdown_cpus(void) } /* Leave the nmi callback set */ + disable_local_APIC(); } #else static void nmi_shootdown_cpus(void) @@ -210,5 +213,9 @@ void machine_crash_shutdown(struct pt_regs *regs) /* Make a note of crashing cpu. Will be used in NMI callback.*/ crashing_cpu = smp_processor_id(); nmi_shootdown_cpus(); + lapic_shutdown(); +#if defined(CONFIG_X86_IO_APIC) + disable_IO_APIC(); +#endif crash_save_self(regs); } -- cgit v1.1 From d7169160bb772efe6510d8bc0c8e7625efbcc0b3 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sun, 20 Nov 2005 18:49:05 +0100 Subject: [PATCH] i386: Use bigsmp for > 8 core Opteron systems bigsmp is reported to work on large Opteron systems on 32bit too. Enable it by default there. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/mpparse.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 8f767d9..1ca5269b1 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -220,8 +220,9 @@ static void __devinit MP_processor_info (struct mpc_config_processor *m) num_processors++; if ((num_processors > 8) && - APIC_XAPIC(ver) && - (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) + ((APIC_XAPIC(ver) && + (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) || + (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))) def_to_bigsmp = 1; else def_to_bigsmp = 0; -- cgit v1.1 From fbe83e209ad9c8281e29ac17a60f91119d86fa8c Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Sun, 20 Nov 2005 18:49:06 +0100 Subject: [PATCH] Register disabled CPUs Needed to make the earlier use disabled CPUs for CPU hotplug patch actually work. Need to register disabled processors as well, so we can count them towards cpu_possible_map as hot pluggable cpus. Signed-off-by: Ashok Raj Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/acpi/boot.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index f366772..76b1135 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -248,9 +248,7 @@ acpi_parse_lapic(acpi_table_entry_header * header, const unsigned long end) acpi_table_print_madt_entry(header); - /* no utility in registering a disabled processor */ - if (processor->flags.enabled == 0) - return 0; + /* Register even disabled CPUs for cpu hotplug */ x86_acpiid_to_apicid[processor->acpi_id] = processor->id; -- cgit v1.1 From 8bf1101bd52573e0573e374d56d2feecdbb5e444 Mon Sep 17 00:00:00 2001 From: Jim Keniston Date: Wed, 23 Nov 2005 13:37:42 -0800 Subject: [PATCH] kprobes: Fix return probes on sys_execve Fix a bug in kprobes that can cause an Oops or even a crash when a return probe is installed on one of the following functions: sys_execve, do_execve, load_*_binary, flush_old_exec, or flush_thread. The fix is to remove the call to kprobe_flush_task() in flush_thread(). This fix has been tested on all architectures for which the return-probes feature has been implemented (i386, x86_64, ppc64, ia64). Please apply. BACKGROUND Up to now, we have called kprobe_flush_task() under two situations: when a task exits, and when it execs. Flushing kretprobe_instances on exit is correct because (a) do_exit() doesn't return, and (b) one or more return-probed functions may be active when a task calls do_exit(). Neither is the case for sys_execve() and its callees. Initially, the mistaken call to kprobe_flush_task() on exec was harmless because we put the "real" return address of each active probed function back in the stack, just to be safe, when we recycled its kretprobe_instance. When support for ppc64 and ia64 was added, this safety measure couldn't be employed, and was eventually dropped even for i386 and x86_64. sys_execve() and its callees were informally blacklisted for return probes until this fix was developed. Acked-by: Prasanna S Panchamukhi Signed-off-by: Jim Keniston Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/process.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 1cb261f..df6c2bc 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -393,13 +393,6 @@ void flush_thread(void) { struct task_struct *tsk = current; - /* - * Remove function-return probe instances associated with this task - * and put them back on the free list. Do not insert an exit probe for - * this function, it will be disabled by kprobe_flush_task if you do. - */ - kprobe_flush_task(tsk); - memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); /* -- cgit v1.1 From dcb890749bbe63af96163c499e9c86b441fb6c83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Marjam=E4kia?= Date: Wed, 23 Nov 2005 15:44:49 -0800 Subject: [PATCH] PCI: trivial printk updates in common.c MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Modified common.c so it's using the appropriate KERN_* in printk() calls. Signed-off-by: Daniel Marjamäkia Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- arch/i386/pci/common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c index c96bea1..f6bc48d 100644 --- a/arch/i386/pci/common.c +++ b/arch/i386/pci/common.c @@ -132,7 +132,7 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) } } - printk("PCI: Probing PCI hardware (bus %02x)\n", busnum); + printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); return pci_scan_bus_parented(NULL, busnum, &pci_root_ops, NULL); } @@ -144,7 +144,7 @@ static int __init pcibios_init(void) struct cpuinfo_x86 *c = &boot_cpu_data; if (!raw_pci_ops) { - printk("PCI: System does not support PCI\n"); + printk(KERN_WARNING "PCI: System does not support PCI\n"); return 0; } -- cgit v1.1 From ed6d14f9760857c745206c978b80352fc09cfd19 Mon Sep 17 00:00:00 2001 From: Rajesh Shah Date: Wed, 23 Nov 2005 15:44:59 -0800 Subject: [PATCH] PCI: remove bogus resource collision error When attempting to hotadd a PCI card with a bridge on it, I saw the kernel reporting resource collision errors even when there were really no collisions. The problem is that the code doesn't skip over "invalid" resources with their resource type flag not set. Others have reported similar problems at boot time and for non-bridge PCI card hotplug too, where the code flags a resource collision for disabled ROMs. This patch fixes both problems. Signed-off-by: Rajesh Shah Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- arch/i386/pci/i386.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/pci/i386.c b/arch/i386/pci/i386.c index 6d63385..ed2c8c8 100644 --- a/arch/i386/pci/i386.c +++ b/arch/i386/pci/i386.c @@ -221,6 +221,11 @@ int pcibios_enable_resources(struct pci_dev *dev, int mask) continue; r = &dev->resource[idx]; + if (!(r->flags & (IORESOURCE_IO | IORESOURCE_MEM))) + continue; + if ((idx == PCI_ROM_RESOURCE) && + (!(r->flags & IORESOURCE_ROM_ENABLE))) + continue; if (!r->start && r->end) { printk(KERN_ERR "PCI: Device %s not available because of resource collisions\n", pci_name(dev)); return -EINVAL; @@ -230,8 +235,6 @@ int pcibios_enable_resources(struct pci_dev *dev, int mask) if (r->flags & IORESOURCE_MEM) cmd |= PCI_COMMAND_MEMORY; } - if (dev->resource[PCI_ROM_RESOURCE].start) - cmd |= PCI_COMMAND_MEMORY; if (cmd != old_cmd) { printk("PCI: Enabling device %s (%04x -> %04x)\n", pci_name(dev), old_cmd, cmd); pci_write_config_word(dev, PCI_COMMAND, cmd); -- cgit v1.1 From cac1a293469a868fab1ecc2dc1b6441728f7e0e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Marjam=E4ki?= Date: Wed, 23 Nov 2005 15:45:09 -0800 Subject: [PATCH] PCI: direct.c: DBG MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The DBG() call where updated with the appropriate KERN_* symbol. Signed-off-by: Daniel Marjamäki Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- arch/i386/pci/direct.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386') diff --git a/arch/i386/pci/direct.c b/arch/i386/pci/direct.c index 30b7e9b..94331d6 100644 --- a/arch/i386/pci/direct.c +++ b/arch/i386/pci/direct.c @@ -201,7 +201,7 @@ static int __init pci_sanity_check(struct pci_raw_ops *o) return 1; } - DBG("PCI: Sanity check failed\n"); + DBG(KERN_WARNING "PCI: Sanity check failed\n"); return 0; } -- cgit v1.1 From d91b14c463306eb6527550ba48617e7f5500d3ae Mon Sep 17 00:00:00 2001 From: Thierry Vignaud Date: Tue, 29 Nov 2005 19:34:35 -0800 Subject: [PATCH] fix rebooting on HP nc6120 laptop Anne NICOLAS and Andres Kaaber reported their HP laptop didn't reboot smoothly. Signed-off-by: Thierry Vignaud Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index 350ea66..2afe0f8 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c @@ -111,6 +111,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), }, }, + { /* Handle problems with rebooting on HP nc6120 */ + .callback = set_bios_reboot, + .ident = "HP Compaq nc6120", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), + DMI_MATCH(DMI_PRODUCT_NAME, "HP Compaq nc6120"), + }, + }, { } }; -- cgit v1.1 From fe655d3a06488c8a188461bca493e9f23fc8c448 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 29 Nov 2005 19:34:42 -0800 Subject: [PATCH] setting irq affinity is broken in ia32 with MSI enabled Setting irq affinity stops working when MSI is enabled. With MSI, move_irq is empty, so we can't change irq affinity. It appears a typo in Ashok's original commit for this issue. X86_64 actually is using move_native_irq. Signed-off-by: Shaohua Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/io_apic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386') diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index cc5d7ac..22c8675 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -2009,7 +2009,7 @@ static void ack_edge_ioapic_vector(unsigned int vector) { int irq = vector_to_irq(vector); - move_irq(vector); + move_native_irq(vector); ack_edge_ioapic_irq(irq); } @@ -2024,7 +2024,7 @@ static void end_level_ioapic_vector (unsigned int vector) { int irq = vector_to_irq(vector); - move_irq(vector); + move_native_irq(vector); end_level_ioapic_irq(irq); } -- cgit v1.1 From e6e87b4bfe3720b4308a8e669078d9be58bc9780 Mon Sep 17 00:00:00 2001 From: David Shaohua Li Date: Wed, 21 Sep 2005 01:35:00 -0400 Subject: [ACPI] properly detect pmtimer on ASUS a8v motherboard Handle FADT 2.0 xpmtmr address 0 case. http://bugzilla.kernel.org/show_bug.cgi?id=5283 Signed-off-by: Shaohua Li Signed-off-by: Andrew Morton Signed-off-by: Len Brown --- arch/i386/kernel/acpi/boot.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/i386') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 76b1135..447fa9e 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -638,6 +638,13 @@ static int __init acpi_parse_fadt(unsigned long phys, unsigned long size) return 0; pmtmr_ioport = fadt->xpm_tmr_blk.address; + /* + * "X" fields are optional extensions to the original V1.0 + * fields, so we must selectively expand V1.0 fields if the + * corresponding X field is zero. + */ + if (!pmtmr_ioport) + pmtmr_ioport = fadt->V1_pm_tmr_blk; } else { /* FADT rev. 1 */ pmtmr_ioport = fadt->V1_pm_tmr_blk; -- cgit v1.1