From 5d8b532af9e52ea89208f5ef31889f646e67ba28 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 16 Jan 2009 23:09:14 +0100 Subject: ACPI suspend: Fix compilation warnings in drivers/acpi/sleep.c Fix two compilation warnings in drivers/acpi/sleep.c, one triggered by unsetting CONFIG_SUSPEND and the other triggered by unsetting CONFIG_HIBERNATION, by moving some code under the appropriate #ifdefs . Signed-off-by: Rafael J. Wysocki Signed-off-by: Len Brown --- arch/x86/kernel/acpi/sleep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 707c1f6..a60c1f3 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -156,11 +156,11 @@ static int __init acpi_sleep_setup(char *str) #ifdef CONFIG_HIBERNATION if (strncmp(str, "s4_nohwsig", 10) == 0) acpi_no_s4_hw_signature(); + if (strncmp(str, "s4_nonvs", 8) == 0) + acpi_s4_no_nvs(); #endif if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); - if (strncmp(str, "s4_nonvs", 8) == 0) - acpi_s4_no_nvs(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); -- cgit v1.1 From 92ab78315c638515d0e81b0c70b2082f713582d9 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Sat, 31 Jan 2009 17:24:43 +0100 Subject: x86/Voyager: make it build and boot [ mingo@elte.hu: these fixes are a subset of changes cherry-picked from: git://git.kernel.org:/pub/scm/linux/kernel/git/jejb/voyager-2.6.git They fix various problems that recent x86 changes caused in the Voyager subarchitecture: both APIC changes and cpumask changes and certain cleanups caused subarch assumptions to break. Most of these changes are obsolete as the subarch code has been removed from the x86 development tree - but we merge them upstream to make Voyager build and boot. ] Signed-off-by: James Bottomley Signed-off-by: Ingo Molnar --- arch/x86/kernel/irqinit_32.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 1507ad4e..10a09c2 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -78,15 +78,6 @@ void __init init_ISA_irqs(void) } } -/* - * IRQ2 is cascade interrupt to second interrupt controller - */ -static struct irqaction irq2 = { - .handler = no_action, - .mask = CPU_MASK_NONE, - .name = "cascade", -}; - DEFINE_PER_CPU(vector_irq_t, vector_irq) = { [0 ... IRQ0_VECTOR - 1] = -1, [IRQ0_VECTOR] = 0, @@ -178,9 +169,6 @@ void __init native_init_IRQ(void) alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); #endif - if (!acpi_ioapic) - setup_irq(2, &irq2); - /* setup after call gates are initialised (usually add in * the architecture specific gates) */ -- cgit v1.1 From 9a8ecae87a2b698964b1db9ea504ba1099f479fc Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 31 Jan 2009 20:12:14 -0500 Subject: x86: add cache descriptors for Intel Core i7 Signed-off-by: Dave Jones Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 48533d7..da299eb 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -36,8 +36,11 @@ static struct _cache_table cache_table[] __cpuinitdata = { { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ + { 0x09, LVL_1_INST, 32 }, /* 4-way set assoc, 64 byte line size */ { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ + { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ + { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */ { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ { 0x23, LVL_3, 1024 }, /* 8-way set assoc, sectored cache, 64 byte line size */ { 0x25, LVL_3, 2048 }, /* 8-way set assoc, sectored cache, 64 byte line size */ @@ -85,6 +88,18 @@ static struct _cache_table cache_table[] __cpuinitdata = { 0x85, LVL_2, 2048 }, /* 8-way set assoc, 32 byte line size */ { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ { 0x87, LVL_2, 1024 }, /* 8-way set assoc, 64 byte line size */ + { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */ + { 0xd1, LVL_3, 1024 }, /* 4-way set assoc, 64 byte line size */ + { 0xd2, LVL_3, 2048 }, /* 4-way set assoc, 64 byte line size */ + { 0xd6, LVL_3, 1024 }, /* 8-way set assoc, 64 byte line size */ + { 0xd7, LVL_3, 2038 }, /* 8-way set assoc, 64 byte line size */ + { 0xd8, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ + { 0xdc, LVL_3, 2048 }, /* 12-way set assoc, 64 byte line size */ + { 0xdd, LVL_3, 4096 }, /* 12-way set assoc, 64 byte line size */ + { 0xde, LVL_3, 8192 }, /* 12-way set assoc, 64 byte line size */ + { 0xe2, LVL_3, 2048 }, /* 16-way set assoc, 64 byte line size */ + { 0xe3, LVL_3, 4096 }, /* 16-way set assoc, 64 byte line size */ + { 0xe4, LVL_3, 8192 }, /* 16-way set assoc, 64 byte line size */ { 0x00, 0, 0} }; -- cgit v1.1 From 10b888d6cec2688e65e9e128b14bf98ecd199da2 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sat, 31 Jan 2009 14:50:07 -0800 Subject: irq, x86: fix lock status with numa_migrate_irq_desc Eric Paris reported: > I have an hp dl785g5 which is unable to successfully run > 2.6.29-0.66.rc3.fc11.x86_64 or 2.6.29-rc2-next-20090126. During bootup > (early in userspace daemons starting) I get the below BUG, which quickly > renders the machine dead. I assume it is because sparse_irq_lock never > gets released when the BUG kills that task. Adjust lock sequence when migrating a descriptor with CONFIG_NUMA_MIGRATE_IRQ_DESC enabled. Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/io_apic.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 1c4a130..9b0c480 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -2528,14 +2528,15 @@ static void irq_complete_move(struct irq_desc **descp) vector = ~get_irq_regs()->orig_ax; me = smp_processor_id(); + + if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) { #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC *descp = desc = move_irq_desc(desc, me); /* get the new one */ cfg = desc->chip_data; #endif - - if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) send_cleanup_vector(cfg); + } } #else static inline void irq_complete_move(struct irq_desc **descp) {} -- cgit v1.1 From a67798cd7bb130bf37f5ffb28f3260f4c10232db Mon Sep 17 00:00:00 2001 From: Martin Hicks Date: Fri, 30 Jan 2009 10:50:54 -0600 Subject: x86: push old stack address on irqstack for unwinder Impact: Fixes dumpstack and KDB on 64 bits This re-adds the old stack pointer to the top of the irqstack to help with unwinding. It was removed in commit d99015b1abbad743aa049b439c1e1dede6d0fa49 as part of the save_args out-of-line work. Both dumpstack and KDB require this information. Signed-off-by: Martin Hicks Signed-off-by: H. Peter Anvin --- arch/x86/kernel/entry_64.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e28c7a9..a134621 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -346,6 +346,7 @@ ENTRY(save_args) popq_cfi %rax /* move return address... */ mov %gs:pda_irqstackptr,%rsp EMPTY_FRAME 0 + pushq_cfi %rbp /* backlink for unwinder */ pushq_cfi %rax /* ... to the new stack */ /* * We entered an interrupt context - irqs are off: -- cgit v1.1 From 858770619debfb9269add63e4ba8b7c6b5538dd1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 3 Feb 2009 16:24:22 +0100 Subject: x86: APIC: enable workaround on AMD Fam10h CPUs Impact: fix to enable APIC for AMD Fam10h on chipsets with a missing/b0rked ACPI MP table (MADT) Booting a 32bit kernel on an AMD Fam10h CPU running on chipsets with missing/b0rked MP table leads to a hang pretty early in the boot process due to the APIC not being initialized. Fix that by falling back to the default APIC base address in 32bit code, as it is done in the 64bit codepath. Signed-off-by: Borislav Petkov Signed-off-by: H. Peter Anvin --- arch/x86/kernel/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 4b6df24..115449f 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -1436,7 +1436,7 @@ static int __init detect_init_APIC(void) switch (boot_cpu_data.x86_vendor) { case X86_VENDOR_AMD: if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model > 1) || - (boot_cpu_data.x86 == 15)) + (boot_cpu_data.x86 >= 15)) break; goto no_apic; case X86_VENDOR_INTEL: -- cgit v1.1 From 62663ea8220366472fe20462831f2d69d7987439 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Tue, 3 Feb 2009 17:46:46 +0100 Subject: ACPI: cpufreq: Remove deprecated /proc/acpi/processor/../performance proc entries They were long enough set deprecated... Update Documentation/cpu-freq/users-guide.txt: The deprecated files listed there seen not to exist for some time anymore already. Signed-off-by: Thomas Renninger Signed-off-by: Len Brown --- arch/x86/kernel/cpu/cpufreq/Kconfig | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index efae3b2..65792c2 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig @@ -245,17 +245,6 @@ config X86_E_POWERSAVER comment "shared options" -config X86_ACPI_CPUFREQ_PROC_INTF - bool "/proc/acpi/processor/../performance interface (deprecated)" - depends on PROC_FS - depends on X86_ACPI_CPUFREQ || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI - help - This enables the deprecated /proc/acpi/processor/../performance - interface. While it is helpful for debugging, the generic, - cross-architecture cpufreq interfaces should be used. - - If in doubt, say N. - config X86_SPEEDSTEP_LIB tristate default (X86_SPEEDSTEP_ICH || X86_SPEEDSTEP_SMI || X86_P4_CLOCKMOD) -- cgit v1.1 From 48ec4d9537282a55d602136724f069faafcac8c8 Mon Sep 17 00:00:00 2001 From: Kyle McMartin Date: Wed, 4 Feb 2009 15:54:45 -0500 Subject: x86, 64-bit: print DMI info in the oops trace This patch echoes what we already do on 32-bit since 90f7d25c6b672137344f447a30a9159945ffea72, and prints the DMI product name in show_regs, so that system specific problems can be easily identified. Signed-off-by: Kyle McMartin Signed-off-by: Ingo Molnar --- arch/x86/kernel/process_64.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 416fb92..85b4cb5 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -151,14 +152,18 @@ void __show_regs(struct pt_regs *regs, int all) unsigned long d0, d1, d2, d3, d6, d7; unsigned int fsindex, gsindex; unsigned int ds, cs, es; + const char *board; printk("\n"); print_modules(); - printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", + board = dmi_get_system_info(DMI_PRODUCT_NAME); + if (!board) + board = ""; + printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n", current->pid, current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + init_utsname()->version, board); printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); printk_address(regs->ip, 1); printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, -- cgit v1.1 From a6a95406c676ffe4f9dee708eb404a17c69f7fdd Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 4 Feb 2009 13:40:31 +0300 Subject: x86: fix hpet timer reinit for x86_64 There's a small problem with hpet_rtc_reinit function - it checks for the: hpet_readl(HPET_COUNTER) - hpet_t1_cmp > 0 to continue increasing both the HPET_T1_CMP (register) and the hpet_t1_cmp (variable). But since the HPET_COUNTER is always 32-bit, if the hpet_t1_cmp is 64-bit this condition will always be FALSE once the latter hits the 32-bit boundary, and we can have a situation, when we don't increase the HPET_T1_CMP register high enough. The result - timer stops ticking, since HPET_T1_CMP becomes less, than the COUNTER and never increased again. The solution is (based on Linus's suggestion) to not compare 64-bits (on 64-bit x86), but to do the comparison on 32-bit signed integers. Reported-by: Kirill Korotaev Signed-off-by: Pavel Emelyanov Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 64d5ad0..c761f91 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -1075,7 +1075,7 @@ static void hpet_rtc_timer_reinit(void) hpet_t1_cmp += delta; hpet_writel(hpet_t1_cmp, HPET_T1_CMP); lost_ints++; - } while ((long)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); + } while ((s32)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); if (lost_ints) { if (hpet_rtc_flags & RTC_PIE) -- cgit v1.1 From 4560839939f4b4a96e21e80584f87308ac93c1da Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Wed, 4 Feb 2009 16:44:01 -0700 Subject: x86: fix grammar in user-visible BIOS warning Fix user-visible grammo. Signed-off-by: Alex Chiang Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ae0d804..c461f6d 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -607,7 +607,7 @@ struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) { printk(KERN_NOTICE - "%s detected: BIOS may corrupt low RAM, working it around.\n", + "%s detected: BIOS may corrupt low RAM, working around it.\n", d->ident); e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED); -- cgit v1.1 From 732553e567c2700ba5b9bccc6ec885c75779a94b Mon Sep 17 00:00:00 2001 From: Mark Langsdorf Date: Tue, 3 Feb 2009 17:46:43 +0100 Subject: [CPUFREQ] powernow-k8: Get transition latency from ACPI _PSS table At this time, the PowerNow! driver for K8 uses an experimentally derived formula to calculate transition latency. The value it provides is orders of magnitude too large on modern systems. This patch replaces the formula with ACPI _PSS latency values for more accuracy and better performance. I've tested it on two 2nd generation Opteron systems, a 3rd generation Operton system, and a Turion X2 without seeing any stability problems. Signed-off-by: Mark Langsdorf Signed-off-by: Thomas Renninger Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 5c28b37..fb039cd 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -939,10 +939,25 @@ static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) free_cpumask_var(data->acpi_data.shared_cpu_map); } +static int get_transition_latency(struct powernow_k8_data *data) +{ + int max_latency = 0; + int i; + for (i = 0; i < data->acpi_data.state_count; i++) { + int cur_latency = data->acpi_data.states[i].transition_latency + + data->acpi_data.states[i].bus_master_latency; + if (cur_latency > max_latency) + max_latency = cur_latency; + } + /* value in usecs, needs to be in nanoseconds */ + return 1000 * max_latency; +} + #else static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } +static int get_transition_latency(struct powernow_k8_data *data) { return 0; } #endif /* CONFIG_X86_POWERNOW_K8_ACPI */ /* Take a frequency, and issue the fid/vid transition command */ @@ -1173,7 +1188,13 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) if (rc) { goto err_out; } - } + /* Take a crude guess here. + * That guess was in microseconds, so multiply with 1000 */ + pol->cpuinfo.transition_latency = ( + ((data->rvo + 8) * data->vstable * VST_UNITS_20US) + + ((1 << data->irt) * 30)) * 1000; + } else /* ACPI _PSS objects available */ + pol->cpuinfo.transition_latency = get_transition_latency(data); /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; @@ -1204,11 +1225,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu)); data->available_cores = pol->cpus; - /* Take a crude guess here. - * That guess was in microseconds, so multiply with 1000 */ - pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US) - + (3 * (1 << data->irt) * 10)) * 1000; - if (cpu_family == CPU_HW_PSTATE) pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); else -- cgit v1.1 From ff08f76d738d0ec0f334b187f61e160caa321d54 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 4 Feb 2009 13:40:31 +0300 Subject: x86: clean up hpet timer reinit Implement Linus's suggestion: introduce the hpet_cnt_ahead() helper function to compare hpet time values - like other wrapping counter comparisons are abstracted away elsewhere. (jiffies, ktime_t, etc.) Reported-by: Kirill Korotaev Signed-off-by: Pavel Emelyanov Signed-off-by: Ingo Molnar --- arch/x86/kernel/hpet.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index c761f91..388254f 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -897,7 +897,7 @@ static unsigned long hpet_rtc_flags; static int hpet_prev_update_sec; static struct rtc_time hpet_alarm_time; static unsigned long hpet_pie_count; -static unsigned long hpet_t1_cmp; +static u32 hpet_t1_cmp; static unsigned long hpet_default_delta; static unsigned long hpet_pie_delta; static unsigned long hpet_pie_limit; @@ -905,6 +905,14 @@ static unsigned long hpet_pie_limit; static rtc_irq_handler irq_handler; /* + * Check that the hpet counter c1 is ahead of the c2 + */ +static inline int hpet_cnt_ahead(u32 c1, u32 c2) +{ + return (s32)(c2 - c1) < 0; +} + +/* * Registers a IRQ handler. */ int hpet_register_irq_handler(rtc_irq_handler handler) @@ -1075,7 +1083,7 @@ static void hpet_rtc_timer_reinit(void) hpet_t1_cmp += delta; hpet_writel(hpet_t1_cmp, HPET_T1_CMP); lost_ints++; - } while ((s32)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); + } while (!hpet_cnt_ahead(hpet_t1_cmp, hpet_readl(HPET_COUNTER))); if (lost_ints) { if (hpet_rtc_flags & RTC_PIE) -- cgit v1.1 From e736ad548db152776de61d7a26805cfae77ce5ce Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Fri, 6 Feb 2009 16:52:05 -0800 Subject: x86: add clflush before monitor for Intel 7400 series For Intel 7400 series CPUs, the recommendation is to use a clflush on the monitored address just before monitor and mwait pair [1]. This clflush makes sure that there are no false wakeups from mwait when the monitored address was recently written to. [1] "MONITOR/MWAIT Recommendations for Intel Xeon Processor 7400 series" section in specification update document of 7400 series http://download.intel.com/design/xeon/specupdt/32033601.pdf Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel.c | 3 +++ arch/x86/kernel/process.c | 6 ++++++ 2 files changed, 9 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 430e5c3..24ff26a 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -291,6 +291,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) ds_init_intel(c); } + if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) + set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR); + #ifdef CONFIG_X86_64 if (c->x86 == 15) c->x86_cache_alignment = c->x86_clflush_size * 2; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index e68bb9e..6d12f7e 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -180,6 +180,9 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) trace_power_start(&it, POWER_CSTATE, (ax>>4)+1); if (!need_resched()) { + if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) + clflush((void *)¤t_thread_info()->flags); + __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) @@ -194,6 +197,9 @@ static void mwait_idle(void) struct power_trace it; if (!need_resched()) { trace_power_start(&it, POWER_CSTATE, 1); + if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) + clflush((void *)¤t_thread_info()->flags); + __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); if (!need_resched()) -- cgit v1.1 From 3f4a739c6accd651a11fcf3c7a20ec8147c42660 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Sun, 8 Feb 2009 16:18:03 -0800 Subject: x86: find nr_irqs_gsi with mp_ioapic_routing Impact: find right nr_irqs_gsi on some systems. One test-system has gap between gsi's: [ 0.000000] ACPI: IOAPIC (id[0x04] address[0xfec00000] gsi_base[0]) [ 0.000000] IOAPIC[0]: apic_id 4, version 0, address 0xfec00000, GSI 0-23 [ 0.000000] ACPI: IOAPIC (id[0x05] address[0xfeafd000] gsi_base[48]) [ 0.000000] IOAPIC[1]: apic_id 5, version 0, address 0xfeafd000, GSI 48-54 [ 0.000000] ACPI: IOAPIC (id[0x06] address[0xfeafc000] gsi_base[56]) [ 0.000000] IOAPIC[2]: apic_id 6, version 0, address 0xfeafc000, GSI 56-62 ... [ 0.000000] nr_irqs_gsi: 38 So nr_irqs_gsi is not right. some irq for MSI will overwrite with io_apic. need to get that with acpi_probe_gsi when acpi io_apic is used Signed-off-by: Yinghai Lu Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 23 +++++++++++++++++++++++ arch/x86/kernel/io_apic.c | 20 +++++++++++++++----- 2 files changed, 38 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index d37593c..7678f10 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -973,6 +973,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) nr_ioapics++; } +int __init acpi_probe_gsi(void) +{ + int idx; + int gsi; + int max_gsi = 0; + + if (acpi_disabled) + return 0; + + if (!acpi_ioapic) + return 0; + + max_gsi = 0; + for (idx = 0; idx < nr_ioapics; idx++) { + gsi = mp_ioapic_routing[idx].gsi_end; + + if (gsi > max_gsi) + max_gsi = gsi; + } + + return max_gsi + 1; +} + static void assign_to_mp_irq(struct mp_config_intsrc *m, struct mp_config_intsrc *mp_irq) { diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 9b0c480..bc7ac4d 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c @@ -3841,14 +3841,24 @@ int __init io_apic_get_redir_entries (int ioapic) void __init probe_nr_irqs_gsi(void) { - int idx; int nr = 0; - for (idx = 0; idx < nr_ioapics; idx++) - nr += io_apic_get_redir_entries(idx) + 1; - - if (nr > nr_irqs_gsi) + nr = acpi_probe_gsi(); + if (nr > nr_irqs_gsi) { nr_irqs_gsi = nr; + } else { + /* for acpi=off or acpi is not compiled in */ + int idx; + + nr = 0; + for (idx = 0; idx < nr_ioapics; idx++) + nr += io_apic_get_redir_entries(idx) + 1; + + if (nr > nr_irqs_gsi) + nr_irqs_gsi = nr; + } + + printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } /* -------------------------------------------------------------------------- -- cgit v1.1 From 55a8ba4b7f76bebd7e8ce3f74c04b140627a1bad Mon Sep 17 00:00:00 2001 From: Alok Kataria Date: Fri, 6 Feb 2009 10:29:35 -0800 Subject: x86, vmi: put a missing paravirt_release_pmd in pgd_dtor Commit 6194ba6ff6ccf8d5c54c857600843c67aa82c407 ("x86: don't special-case pmd allocations as much") made changes to the way we handle pmd allocations, and while doing that it dropped a call to paravirt_release_pd on the pgd page from the pgd_dtor code path. As a result of this missing release, the hypervisor is now unaware of the pgd page being freed, and as a result it ends up tracking this page as a page table page. After this the guest may start using the same page for other purposes, and depending on what use the page is put to, it may result in various performance and/or functional issues ( hangs, reboots). Since this release is only required for VMI, I now release the pgd page from the (vmi)_pgd_free hook. Signed-off-by: Alok N Kataria Acked-by: Jeremy Fitzhardinge Signed-off-by: Ingo Molnar Cc: --- arch/x86/kernel/vmi_32.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 1d3302c..bef58b4 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c @@ -321,6 +321,16 @@ static void vmi_release_pmd(unsigned long pfn) } /* + * We use the pgd_free hook for releasing the pgd page: + */ +static void vmi_pgd_free(struct mm_struct *mm, pgd_t *pgd) +{ + unsigned long pfn = __pa(pgd) >> PAGE_SHIFT; + + vmi_ops.release_page(pfn, VMI_PAGE_L2); +} + +/* * Helper macros for MMU update flags. We can defer updates until a flush * or page invalidation only if the update is to the current address space * (otherwise, there is no flush). We must check against init_mm, since @@ -762,6 +772,7 @@ static inline int __init activate_vmi(void) if (vmi_ops.release_page) { pv_mmu_ops.release_pte = vmi_release_pte; pv_mmu_ops.release_pmd = vmi_release_pmd; + pv_mmu_ops.pgd_free = vmi_pgd_free; } /* Set linear is needed in all cases */ -- cgit v1.1 From d315760ffa261c15ff92699ac6f514112543d7ca Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 9 Feb 2009 22:17:39 +0900 Subject: x86: fix math_emu register frame access do_device_not_available() is the handler for #NM and it declares that it takes a unsigned long and calls math_emu(), which takes a long argument and surprisingly expects the stack frame starting at the zero argument would match struct math_emu_info, which isn't true regardless of configuration in the current code. This patch makes do_device_not_available() take struct pt_regs like other exception handlers and initialize struct math_emu_info with pointer to it and pass pointer to the math_emu_info to math_emulate() like normal C functions do. This way, unless gcc makes a copy of struct pt_regs in do_device_not_available(), the register frame is correctly accessed regardless of kernel configuration or compiler used. This doesn't fix all math_emu problems but it at least gets it somewhat working. Signed-off-by: Tejun Heo Signed-off-by: Ingo Molnar --- arch/x86/kernel/traps.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 98c2d055..7932338 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -896,7 +896,7 @@ asmlinkage void math_state_restore(void) EXPORT_SYMBOL_GPL(math_state_restore); #ifndef CONFIG_MATH_EMULATION -asmlinkage void math_emulate(long arg) +void math_emulate(struct math_emu_info *info) { printk(KERN_EMERG "math-emulation not enabled and no coprocessor found.\n"); @@ -906,16 +906,19 @@ asmlinkage void math_emulate(long arg) } #endif /* CONFIG_MATH_EMULATION */ -dotraplinkage void __kprobes -do_device_not_available(struct pt_regs *regs, long error) +dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs) { #ifdef CONFIG_X86_32 if (read_cr0() & X86_CR0_EM) { - conditional_sti(regs); - math_emulate(0); + struct math_emu_info info = { }; + + conditional_sti(®s); + + info.regs = ®s; + math_emulate(&info); } else { math_state_restore(); /* interrupts still off */ - conditional_sti(regs); + conditional_sti(®s); } #else math_state_restore(); -- cgit v1.1 From b52af40923fc91a12e3c7152d833e0c0c6a508f6 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Tue, 10 Feb 2009 09:21:07 +0100 Subject: i8327: fix outb() parameter order In i8237A_resume(), when resetting the DMA controller, the parameters to dma_outb() were mixed up. Signed-off-by: Clemens Ladisch [ cleaned up the file a tiny bit. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/i8237.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c index dbd6c1d..b42ca69 100644 --- a/arch/x86/kernel/i8237.c +++ b/arch/x86/kernel/i8237.c @@ -28,10 +28,10 @@ static int i8237A_resume(struct sys_device *dev) flags = claim_dma_lock(); - dma_outb(DMA1_RESET_REG, 0); - dma_outb(DMA2_RESET_REG, 0); + dma_outb(0, DMA1_RESET_REG); + dma_outb(0, DMA2_RESET_REG); - for (i = 0;i < 8;i++) { + for (i = 0; i < 8; i++) { set_dma_addr(i, 0x000000); /* DMA count is a bit weird so this is not 0 */ set_dma_count(i, 1); @@ -51,14 +51,14 @@ static int i8237A_suspend(struct sys_device *dev, pm_message_t state) } static struct sysdev_class i8237_sysdev_class = { - .name = "i8237", - .suspend = i8237A_suspend, - .resume = i8237A_resume, + .name = "i8237", + .suspend = i8237A_suspend, + .resume = i8237A_resume, }; static struct sys_device device_i8237A = { - .id = 0, - .cls = &i8237_sysdev_class, + .id = 0, + .cls = &i8237_sysdev_class, }; static int __init i8237A_init_sysfs(void) @@ -68,5 +68,4 @@ static int __init i8237A_init_sysfs(void) error = sysdev_register(&device_i8237A); return error; } - device_initcall(i8237A_init_sysfs); -- cgit v1.1 From e3944bfac961cd7fc82f3b3143c55dc375748569 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Feb 2009 13:07:13 -0500 Subject: tracing, x86: fix fixup section to return to original code Impact: fix to prevent a kernel crash on fault If for some reason the pointer to the parent function on the stack takes a fault, the fix up code will not return back to the original faulting code. This can lead to unpredictable results and perhaps even a kernel panic. A fault should not happen, but if it does, we should simply disable the tracer, warn, and continue running the kernel. It should not lead to a kernel crash. Signed-off-by: Steven Rostedt --- arch/x86/kernel/ftrace.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 1b43086..9d549e4 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -491,13 +491,15 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) "1: " _ASM_MOV " (%[parent_old]), %[old]\n" "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n" " movl $0, %[faulted]\n" + "3:\n" ".section .fixup, \"ax\"\n" - "3: movl $1, %[faulted]\n" + "4: movl $1, %[faulted]\n" + " jmp 3b\n" ".previous\n" - _ASM_EXTABLE(1b, 3b) - _ASM_EXTABLE(2b, 3b) + _ASM_EXTABLE(1b, 4b) + _ASM_EXTABLE(2b, 4b) : [parent_replaced] "=r" (parent), [old] "=r" (old), [faulted] "=r" (faulted) -- cgit v1.1 From f47a454db9129d2e61b224a40f4365cdd4f83042 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 10 Feb 2009 11:53:23 -0500 Subject: tracing, x86: fix constraint for parent variable The constraint used for retrieving and restoring the parent function pointer is incorrect. The parent variable is a pointer, and the address of the pointer is modified by the asm statement and not the pointer itself. It is incorrect to pass it in as an output constraint since the asm will never update the pointer. Signed-off-by: Steven Rostedt Signed-off-by: Ingo Molnar --- arch/x86/kernel/ftrace.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86/kernel') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 9d549e4..231bdd3 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -488,8 +488,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) * ignore such a protection. */ asm volatile( - "1: " _ASM_MOV " (%[parent_old]), %[old]\n" - "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n" + "1: " _ASM_MOV " (%[parent]), %[old]\n" + "2: " _ASM_MOV " %[return_hooker], (%[parent])\n" " movl $0, %[faulted]\n" "3:\n" @@ -501,9 +501,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) _ASM_EXTABLE(1b, 4b) _ASM_EXTABLE(2b, 4b) - : [parent_replaced] "=r" (parent), [old] "=r" (old), - [faulted] "=r" (faulted) - : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker) + : [old] "=r" (old), [faulted] "=r" (faulted) + : [parent] "r" (parent), [return_hooker] "r" (return_hooker) : "memory" ); -- cgit v1.1