From 379b5441aeb895fe55b877a8a9c187e8728f774c Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 27 Jan 2006 14:02:47 -0800 Subject: x86: align per-cpu section to configured cache bytes This matches the fix for a bug seen on x86-64. Test booted on old hardware that had 32 byte cachelines to begin with. Signed-off-by: Zach Brown Signed-off-by: Sam Ravnborg --- arch/i386/kernel/vmlinux.lds.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 4710195..18f99cc 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -7,6 +7,7 @@ #include #include #include +#include OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) @@ -115,7 +116,7 @@ SECTIONS __initramfs_start = .; .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } __initramfs_end = .; - . = ALIGN(32); + . = ALIGN(L1_CACHE_BYTES); __per_cpu_start = .; .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } __per_cpu_end = .; -- cgit v1.1 From 2c906ae67b5b2fc3585230b16406400a363b42e4 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 28 Feb 2006 00:36:32 -0500 Subject: [CPUFREQ] Silence powernow-k8 warning on k7's. Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index e11a092..3b3a949 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -474,8 +474,10 @@ static int check_supported_cpu(unsigned int cpu) goto out; eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + if ((eax & CPUID_XFAM) != CPUID_XFAM_K8) + goto out; + if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || - ((eax & CPUID_XFAM) != CPUID_XFAM_K8) || ((eax & CPUID_XMOD) > CPUID_XMOD_REV_G)) { printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); goto out; -- cgit v1.1 From 8ad5496d2359a19127ad9f2eda69485025c9917f Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 28 Feb 2006 00:37:44 -0500 Subject: [CPUFREQ] Remove duplicate cpuinfo struct We already have one of these declared, so use it, instead of declaring a second one for no good reason. Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index c173c0f..b0ff907 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -479,15 +479,13 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) unsigned l, h; int ret; int i; - struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; /* Only Intel makes Enhanced Speedstep-capable CPUs */ if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) return -ENODEV; - if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { + if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) centrino_driver.flags |= CPUFREQ_CONST_LOOPS; - } if (centrino_cpu_init_acpi(policy)) { if (policy->cpu != 0) -- cgit v1.1 From 32ee8c3e470d86588b51dc42ed01e85c5fa0f180 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Tue, 28 Feb 2006 00:43:23 -0500 Subject: [CPUFREQ] Lots of whitespace & CodingStyle cleanup. Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/Kconfig | 1 - arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c | 64 ++++----- arch/i386/kernel/cpu/cpufreq/elanfreq.c | 109 ++++++++------- arch/i386/kernel/cpu/cpufreq/gx-suspmod.c | 180 ++++++++++++------------- arch/i386/kernel/cpu/cpufreq/longhaul.h | 4 +- arch/i386/kernel/cpu/cpufreq/p4-clockmod.c | 24 ++-- arch/i386/kernel/cpu/cpufreq/powernow-k6.c | 16 +-- arch/i386/kernel/cpu/cpufreq/powernow-k7.c | 10 +- arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 15 +-- arch/i386/kernel/cpu/cpufreq/powernow-k8.h | 6 +- arch/i386/kernel/cpu/cpufreq/speedstep-lib.c | 42 +++--- arch/i386/kernel/cpu/cpufreq/speedstep-lib.h | 20 +-- arch/i386/kernel/cpu/cpufreq/speedstep-smi.c | 49 ++++--- 13 files changed, 257 insertions(+), 283 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig index 26892d2..0f1eb50 100644 --- a/arch/i386/kernel/cpu/cpufreq/Kconfig +++ b/arch/i386/kernel/cpu/cpufreq/Kconfig @@ -96,7 +96,6 @@ config X86_POWERNOW_K8_ACPI config X86_GX_SUSPMOD tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" - depends on PCI help This add the CPUFreq driver for NatSemi Geode processors which support suspend modulation. diff --git a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c index 2b62dee..f275e0d 100644 --- a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -39,7 +39,7 @@ static struct pci_dev *nforce2_chipset_dev; static int fid = 0; /* min_fsb, max_fsb: - * minimum and maximum FSB (= FSB at boot time) + * minimum and maximum FSB (= FSB at boot time) */ static int min_fsb = 0; static int max_fsb = 0; @@ -57,10 +57,10 @@ MODULE_PARM_DESC(min_fsb, #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) -/* +/** * nforce2_calc_fsb - calculate FSB * @pll: PLL value - * + * * Calculates FSB from PLL value */ static int nforce2_calc_fsb(int pll) @@ -76,10 +76,10 @@ static int nforce2_calc_fsb(int pll) return 0; } -/* +/** * nforce2_calc_pll - calculate PLL value * @fsb: FSB - * + * * Calculate PLL value for given FSB */ static int nforce2_calc_pll(unsigned int fsb) @@ -106,10 +106,10 @@ static int nforce2_calc_pll(unsigned int fsb) return NFORCE2_PLL(mul, div); } -/* +/** * nforce2_write_pll - write PLL value to chipset * @pll: PLL value - * + * * Writes new FSB PLL value to chipset */ static void nforce2_write_pll(int pll) @@ -121,15 +121,13 @@ static void nforce2_write_pll(int pll) pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, temp); /* Now write the value in all 64 registers */ - for (temp = 0; temp <= 0x3f; temp++) { - pci_write_config_dword(nforce2_chipset_dev, - NFORCE2_PLLREG, pll); - } + for (temp = 0; temp <= 0x3f; temp++) + pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, pll); return; } -/* +/** * nforce2_fsb_read - Read FSB * * Read FSB from chipset @@ -140,39 +138,32 @@ static unsigned int nforce2_fsb_read(int bootfsb) struct pci_dev *nforce2_sub5; u32 fsb, temp = 0; - /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, - 0x01EF, - PCI_ANY_ID, - PCI_ANY_ID, - NULL); - + 0x01EF,PCI_ANY_ID,PCI_ANY_ID,NULL); if (!nforce2_sub5) return 0; pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb); fsb /= 1000000; - + /* Check if PLL register is already set */ - pci_read_config_byte(nforce2_chipset_dev, - NFORCE2_PLLENABLE, (u8 *)&temp); - + pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); + if(bootfsb || !temp) return fsb; /* Use PLL register FSB value */ - pci_read_config_dword(nforce2_chipset_dev, - NFORCE2_PLLREG, &temp); + pci_read_config_dword(nforce2_chipset_dev,NFORCE2_PLLREG, &temp); fsb = nforce2_calc_fsb(temp); return fsb; } -/* +/** * nforce2_set_fsb - set new FSB * @fsb: New FSB - * + * * Sets new FSB */ static int nforce2_set_fsb(unsigned int fsb) @@ -186,7 +177,7 @@ static int nforce2_set_fsb(unsigned int fsb) printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb); return -EINVAL; } - + tfsb = nforce2_fsb_read(0); if (!tfsb) { printk(KERN_ERR "cpufreq: Error while reading the FSB\n"); @@ -194,8 +185,7 @@ static int nforce2_set_fsb(unsigned int fsb) } /* First write? Then set actual value */ - pci_read_config_byte(nforce2_chipset_dev, - NFORCE2_PLLENABLE, (u8 *)&temp); + pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); if (!temp) { pll = nforce2_calc_pll(tfsb); @@ -223,7 +213,7 @@ static int nforce2_set_fsb(unsigned int fsb) /* Calculate the PLL reg. value */ if ((pll = nforce2_calc_pll(tfsb)) == -1) return -EINVAL; - + nforce2_write_pll(pll); #ifdef NFORCE2_DELAY mdelay(NFORCE2_DELAY); @@ -239,7 +229,7 @@ static int nforce2_set_fsb(unsigned int fsb) /** * nforce2_get - get the CPU frequency * @cpu: CPU number - * + * * Returns the CPU frequency */ static unsigned int nforce2_get(unsigned int cpu) @@ -354,10 +344,10 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy) printk(KERN_INFO "cpufreq: FSB currently at %i MHz, FID %d.%d\n", fsb, fid / 10, fid % 10); - + /* Set maximum FSB to FSB at boot time */ max_fsb = nforce2_fsb_read(1); - + if(!max_fsb) return -EIO; @@ -398,17 +388,15 @@ static struct cpufreq_driver nforce2_driver = { * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic * * Detects nForce2 A2 and C1 stepping - * + * */ static unsigned int nforce2_detect_chipset(void) { u8 revision; nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, - PCI_DEVICE_ID_NVIDIA_NFORCE2, - PCI_ANY_ID, - PCI_ANY_ID, - NULL); + PCI_DEVICE_ID_NVIDIA_NFORCE2, + PCI_ANY_ID, PCI_ANY_ID, NULL); if (nforce2_chipset_dev == NULL) return -ENODEV; diff --git a/arch/i386/kernel/cpu/cpufreq/elanfreq.c b/arch/i386/kernel/cpu/cpufreq/elanfreq.c index 3f7caa4..f317276 100644 --- a/arch/i386/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/i386/kernel/cpu/cpufreq/elanfreq.c @@ -1,16 +1,16 @@ /* - * elanfreq: cpufreq driver for the AMD ELAN family + * elanfreq: cpufreq driver for the AMD ELAN family * * (c) Copyright 2002 Robert Schwebel * - * Parts of this code are (c) Sven Geggus + * Parts of this code are (c) Sven Geggus * - * All Rights Reserved. + * All Rights Reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. + * 2 of the License, or (at your option) any later version. * * 2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel * @@ -28,7 +28,7 @@ #include #include -#define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ +#define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ #define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ /* Module parameter */ @@ -41,7 +41,7 @@ struct s_elan_multiplier { }; /* - * It is important that the frequencies + * It is important that the frequencies * are listed in ascending order here! */ struct s_elan_multiplier elan_multiplier[] = { @@ -72,78 +72,79 @@ static struct cpufreq_frequency_table elanfreq_table[] = { * elanfreq_get_cpu_frequency: determine current cpu speed * * Finds out at which frequency the CPU of the Elan SOC runs - * at the moment. Frequencies from 1 to 33 MHz are generated + * at the moment. Frequencies from 1 to 33 MHz are generated * the normal way, 66 and 99 MHz are called "Hyperspeed Mode" - * and have the rest of the chip running with 33 MHz. + * and have the rest of the chip running with 33 MHz. */ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) { - u8 clockspeed_reg; /* Clock Speed Register */ - + u8 clockspeed_reg; /* Clock Speed Register */ + local_irq_disable(); - outb_p(0x80,REG_CSCIR); - clockspeed_reg = inb_p(REG_CSCDR); + outb_p(0x80,REG_CSCIR); + clockspeed_reg = inb_p(REG_CSCDR); local_irq_enable(); - if ((clockspeed_reg & 0xE0) == 0xE0) { return 0; } + if ((clockspeed_reg & 0xE0) == 0xE0) + return 0; - /* Are we in CPU clock multiplied mode (66/99 MHz)? */ - if ((clockspeed_reg & 0xE0) == 0xC0) { - if ((clockspeed_reg & 0x01) == 0) { + /* Are we in CPU clock multiplied mode (66/99 MHz)? */ + if ((clockspeed_reg & 0xE0) == 0xC0) { + if ((clockspeed_reg & 0x01) == 0) return 66000; - } else { - return 99000; - } - } + else + return 99000; + } /* 33 MHz is not 32 MHz... */ if ((clockspeed_reg & 0xE0)==0xA0) return 33000; - return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000); + return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000); } /** - * elanfreq_set_cpu_frequency: Change the CPU core frequency - * @cpu: cpu number + * elanfreq_set_cpu_frequency: Change the CPU core frequency + * @cpu: cpu number * @freq: frequency in kHz * - * This function takes a frequency value and changes the CPU frequency + * This function takes a frequency value and changes the CPU frequency * according to this. Note that the frequency has to be checked by * elanfreq_validatespeed() for correctness! - * - * There is no return value. + * + * There is no return value. */ -static void elanfreq_set_cpu_state (unsigned int state) { - +static void elanfreq_set_cpu_state (unsigned int state) +{ struct cpufreq_freqs freqs; freqs.old = elanfreq_get_cpu_frequency(0); freqs.new = elan_multiplier[state].clock; freqs.cpu = 0; /* elanfreq.c is UP only driver */ - + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n",elan_multiplier[state].clock); + printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n", + elan_multiplier[state].clock); - /* - * Access to the Elan's internal registers is indexed via - * 0x22: Chip Setup & Control Register Index Register (CSCI) - * 0x23: Chip Setup & Control Register Data Register (CSCD) + /* + * Access to the Elan's internal registers is indexed via + * 0x22: Chip Setup & Control Register Index Register (CSCI) + * 0x23: Chip Setup & Control Register Data Register (CSCD) * */ - /* - * 0x40 is the Power Management Unit's Force Mode Register. + /* + * 0x40 is the Power Management Unit's Force Mode Register. * Bit 6 enables Hyperspeed Mode (66/100 MHz core frequency) */ local_irq_disable(); - outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */ + outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */ outb_p(0x00,REG_CSCDR); local_irq_enable(); /* wait till internal pipelines and */ udelay(1000); /* buffers have cleaned up */ @@ -166,10 +167,10 @@ static void elanfreq_set_cpu_state (unsigned int state) { /** * elanfreq_validatespeed: test if frequency range is valid - * @policy: the policy to validate + * @policy: the policy to validate * - * This function checks if a given frequency range in kHz is valid - * for the hardware supported by the driver. + * This function checks if a given frequency range in kHz is valid + * for the hardware supported by the driver. */ static int elanfreq_verify (struct cpufreq_policy *policy) @@ -177,11 +178,11 @@ static int elanfreq_verify (struct cpufreq_policy *policy) return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); } -static int elanfreq_target (struct cpufreq_policy *policy, - unsigned int target_freq, +static int elanfreq_target (struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) { - unsigned int newstate = 0; + unsigned int newstate = 0; if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], target_freq, relation, &newstate)) return -EINVAL; @@ -212,7 +213,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) max_freq = elanfreq_get_cpu_frequency(0); /* table init */ - for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { + for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { if (elanfreq_table[i].frequency > max_freq) elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; } @@ -226,8 +227,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) if (result) return (result); - cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); - + cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); return 0; } @@ -268,9 +268,9 @@ static struct freq_attr* elanfreq_attr[] = { static struct cpufreq_driver elanfreq_driver = { - .get = elanfreq_get_cpu_frequency, - .verify = elanfreq_verify, - .target = elanfreq_target, + .get = elanfreq_get_cpu_frequency, + .verify = elanfreq_verify, + .target = elanfreq_target, .init = elanfreq_cpu_init, .exit = elanfreq_cpu_exit, .name = "elanfreq", @@ -279,23 +279,21 @@ static struct cpufreq_driver elanfreq_driver = { }; -static int __init elanfreq_init(void) -{ +static int __init elanfreq_init(void) +{ struct cpuinfo_x86 *c = cpu_data; /* Test if we have the right hardware */ if ((c->x86_vendor != X86_VENDOR_AMD) || - (c->x86 != 4) || (c->x86_model!=10)) - { + (c->x86 != 4) || (c->x86_model!=10)) { printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); return -ENODEV; } - return cpufreq_register_driver(&elanfreq_driver); } -static void __exit elanfreq_exit(void) +static void __exit elanfreq_exit(void) { cpufreq_unregister_driver(&elanfreq_driver); } @@ -309,4 +307,3 @@ MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs"); module_init(elanfreq_init); module_exit(elanfreq_exit); - diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c index e86ea48..65b8fa2 100644 --- a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c @@ -6,12 +6,12 @@ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation + * version 2 as published by the Free Software Foundation * * The author(s) of this software shall not be held liable for damages * of any nature resulting due to the use of this software. This * software is provided AS-IS with no warranties. - * + * * Theoritical note: * * (see Geode(tm) CS5530 manual (rev.4.1) page.56) @@ -21,18 +21,18 @@ * * Suspend Modulation works by asserting and de-asserting the SUSP# pin * to CPU(GX1/GXLV) for configurable durations. When asserting SUSP# - * the CPU enters an idle state. GX1 stops its core clock when SUSP# is + * the CPU enters an idle state. GX1 stops its core clock when SUSP# is * asserted then power consumption is reduced. * - * Suspend Modulation's OFF/ON duration are configurable + * Suspend Modulation's OFF/ON duration are configurable * with 'Suspend Modulation OFF Count Register' * and 'Suspend Modulation ON Count Register'. - * These registers are 8bit counters that represent the number of + * These registers are 8bit counters that represent the number of * 32us intervals which the SUSP# pin is asserted(ON)/de-asserted(OFF) * to the processor. * - * These counters define a ratio which is the effective frequency - * of operation of the system. + * These counters define a ratio which is the effective frequency + * of operation of the system. * * OFF Count * F_eff = Fgx * ---------------------- @@ -40,24 +40,24 @@ * * 0 <= On Count, Off Count <= 255 * - * From these limits, we can get register values + * From these limits, we can get register values * * off_duration + on_duration <= MAX_DURATION * on_duration = off_duration * (stock_freq - freq) / freq * - * off_duration = (freq * DURATION) / stock_freq - * on_duration = DURATION - off_duration + * off_duration = (freq * DURATION) / stock_freq + * on_duration = DURATION - off_duration * * *--------------------------------------------------------------------------- * * ChangeLog: - * Dec. 12, 2003 Hiroshi Miura - * - fix on/off register mistake - * - fix cpu_khz calc when it stops cpu modulation. + * Dec. 12, 2003 Hiroshi Miura + * - fix on/off register mistake + * - fix cpu_khz calc when it stops cpu modulation. * - * Dec. 11, 2002 Hiroshi Miura - * - rewrite for Cyrix MediaGX Cx5510/5520 and + * Dec. 11, 2002 Hiroshi Miura + * - rewrite for Cyrix MediaGX Cx5510/5520 and * NatSemi Geode Cs5530(A). * * Jul. ??, 2002 Zwane Mwaikambo @@ -74,40 +74,40 @@ ************************************************************************/ #include -#include +#include #include #include #include #include -#include +#include #include /* PCI config registers, all at F0 */ -#define PCI_PMER1 0x80 /* power management enable register 1 */ -#define PCI_PMER2 0x81 /* power management enable register 2 */ -#define PCI_PMER3 0x82 /* power management enable register 3 */ -#define PCI_IRQTC 0x8c /* irq speedup timer counter register:typical 2 to 4ms */ -#define PCI_VIDTC 0x8d /* video speedup timer counter register: typical 50 to 100ms */ -#define PCI_MODOFF 0x94 /* suspend modulation OFF counter register, 1 = 32us */ -#define PCI_MODON 0x95 /* suspend modulation ON counter register */ -#define PCI_SUSCFG 0x96 /* suspend configuration register */ +#define PCI_PMER1 0x80 /* power management enable register 1 */ +#define PCI_PMER2 0x81 /* power management enable register 2 */ +#define PCI_PMER3 0x82 /* power management enable register 3 */ +#define PCI_IRQTC 0x8c /* irq speedup timer counter register:typical 2 to 4ms */ +#define PCI_VIDTC 0x8d /* video speedup timer counter register: typical 50 to 100ms */ +#define PCI_MODOFF 0x94 /* suspend modulation OFF counter register, 1 = 32us */ +#define PCI_MODON 0x95 /* suspend modulation ON counter register */ +#define PCI_SUSCFG 0x96 /* suspend configuration register */ /* PMER1 bits */ -#define GPM (1<<0) /* global power management */ -#define GIT (1<<1) /* globally enable PM device idle timers */ -#define GTR (1<<2) /* globally enable IO traps */ -#define IRQ_SPDUP (1<<3) /* disable clock throttle during interrupt handling */ -#define VID_SPDUP (1<<4) /* disable clock throttle during vga video handling */ +#define GPM (1<<0) /* global power management */ +#define GIT (1<<1) /* globally enable PM device idle timers */ +#define GTR (1<<2) /* globally enable IO traps */ +#define IRQ_SPDUP (1<<3) /* disable clock throttle during interrupt handling */ +#define VID_SPDUP (1<<4) /* disable clock throttle during vga video handling */ /* SUSCFG bits */ -#define SUSMOD (1<<0) /* enable/disable suspend modulation */ -/* the belows support only with cs5530 (after rev.1.2)/cs5530A */ -#define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */ - /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */ -#define SUSCFG (1<<2) /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */ -/* the belows support only with cs5530A */ -#define PWRSVE_ISA (1<<3) /* stop ISA clock */ -#define PWRSVE (1<<4) /* active idle */ +#define SUSMOD (1<<0) /* enable/disable suspend modulation */ +/* the belows support only with cs5530 (after rev.1.2)/cs5530A */ +#define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */ + /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */ +#define SUSCFG (1<<2) /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */ +/* the belows support only with cs5530A */ +#define PWRSVE_ISA (1<<3) /* stop ISA clock */ +#define PWRSVE (1<<4) /* active idle */ struct gxfreq_params { u8 on_duration; @@ -128,7 +128,7 @@ module_param (pci_busclk, int, 0444); /* maximum duration for which the cpu may be suspended * (32us * MAX_DURATION). If no parameter is given, this defaults - * to 255. + * to 255. * Note that this leads to a maximum of 8 ms(!) where the CPU clock * is suspended -- processing power is just 0.39% of what it used to be, * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */ @@ -144,17 +144,17 @@ module_param (max_duration, int, 0444); #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "gx-suspmod", msg) /** - * we can detect a core multipiler from dir0_lsb - * from GX1 datasheet p.56, - * MULT[3:0]: - * 0000 = SYSCLK multiplied by 4 (test only) - * 0001 = SYSCLK multiplied by 10 - * 0010 = SYSCLK multiplied by 4 - * 0011 = SYSCLK multiplied by 6 - * 0100 = SYSCLK multiplied by 9 - * 0101 = SYSCLK multiplied by 5 - * 0110 = SYSCLK multiplied by 7 - * 0111 = SYSCLK multiplied by 8 + * we can detect a core multipiler from dir0_lsb + * from GX1 datasheet p.56, + * MULT[3:0]: + * 0000 = SYSCLK multiplied by 4 (test only) + * 0001 = SYSCLK multiplied by 10 + * 0010 = SYSCLK multiplied by 4 + * 0011 = SYSCLK multiplied by 6 + * 0100 = SYSCLK multiplied by 9 + * 0101 = SYSCLK multiplied by 5 + * 0110 = SYSCLK multiplied by 7 + * 0111 = SYSCLK multiplied by 8 * of 33.3MHz **/ static int gx_freq_mult[16] = { @@ -164,17 +164,17 @@ static int gx_freq_mult[16] = { /**************************************************************** - * Low Level chipset interface * + * Low Level chipset interface * ****************************************************************/ static struct pci_device_id gx_chipset_tbl[] __initdata = { - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID }, - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID }, - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID }, - { 0, }, + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID }, + { 0, }, }; /** - * gx_detect_chipset: + * gx_detect_chipset: * **/ static __init struct pci_dev *gx_detect_chipset(void) @@ -182,17 +182,16 @@ static __init struct pci_dev *gx_detect_chipset(void) struct pci_dev *gx_pci = NULL; /* check if CPU is a MediaGX or a Geode. */ - if ((current_cpu_data.x86_vendor != X86_VENDOR_NSC) && + if ((current_cpu_data.x86_vendor != X86_VENDOR_NSC) && (current_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) { dprintk("error: no MediaGX/Geode processor found!\n"); - return NULL; + return NULL; } /* detect which companion chip is used */ while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) { - if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) { + if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) return gx_pci; - } } dprintk("error: no supported chipset found!\n"); @@ -200,24 +199,24 @@ static __init struct pci_dev *gx_detect_chipset(void) } /** - * gx_get_cpuspeed: + * gx_get_cpuspeed: * * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi Geode CPU runs. */ static unsigned int gx_get_cpuspeed(unsigned int cpu) { - if ((gx_params->pci_suscfg & SUSMOD) == 0) + if ((gx_params->pci_suscfg & SUSMOD) == 0) return stock_freq; - return (stock_freq * gx_params->off_duration) + return (stock_freq * gx_params->off_duration) / (gx_params->on_duration + gx_params->off_duration); } /** * gx_validate_speed: * determine current cpu speed - * -**/ + * + **/ static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off_duration) { @@ -230,7 +229,7 @@ static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off *on_duration=0; for (i=max_duration; i>0; i--) { - tmp_off = ((khz * i) / stock_freq) & 0xff; + tmp_off = ((khz * i) / stock_freq) & 0xff; tmp_on = i - tmp_off; tmp_freq = (stock_freq * tmp_off) / i; /* if this relation is closer to khz, use this. If it's equal, @@ -247,18 +246,17 @@ static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off /** - * gx_set_cpuspeed: - * set cpu speed in khz. + * gx_set_cpuspeed: + * set cpu speed in khz. **/ static void gx_set_cpuspeed(unsigned int khz) { - u8 suscfg, pmer1; + u8 suscfg, pmer1; unsigned int new_khz; unsigned long flags; struct cpufreq_freqs freqs; - freqs.cpu = 0; freqs.old = gx_get_cpuspeed(0); @@ -303,18 +301,18 @@ static void gx_set_cpuspeed(unsigned int khz) pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration); pci_write_config_byte(gx_params->cs55x0, PCI_MODON, gx_params->on_duration); - pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg); - pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg); + pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg); + pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg); - local_irq_restore(flags); + local_irq_restore(flags); gx_params->pci_suscfg = suscfg; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n", - gx_params->on_duration * 32, gx_params->off_duration * 32); - dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new); + dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n", + gx_params->on_duration * 32, gx_params->off_duration * 32); + dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new); } /**************************************************************** @@ -322,10 +320,10 @@ static void gx_set_cpuspeed(unsigned int khz) ****************************************************************/ /* - * cpufreq_gx_verify: test if frequency range is valid + * cpufreq_gx_verify: test if frequency range is valid * - * This function checks if a given frequency range in kHz is valid - * for the hardware supported by the driver. + * This function checks if a given frequency range in kHz is valid + * for the hardware supported by the driver. */ static int cpufreq_gx_verify(struct cpufreq_policy *policy) @@ -333,8 +331,8 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy) unsigned int tmp_freq = 0; u8 tmp1, tmp2; - if (!stock_freq || !policy) - return -EINVAL; + if (!stock_freq || !policy) + return -EINVAL; policy->cpu = 0; cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); @@ -342,14 +340,14 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy) /* it needs to be assured that at least one supported frequency is * within policy->min and policy->max. If it is not, policy->max * needs to be increased until one freuqency is supported. - * policy->min may not be decreased, though. This way we guarantee a + * policy->min may not be decreased, though. This way we guarantee a * specific processing capacity. */ tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2); - if (tmp_freq < policy->min) + if (tmp_freq < policy->min) tmp_freq += stock_freq / max_duration; policy->min = tmp_freq; - if (policy->min > policy->max) + if (policy->min > policy->max) policy->max = tmp_freq; tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2); if (tmp_freq > policy->max) @@ -358,12 +356,12 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy) if (policy->max < policy->min) policy->max = policy->min; cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); - + return 0; } /* - * cpufreq_gx_target: + * cpufreq_gx_target: * */ static int cpufreq_gx_target(struct cpufreq_policy *policy, @@ -373,8 +371,8 @@ static int cpufreq_gx_target(struct cpufreq_policy *policy, u8 tmp1, tmp2; unsigned int tmp_freq; - if (!stock_freq || !policy) - return -EINVAL; + if (!stock_freq || !policy) + return -EINVAL; policy->cpu = 0; @@ -431,7 +429,7 @@ static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) return 0; } -/* +/* * cpufreq_gx_init: * MediaGX/Geode GX initialize cpufreq driver */ @@ -452,7 +450,7 @@ static int __init cpufreq_gx_init(void) u32 class_rev; /* Test if we have the right hardware */ - if ((gx_pci = gx_detect_chipset()) == NULL) + if ((gx_pci = gx_detect_chipset()) == NULL) return -ENODEV; /* check whether module parameters are sane */ @@ -478,7 +476,7 @@ static int __init cpufreq_gx_init(void) pci_read_config_dword(params->cs55x0, PCI_CLASS_REVISION, &class_rev); params->pci_rev = class_rev && 0xff; - if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { + if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { kfree(params); return ret; /* register error! */ } diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h index 2a495c1..d3a95d77 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.h +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h @@ -234,7 +234,7 @@ static int __initdata ezrat_eblcr[32] = { /* * VIA C3 Nehemiah */ - + static int __initdata nehemiah_a_clock_ratio[32] = { 100, /* 0000 -> 10.0x */ 160, /* 0001 -> 16.0x */ @@ -446,7 +446,7 @@ static int __initdata nehemiah_c_eblcr[32] = { /* end of table */ }; -/* +/* * Voltage scales. Div/Mod by 1000 to get actual voltage. * Which scale to use depends on the VRM type in use. */ diff --git a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c index cc73a7a..0e1fc5c 100644 --- a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c @@ -14,7 +14,7 @@ * The author(s) of this software shall not be held liable for damages * of any nature resulting due to the use of this software. This * software is provided AS-IS with no warranties. - * + * * Date Errata Description * 20020525 N44, O17 12.5% or 25% DC causes lockup * @@ -22,7 +22,7 @@ #include #include -#include +#include #include #include #include @@ -30,7 +30,7 @@ #include #include /* current / set_cpus_allowed() */ -#include +#include #include #include @@ -79,7 +79,7 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) } else { dprintk("CPU#%d setting duty cycle to %d%%\n", cpu, ((125 * newstate) / 10)); - /* bits 63 - 5 : reserved + /* bits 63 - 5 : reserved * bit 4 : enable/disable * bits 3-1 : duty cycle * bit 0 : reserved @@ -132,7 +132,7 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, } /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software - * Developer's Manual, Volume 3 + * Developer's Manual, Volume 3 */ cpus_allowed = current->cpus_allowed; @@ -206,7 +206,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D); } - + static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) { @@ -234,7 +234,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) dprintk("has errata -- disabling frequencies lower than 2ghz\n"); break; } - + /* get max frequency */ stock_freq = cpufreq_p4_get_frequency(c); if (!stock_freq) @@ -250,7 +250,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) p4clockmod_table[i].frequency = (stock_freq * i)/8; } cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu); - + /* cpuinfo and default policy values */ policy->governor = CPUFREQ_DEFAULT_GOVERNOR; policy->cpuinfo.transition_latency = 1000000; /* assumed */ @@ -262,7 +262,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy) { - cpufreq_frequency_table_put_attr(policy->cpu); + cpufreq_frequency_table_put_attr(policy->cpu); return 0; } @@ -298,7 +298,7 @@ static struct freq_attr* p4clockmod_attr[] = { }; static struct cpufreq_driver p4clockmod_driver = { - .verify = cpufreq_p4_verify, + .verify = cpufreq_p4_verify, .target = cpufreq_p4_target, .init = cpufreq_p4_cpu_init, .exit = cpufreq_p4_cpu_exit, @@ -310,12 +310,12 @@ static struct cpufreq_driver p4clockmod_driver = { static int __init cpufreq_p4_init(void) -{ +{ struct cpuinfo_x86 *c = cpu_data; int ret; /* - * THERM_CONTROL is architectural for IA32 now, so + * THERM_CONTROL is architectural for IA32 now, so * we can rely on the capability checks */ if (c->x86_vendor != X86_VENDOR_INTEL) diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k6.c b/arch/i386/kernel/cpu/cpufreq/powernow-k6.c index 222f8cf..f895240 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k6.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include #include @@ -50,7 +50,7 @@ static int powernow_k6_get_cpu_multiplier(void) { u64 invalue = 0; u32 msrval; - + msrval = POWERNOW_IOPORT + 0x1; wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ invalue=inl(POWERNOW_IOPORT + 0x8); @@ -81,7 +81,7 @@ static void powernow_k6_set_state (unsigned int best_i) freqs.old = busfreq * powernow_k6_get_cpu_multiplier(); freqs.new = busfreq * clock_ratio[best_i].index; freqs.cpu = 0; /* powernow-k6.c is UP only driver */ - + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); /* we now need to transform best_i to the BVC format, see AMD#23446 */ @@ -152,7 +152,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) busfreq = cpu_khz / max_multiplier; /* table init */ - for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { + for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { if (clock_ratio[i].index > max_multiplier) clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; else @@ -182,7 +182,7 @@ static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) powernow_k6_set_state(i); } cpufreq_frequency_table_put_attr(policy->cpu); - return 0; + return 0; } static unsigned int powernow_k6_get(unsigned int cpu) @@ -196,8 +196,8 @@ static struct freq_attr* powernow_k6_attr[] = { }; static struct cpufreq_driver powernow_k6_driver = { - .verify = powernow_k6_verify, - .target = powernow_k6_target, + .verify = powernow_k6_verify, + .target = powernow_k6_target, .init = powernow_k6_cpu_init, .exit = powernow_k6_cpu_exit, .get = powernow_k6_get, @@ -215,7 +215,7 @@ static struct cpufreq_driver powernow_k6_driver = { * on success. */ static int __init powernow_k6_init(void) -{ +{ struct cpuinfo_x86 *c = cpu_data; if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) || diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c index edcd626..2bf4237 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c @@ -199,8 +199,8 @@ static int get_ranges (unsigned char *pst) powernow_table[j].index |= (vid << 8); /* upper 8 bits */ dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " - "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, - fid_codes[fid] % 10, speed/1000, vid, + "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, + fid_codes[fid] % 10, speed/1000, vid, mobile_vid_table[vid]/1000, mobile_vid_table[vid]%1000); } @@ -368,8 +368,8 @@ static int powernow_acpi_init(void) } dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " - "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, - fid_codes[fid] % 10, speed/1000, vid, + "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, + fid_codes[fid] % 10, speed/1000, vid, mobile_vid_table[vid]/1000, mobile_vid_table[vid]%1000); @@ -460,7 +460,7 @@ static int powernow_decode_bios (int maxfid, int startvid) (maxfid==pst->maxfid) && (startvid==pst->startvid)) { dprintk ("PST:%d (@%p)\n", i, pst); - dprintk (" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", + dprintk (" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); ret = get_ranges ((char *) pst + sizeof (struct pst_s)); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index 3b3a949..e85e905 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -83,11 +83,10 @@ static u32 find_millivolts_from_vid(struct powernow_k8_data *data, u32 vid) */ static u32 convert_fid_to_vco_fid(u32 fid) { - if (fid < HI_FID_TABLE_BOTTOM) { + if (fid < HI_FID_TABLE_BOTTOM) return 8 + (2 * fid); - } else { + else return fid; - } } /* @@ -177,7 +176,7 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) if (i++ > 100) { printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); return 1; - } + } } while (query_current_values_with_pending_wait(data)); count_off_irt(data); @@ -782,9 +781,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) /* verify only 1 entry from the lo frequency table */ if (fid < HI_FID_TABLE_BOTTOM) { if (cntlofreq) { - /* if both entries are the same, ignore this - * one... - */ + /* if both entries are the same, ignore this one ... */ if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || (powernow_table[i].index != powernow_table[cntlofreq].index)) { printk(KERN_ERR PFX "Too many lo freq table entries\n"); @@ -856,7 +853,7 @@ static int transition_frequency(struct powernow_k8_data *data, unsigned int inde dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); /* fid are the lower 8 bits of the index we stored into - * the cpufreq frequency table in find_psb_table, vid are + * the cpufreq frequency table in find_psb_table, vid are * the upper 8 bits. */ @@ -1050,7 +1047,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) pol->governor = CPUFREQ_DEFAULT_GOVERNOR; pol->cpus = cpu_core_map[pol->cpu]; - /* Take a crude guess here. + /* Take a crude guess here. * That guess was in microseconds, so multiply with 1000 */ pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US) + (3 * (1 << data->irt) * 10)) * 1000; diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h index d0de37d..00ea899c 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h @@ -63,7 +63,7 @@ struct powernow_k8_data { #define MSR_C_LO_VID_SHIFT 8 /* Field definitions within the FID VID High Control MSR : */ -#define MSR_C_HI_STP_GNT_TO 0x000fffff +#define MSR_C_HI_STP_GNT_TO 0x000fffff /* Field definitions within the FID VID Low Status MSR : */ #define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */ @@ -123,7 +123,7 @@ struct powernow_k8_data { * Most values of interest are enocoded in a single field of the _PSS * entries: the "control" value. */ - + #define IRT_SHIFT 30 #define RVO_SHIFT 28 #define EXT_TYPE_SHIFT 27 @@ -185,7 +185,7 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned #ifndef for_each_cpu_mask #define for_each_cpu_mask(i,mask) for (i=0;i<1;i++) #endif - + #ifdef CONFIG_SMP static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) { diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c index 7c47005..4f46cac 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include #include @@ -36,8 +36,8 @@ static unsigned int pentium3_get_frequency (unsigned int processor) /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ struct { unsigned int ratio; /* Frequency Multiplier (x10) */ - u8 bitmap; /* power on configuration bits - [27, 25:22] (in MSR 0x2a) */ + u8 bitmap; /* power on configuration bits + [27, 25:22] (in MSR 0x2a) */ } msr_decode_mult [] = { { 30, 0x01 }, { 35, 0x05 }, @@ -58,9 +58,9 @@ static unsigned int pentium3_get_frequency (unsigned int processor) /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */ struct { - unsigned int value; /* Front Side Bus speed in MHz */ - u8 bitmap; /* power on configuration bits [18: 19] - (in MSR 0x2a) */ + unsigned int value; /* Front Side Bus speed in MHz */ + u8 bitmap; /* power on configuration bits [18: 19] + (in MSR 0x2a) */ } msr_decode_fsb [] = { { 66, 0x0 }, { 100, 0x2 }, @@ -68,8 +68,8 @@ static unsigned int pentium3_get_frequency (unsigned int processor) { 0, 0xff} }; - u32 msr_lo, msr_tmp; - int i = 0, j = 0; + u32 msr_lo, msr_tmp; + int i = 0, j = 0; /* read MSR 0x2a - we only need the low 32 bits */ rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); @@ -106,7 +106,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor) static unsigned int pentiumM_get_frequency(void) { - u32 msr_lo, msr_tmp; + u32 msr_lo, msr_tmp; rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); @@ -134,7 +134,7 @@ static unsigned int pentium4_get_frequency(void) dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi); - /* decode the FSB: see IA-32 Intel (C) Architecture Software + /* decode the FSB: see IA-32 Intel (C) Architecture Software * Developer's Manual, Volume 3: System Prgramming Guide, * revision #12 in Table B-1: MSRs in the Pentium 4 and * Intel Xeon Processors, on page B-4 and B-5. @@ -170,7 +170,7 @@ static unsigned int pentium4_get_frequency(void) return (fsb * mult); } - + unsigned int speedstep_get_processor_frequency(unsigned int processor) { switch (processor) { @@ -198,11 +198,11 @@ EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency); unsigned int speedstep_detect_processor (void) { struct cpuinfo_x86 *c = cpu_data; - u32 ebx, msr_lo, msr_hi; + u32 ebx, msr_lo, msr_hi; dprintk("x86: %x, model: %x\n", c->x86, c->x86_model); - if ((c->x86_vendor != X86_VENDOR_INTEL) || + if ((c->x86_vendor != X86_VENDOR_INTEL) || ((c->x86 != 6) && (c->x86 != 0xF))) return 0; @@ -218,15 +218,15 @@ unsigned int speedstep_detect_processor (void) dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); switch (c->x86_mask) { - case 4: + case 4: /* - * B-stepping [M-P4-M] + * B-stepping [M-P4-M] * sample has ebx = 0x0f, production has 0x0e. */ if ((ebx == 0x0e) || (ebx == 0x0f)) return SPEEDSTEP_PROCESSOR_P4M; break; - case 7: + case 7: /* * C-stepping [M-P4-M] * needs to have ebx=0x0e, else it's a celeron: @@ -253,7 +253,7 @@ unsigned int speedstep_detect_processor (void) * also, M-P4M HTs have ebx=0x8, too * For now, they are distinguished by the model_id string */ - if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL)) + if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL)) return SPEEDSTEP_PROCESSOR_P4M; break; default: @@ -264,8 +264,7 @@ unsigned int speedstep_detect_processor (void) switch (c->x86_model) { case 0x0B: /* Intel PIII [Tualatin] */ - /* cpuid_ebx(1) is 0x04 for desktop PIII, - 0x06 for mobile PIII-M */ + /* cpuid_ebx(1) is 0x04 for desktop PIII, 0x06 for mobile PIII-M */ ebx = cpuid_ebx(0x00000001); dprintk("ebx is %x\n", ebx); @@ -275,9 +274,8 @@ unsigned int speedstep_detect_processor (void) return 0; /* So far all PIII-M processors support SpeedStep. See - * Intel's 24540640.pdf of June 2003 + * Intel's 24540640.pdf of June 2003 */ - return SPEEDSTEP_PROCESSOR_PIII_T; case 0x08: /* Intel PIII [Coppermine] */ @@ -399,7 +397,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, } } - out: +out: local_irq_restore(flags); return (ret); } diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h index 6a727fd..b735429 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h @@ -14,7 +14,7 @@ #define SPEEDSTEP_PROCESSOR_PIII_C_EARLY 0x00000001 /* Coppermine core */ #define SPEEDSTEP_PROCESSOR_PIII_C 0x00000002 /* Coppermine core */ -#define SPEEDSTEP_PROCESSOR_PIII_T 0x00000003 /* Tualatin core */ +#define SPEEDSTEP_PROCESSOR_PIII_T 0x00000003 /* Tualatin core */ #define SPEEDSTEP_PROCESSOR_P4M 0x00000004 /* P4-M */ /* the following processors are not speedstep-capable and are not auto-detected @@ -25,8 +25,8 @@ /* speedstep states -- only two of them */ -#define SPEEDSTEP_HIGH 0x00000000 -#define SPEEDSTEP_LOW 0x00000001 +#define SPEEDSTEP_HIGH 0x00000000 +#define SPEEDSTEP_LOW 0x00000001 /* detect a speedstep-capable processor */ @@ -36,13 +36,13 @@ extern unsigned int speedstep_detect_processor (void); extern unsigned int speedstep_get_processor_frequency(unsigned int processor); -/* detect the low and high speeds of the processor. The callback - * set_state"'s first argument is either SPEEDSTEP_HIGH or - * SPEEDSTEP_LOW; the second argument is zero so that no +/* detect the low and high speeds of the processor. The callback + * set_state"'s first argument is either SPEEDSTEP_HIGH or + * SPEEDSTEP_LOW; the second argument is zero so that no * cpufreq_notify_transition calls are initiated. */ extern unsigned int speedstep_get_freqs(unsigned int processor, - unsigned int *low_speed, - unsigned int *high_speed, - unsigned int *transition_latency, - void (*set_state) (unsigned int state)); + unsigned int *low_speed, + unsigned int *high_speed, + unsigned int *transition_latency, + void (*set_state) (unsigned int state)); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c index 28cc5d5..336ba49 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c @@ -13,8 +13,8 @@ *********************************************************************/ #include -#include -#include +#include +#include #include #include #include @@ -28,21 +28,21 @@ * * These parameters are got from IST-SMI BIOS call. * If user gives it, these are used. - * + * */ -static int smi_port = 0; -static int smi_cmd = 0; -static unsigned int smi_sig = 0; +static int smi_port = 0; +static int smi_cmd = 0; +static unsigned int smi_sig = 0; /* info about the processor */ -static unsigned int speedstep_processor = 0; +static unsigned int speedstep_processor = 0; -/* - * There are only two frequency states for each processor. Values +/* + * There are only two frequency states for each processor. Values * are in kHz for the time being. */ static struct cpufreq_frequency_table speedstep_freqs[] = { - {SPEEDSTEP_HIGH, 0}, + {SPEEDSTEP_HIGH, 0}, {SPEEDSTEP_LOW, 0}, {0, CPUFREQ_TABLE_END}, }; @@ -123,7 +123,7 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) *low = low_mhz * 1000; return result; -} +} /** * speedstep_get_state - set the SpeedStep state @@ -204,7 +204,7 @@ static void speedstep_set_state (unsigned int state) * speedstep_target - set a new CPUFreq policy * @policy: new policy * @target_freq: new freq - * @relation: + * @relation: * * Sets a new CPUFreq policy/freq. */ @@ -283,7 +283,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) state = speedstep_get_state(); speed = speedstep_freqs[state].frequency; - dprintk("currently at %s speed setting - %i MHz\n", + dprintk("currently at %s speed setting - %i MHz\n", (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", (speed / 1000)); @@ -296,7 +296,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) if (result) return (result); - cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); + cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); return 0; } @@ -332,8 +332,8 @@ static struct freq_attr* speedstep_attr[] = { static struct cpufreq_driver speedstep_driver = { .name = "speedstep-smi", - .verify = speedstep_verify, - .target = speedstep_target, + .verify = speedstep_verify, + .target = speedstep_target, .init = speedstep_cpu_init, .exit = speedstep_cpu_exit, .get = speedstep_get, @@ -370,13 +370,12 @@ static int __init speedstep_init(void) return -ENODEV; } - dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n", + dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n", ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level); - - /* Error if no IST-SMI BIOS or no PARM + /* Error if no IST-SMI BIOS or no PARM sig= 'ISGE' aka 'Intel Speedstep Gate E' */ - if ((ist_info.signature != 0x47534943) && ( + if ((ist_info.signature != 0x47534943) && ( (smi_port == 0) || (smi_cmd == 0))) return -ENODEV; @@ -386,17 +385,15 @@ static int __init speedstep_init(void) smi_sig = ist_info.signature; /* setup smi_port from MODLULE_PARM or BIOS */ - if ((smi_port > 0xff) || (smi_port < 0)) { + if ((smi_port > 0xff) || (smi_port < 0)) return -EINVAL; - } else if (smi_port == 0) { + else if (smi_port == 0) smi_port = ist_info.command & 0xff; - } - if ((smi_cmd > 0xff) || (smi_cmd < 0)) { + if ((smi_cmd > 0xff) || (smi_cmd < 0)) return -EINVAL; - } else if (smi_cmd == 0) { + else if (smi_cmd == 0) smi_cmd = (ist_info.command >> 16) & 0xff; - } return cpufreq_register_driver(&speedstep_driver); } -- cgit v1.1 From 2a1c1c877ecb446dbdf1715248e151db8719a87b Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sun, 5 Mar 2006 03:35:00 -0500 Subject: [CPUFREQ] powernow-k8: Let cpufreq driver handle affected CPUs powernow-k8: Let cpufreq driver handle affected CPUs Let the cpufreq driver manage AMD Dual-Core CPUs being tied together. Since cpufreq driver's affected CPUs data, cpufreq_policy->cpus, already knows about which cores are tied together, powernow driver does not have keep its internal data for every core. (even a pointer.. it will never be called on) Telling cpufreq driver about cpu_core_map at init time is sufficient. Signed-off-by: Jacob Shin Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index e85e905..9a8ec03c 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -45,7 +45,7 @@ #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 1.60.0" +#define VERSION "version 1.60.1" #include "powernow-k8.h" /* serialize freq changes */ @@ -908,7 +908,6 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi u32 checkvid = data->currvid; unsigned int newstate; int ret = -EIO; - int i; /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; @@ -954,12 +953,6 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi up(&fidvid_sem); goto err_out; } - - /* Update all the fid/vids of our siblings */ - for_each_cpu_mask(i, cpu_core_map[pol->cpu]) { - powernow_data[i]->currvid = data->currvid; - powernow_data[i]->currfid = data->currfid; - } up(&fidvid_sem); pol->cur = find_khz_freq_from_fid(data->currfid); @@ -983,7 +976,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; cpumask_t oldmask = CPU_MASK_ALL; - int rc, i; + int rc; if (!cpu_online(pol->cpu)) return -ENODEV; @@ -1069,9 +1062,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) printk("cpu_init done, current fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); - for_each_cpu_mask(i, cpu_core_map[pol->cpu]) { - powernow_data[i] = data; - } + powernow_data[pol->cpu] = data; return 0; -- cgit v1.1 From 6d373ea012b2974e627b9ee830e75cf3bf3c4c24 Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Thu, 9 Mar 2006 20:07:44 -0800 Subject: [CPUFREQ] Fix the p4-clockmod N60 errata workaround. Fix the code to disable freqs less than 2GHz in N60 errata. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/p4-clockmod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c index 0e1fc5c..ab6504e 100644 --- a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c @@ -244,7 +244,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { if ((i<2) && (has_N44_O17_errata[policy->cpu])) p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; - else if (has_N60_errata[policy->cpu] && p4clockmod_table[i].frequency < 2000000) + else if (has_N60_errata[policy->cpu] && ((stock_freq * i)/8) < 2000000) p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; else p4clockmod_table[i].frequency = (stock_freq * i)/8; -- cgit v1.1 From 5e8fb97163c7bf2b370290b4924babc361b05dde Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 11 Mar 2006 16:03:16 -0500 Subject: [CPUFREQ] Mark longhaul driver as broken. This seems to work for a short period of time, but when used in conjunction with a userspace governor that changes the frequency regularly, it's only a matter of time before everything just locks up. Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig index 0f1eb50..f7016a9 100644 --- a/arch/i386/kernel/cpu/cpufreq/Kconfig +++ b/arch/i386/kernel/cpu/cpufreq/Kconfig @@ -202,6 +202,7 @@ config X86_LONGRUN config X86_LONGHAUL tristate "VIA Cyrix III Longhaul" select CPU_FREQ_TABLE + depends on BROKEN help This adds the CPUFreq driver for VIA Samuel/CyrixIII, VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T -- cgit v1.1 From 388d6c5180faf83ea06dc2614e8e89910ddef4f0 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 11 Mar 2006 16:04:53 -0500 Subject: [CPUFREQ] Whitespace cleanup Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/Kconfig | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig index f7016a9..e44a4c6 100644 --- a/arch/i386/kernel/cpu/cpufreq/Kconfig +++ b/arch/i386/kernel/cpu/cpufreq/Kconfig @@ -114,9 +114,9 @@ config X86_SPEEDSTEP_CENTRINO you also need to say Y to "Use ACPI tables to decode..." below [which might imply enabling ACPI] if you want to use this driver on non-Banias CPUs. - + For details, take a look at . - + If in doubt, say N. config X86_SPEEDSTEP_CENTRINO_ACPI @@ -147,7 +147,7 @@ config X86_SPEEDSTEP_ICH help This adds the CPUFreq driver for certain mobile Intel Pentium III (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all - mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2, + mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2, ICH3 or ICH4 southbridge. For details, take a look at . @@ -160,7 +160,7 @@ config X86_SPEEDSTEP_SMI depends on EXPERIMENTAL help This adds the CPUFreq driver for certain mobile Intel Pentium III - (Coppermine), all mobile Intel Pentium III-M (Tualatin) + (Coppermine), all mobile Intel Pentium III-M (Tualatin) on systems which have an Intel 440BX/ZX/MX southbridge. For details, take a look at . @@ -204,8 +204,8 @@ config X86_LONGHAUL select CPU_FREQ_TABLE depends on BROKEN help - This adds the CPUFreq driver for VIA Samuel/CyrixIII, - VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T + This adds the CPUFreq driver for VIA Samuel/CyrixIII, + VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T processors. For details, take a look at . @@ -215,11 +215,11 @@ config X86_LONGHAUL comment "shared options" config X86_ACPI_CPUFREQ_PROC_INTF - bool "/proc/acpi/processor/../performance interface (deprecated)" + bool "/proc/acpi/processor/../performance interface (deprecated)" depends on PROC_FS depends on X86_ACPI_CPUFREQ || X86_SPEEDSTEP_CENTRINO_ACPI || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI help - This enables the deprecated /proc/acpi/processor/../performance + This enables the deprecated /proc/acpi/processor/../performance interface. While it is helpful for debugging, the generic, cross-architecture cpufreq interfaces should be used. @@ -233,9 +233,9 @@ config X86_SPEEDSTEP_RELAXED_CAP_CHECK bool "Relaxed speedstep capability checks" depends on (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH) help - Don't perform all checks for a speedstep capable system which would - normally be done. Some ancient or strange systems, though speedstep - capable, don't always indicate that they are speedstep capable. This + Don't perform all checks for a speedstep capable system which would + normally be done. Some ancient or strange systems, though speedstep + capable, don't always indicate that they are speedstep capable. This option lets the probing code bypass some of those checks if the parameter "relaxed_check=1" is passed to the module. -- cgit v1.1 From 84f0b1ef8c013b33dea9bbc2521094dd29110cf0 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sat, 11 Mar 2006 16:13:56 -0500 Subject: [CPUFREQ] kzalloc conversion for gx-suspmod Signed-off-by: Dave Jones --- arch/i386/kernel/cpu/cpufreq/gx-suspmod.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c index 65b8fa2..92afa3b 100644 --- a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c @@ -459,10 +459,9 @@ static int __init cpufreq_gx_init(void) dprintk("geode suspend modulation available.\n"); - params = kmalloc(sizeof(struct gxfreq_params), GFP_KERNEL); + params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL); if (params == NULL) return -ENOMEM; - memset(params, 0, sizeof(struct gxfreq_params)); params->cs55x0 = gx_pci; gx_params = params; -- cgit v1.1 From 67963132638e67ad3c5aa16765e6f3f2f3cdd85c Mon Sep 17 00:00:00 2001 From: Maneesh Soni Date: Tue, 14 Mar 2006 15:03:14 +0530 Subject: [PATCH] Plug kdump shutdown race window lapic_shutdown() re-enables interrupts which is un-desirable for panic case, so use local_irq_save() and local_irq_restore() to keep the irqs disabled for kexec on panic case, and close a possible race window while kdump shutdown as shown in this stack trace -- BUG: spinlock lockup on CPU#1, bash/4396, c52781a0 [] _raw_spin_lock+0xb7/0xd2 [] _spin_lock+0x6/0x8 [] scheduler_tick+0xe7/0x328 [] update_process_times+0x51/0x5d [] smp_apic_timer_interrupt+0x4f/0x58 [] lapic_shutdown+0x76/0x7e [] apic_timer_interrupt+0x1c/0x30 [] lapic_shutdown+0x76/0x7e [] machine_crash_shutdown+0x83/0xaa [] crash_kexec+0xc1/0xe3 [] _spin_lock+0x6/0x8 [] crash_kexec+0xad/0xe3 [] __handle_sysrq+0x84/0xfd [] write_sysrq_trigger+0x2c/0x35 [] vfs_write+0xa2/0x13b [] sys_write+0x3b/0x64 [] syscall_call+0x7/0xb Signed-off-by: Maneesh Soni Signed-off-by: Linus Torvalds --- arch/i386/kernel/apic.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index f39e09e..776c909 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -570,16 +570,18 @@ void __devinit setup_local_APIC(void) */ void lapic_shutdown(void) { + unsigned long flags; + if (!cpu_has_apic) return; - local_irq_disable(); + local_irq_save(flags); clear_local_APIC(); if (enabled_via_apicbase) disable_local_APIC(); - local_irq_enable(); + local_irq_restore(flags); } #ifdef CONFIG_PM -- cgit v1.1 From 82c3c03a4096badd026c6e337f3c5dde020e9ec6 Mon Sep 17 00:00:00 2001 From: Srivatsa Vaddagiri Date: Thu, 16 Mar 2006 23:04:06 -0800 Subject: [PATCH] x86: check for online cpus before bringing them up Bryce reported a bug wherein offlining CPU0 (on x86 box) and then subsequently onlining it resulted in a lockup. On x86, CPU0 is never offlined. The subsequent attempt to online CPU0 doesn't take that into account. It actually tries to bootup the already booted CPU. Following patch fixes the problem (as acknowledged by Bryce). Please consider for inclusion in 2.6.16. Check if cpu is already online. Signed-off-by: Srivatsa Vaddagiri Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/smpboot.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index eba7f53..7007e17 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -1029,6 +1029,16 @@ int __devinit smp_prepare_cpu(int cpu) int apicid, ret; lock_cpu_hotplug(); + + /* + * On x86, CPU0 is never offlined. Trying to bring up an + * already-booted CPU will hang. So check for that case. + */ + if (cpu_online(cpu)) { + ret = -EINVAL; + goto exit; + } + apicid = x86_cpu_to_apicid[cpu]; if (apicid == BAD_APICID) { ret = -ENODEV; -- cgit v1.1 From 7e7f8a036b8e2b2a300df016da5e7128c8a9192e Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Tue, 31 Jan 2006 16:56:28 -0500 Subject: [PATCH] make vm86 call audit_syscall_exit hi, The motivation behind the patch below was to address messages in /var/log/messages such as: Jan 31 10:54:15 mets kernel: audit(:0): major=252 name_count=0: freeing multiple contexts (1) Jan 31 10:54:15 mets kernel: audit(:0): major=113 name_count=0: freeing multiple contexts (2) I can reproduce by running 'get-edid' from: http://john.fremlin.de/programs/linux/read-edid/. These messages come about in the log b/c the vm86 calls do not exit via the normal system call exit paths and thus do not call 'audit_syscall_exit'. The next system call will then free the context for itself and for the vm86 context, thus generating the above messages. This patch addresses the issue by simply adding a call to 'audit_syscall_exit' from the vm86 code. Besides fixing the above error messages the patch also now allows vm86 system calls to become auditable. This is useful since strace does not appear to properly record the return values from sys_vm86. I think this patch is also a step in the right direction in terms of cleaning up some core auditing code. If we can correct any other paths that do not properly call the audit exit and entries points, then we can also eliminate the notion of context chaining. I've tested this patch by verifying that the log messages no longer appear, and that the audit records for sys_vm86 appear to be correct. Also, 'read_edid' produces itentical output. thanks, -Jason Signed-off-by: Jason Baron Signed-off-by: Al Viro --- arch/i386/kernel/vm86.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index f51c894..aee14fa 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -252,6 +253,7 @@ out: static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk) { struct tss_struct *tss; + long eax; /* * make sure the vm86() system call doesn't try to do anything silly */ @@ -305,13 +307,19 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk tsk->thread.screen_bitmap = info->screen_bitmap; if (info->flags & VM86_SCREEN_BITMAP) mark_screen_rdonly(tsk->mm); + __asm__ __volatile__("xorl %eax,%eax; movl %eax,%fs; movl %eax,%gs\n\t"); + __asm__ __volatile__("movl %%eax, %0\n" :"=r"(eax)); + + /*call audit_syscall_exit since we do not exit via the normal paths */ + if (unlikely(current->audit_context)) + audit_syscall_exit(current, AUDITSC_RESULT(eax), eax); + __asm__ __volatile__( - "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t" "movl %0,%%esp\n\t" "movl %1,%%ebp\n\t" "jmp resume_userspace" : /* no outputs */ - :"r" (&info->regs), "r" (task_thread_info(tsk)) : "ax"); + :"r" (&info->regs), "r" (task_thread_info(tsk))); /* we never return here */ } -- cgit v1.1 From 4078006568c142a909e7889cbdc28804cec25461 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 22 Mar 2006 00:07:35 -0800 Subject: [PATCH] efi_call_phys_epilog() warning fix arch/i386/kernel/efi.c: In function `efi_call_phys_epilog': arch/i386/kernel/efi.c:118: warning: assignment makes integer from pointer without a cast Cc: Matt Domsch Cc: "Tolentino, Matthew E" Cc: Zachary Amsden Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index c9cad7b..aeabb41 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -115,7 +115,7 @@ static void efi_call_phys_epilog(void) unsigned long cr4; struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); - cpu_gdt_descr->address = __va(cpu_gdt_descr->address); + cpu_gdt_descr->address = (unsigned long)__va(cpu_gdt_descr->address); load_gdt(cpu_gdt_descr); cr4 = read_cr4(); -- cgit v1.1 From 68ed0040a8c9d06b73cda322a1f740749bd6e41a Mon Sep 17 00:00:00 2001 From: Ravikiran G Thirumalai Date: Wed, 22 Mar 2006 00:07:38 -0800 Subject: [PATCH] x86: mark cyc2ns_scale readmostly This variable is rarely written to. Mark the variable accordingly. Signed-off-by: Ravikiran Thirumalai Signed-off-by: Shai Fultheim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/timers/timer_hpet.c | 2 +- arch/i386/kernel/timers/timer_tsc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index be24272..17a6fe7 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -46,7 +46,7 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; * * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -static unsigned long cyc2ns_scale; +static unsigned long cyc2ns_scale __read_mostly; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ static inline void set_cyc2ns_scale(unsigned long cpu_khz) diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index a7f5a2a..5e41ee2 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -74,7 +74,7 @@ late_initcall(start_lost_tick_compensation); * * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -static unsigned long cyc2ns_scale; +static unsigned long cyc2ns_scale __read_mostly; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ static inline void set_cyc2ns_scale(unsigned long cpu_khz) -- cgit v1.1 From 78eef01b0fae087c5fadbd85dd4fe2918c3a015f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 22 Mar 2006 00:08:16 -0800 Subject: [PATCH] on_each_cpu(): disable local interrupts When on_each_cpu() runs the callback on other CPUs, it runs with local interrupts disabled. So we should run the function with local interrupts disabled on this CPU, too. And do the same for UP, so the callback is run in the same environment on both UP and SMP. (strictly it should do preempt_disable() too, but I think local_irq_disable is sufficiently equivalent). Also uninlines on_each_cpu(). softirq.c was the most appropriate file I could find, but it doesn't seem to justify creating a new file. Oh, and fix up that comment over (under?) x86's smp_call_function(). It drives me nuts. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/smp.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 218d725..d134e96 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -504,27 +504,23 @@ void unlock_ipi_call_lock(void) spin_unlock_irq(&call_lock); } -static struct call_data_struct * call_data; - -/* - * this function sends a 'generic call function' IPI to all other CPUs - * in the system. - */ - -int smp_call_function (void (*func) (void *info), void *info, int nonatomic, - int wait) -/* - * [SUMMARY] Run a function on all other CPUs. - * The function to run. This must be fast and non-blocking. - * An arbitrary pointer to pass to the function. - * currently unused. - * If true, wait (atomically) until function has completed on other CPUs. - * [RETURNS] 0 on success, else a negative status code. Does not return until +static struct call_data_struct *call_data; + +/** + * smp_call_function(): Run a function on all other CPUs. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @nonatomic: currently unused. + * @wait: If true, wait (atomically) until function has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. Does not return until * remote CPUs are nearly ready to execute <> or are or have executed. * * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. */ +int smp_call_function (void (*func) (void *info), void *info, int nonatomic, + int wait) { struct call_data_struct data; int cpus; -- cgit v1.1 From a7290ee08e434399660ace34427c17696e47c562 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Wed, 22 Mar 2006 00:08:44 -0800 Subject: [PATCH] Uninline sys_mmap common code (reduce binary size) Remove the inlining of the new vs old mmap system call common code. This reduces the size of the resulting vmlinux for defconfig as follows: mb@pc1:~/develop/git/linux-2.6$ size vmlinux.mmap* text data bss dec hex filename 3303749 521524 186564 4011837 3d373d vmlinux.mmapinline 3303557 521524 186564 4011645 3d367d vmlinux.mmapnoinline The new sys_mmap2() has also one function call overhead removed, now. (probably it was already optimized to a jmp before, but anyway...) Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/sys_i386.c | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c index a4a6197..8fdb1fb 100644 --- a/arch/i386/kernel/sys_i386.c +++ b/arch/i386/kernel/sys_i386.c @@ -40,14 +40,13 @@ asmlinkage int sys_pipe(unsigned long __user * fildes) return error; } -/* common code for old and new mmaps */ -static inline long do_mmap2( - unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) +asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) { int error = -EBADF; - struct file * file = NULL; + struct file *file = NULL; + struct mm_struct *mm = current->mm; flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); if (!(flags & MAP_ANONYMOUS)) { @@ -56,9 +55,9 @@ static inline long do_mmap2( goto out; } - down_write(¤t->mm->mmap_sem); + down_write(&mm->mmap_sem); error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); + up_write(&mm->mmap_sem); if (file) fput(file); @@ -66,13 +65,6 @@ out: return error; } -asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - return do_mmap2(addr, len, prot, flags, fd, pgoff); -} - /* * Perform the select(nd, in, out, ex, tv) and mmap() system * calls. Linux/i386 didn't use to be able to handle more than @@ -101,7 +93,8 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) if (a.offset & ~PAGE_MASK) goto out; - err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); + err = sys_mmap2(a.addr, a.len, a.prot, a.flags, + a.fd, a.offset >> PAGE_SHIFT); out: return err; } -- cgit v1.1 From 4d7d8c82c181711d28c8336108330a9121f5ef07 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Thu, 23 Mar 2006 02:59:30 -0800 Subject: [PATCH] i386: multi-column stack backtraces Print stack backtraces in multiple columns, saving screen space. Number of columns is configurable and defaults to one so behavior is backwards-compatible. Also removes the brackets around addresses when printing more that one entry per line so they print as:
instead of: [
] This helps multiple entries fit better on one line. Original idea by Dave Jones, taken from x86_64. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/traps.c | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index b814dbd..ee61988 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -112,12 +112,30 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) p < (void *)tinfo + THREAD_SIZE - 3; } -static void print_addr_and_symbol(unsigned long addr, char *log_lvl) +/* + * Print CONFIG_STACK_BACKTRACE_COLS address/symbol entries per line. + */ +static inline int print_addr_and_symbol(unsigned long addr, char *log_lvl, + int printed) { - printk(log_lvl); + if (!printed) + printk(log_lvl); + +#if CONFIG_STACK_BACKTRACE_COLS == 1 printk(" [<%08lx>] ", addr); +#else + printk(" <%08lx> ", addr); +#endif print_symbol("%s", addr); - printk("\n"); + + printed = (printed + 1) % CONFIG_STACK_BACKTRACE_COLS; + + if (printed) + printk(" "); + else + printk("\n"); + + return printed; } static inline unsigned long print_context_stack(struct thread_info *tinfo, @@ -125,20 +143,24 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, char *log_lvl) { unsigned long addr; + int printed = 0; /* nr of entries already printed on current line */ #ifdef CONFIG_FRAME_POINTER while (valid_stack_ptr(tinfo, (void *)ebp)) { addr = *(unsigned long *)(ebp + 4); - print_addr_and_symbol(addr, log_lvl); + printed = print_addr_and_symbol(addr, log_lvl, printed); ebp = *(unsigned long *)ebp; } #else while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; if (__kernel_text_address(addr)) - print_addr_and_symbol(addr, log_lvl); + printed = print_addr_and_symbol(addr, log_lvl, printed); } #endif + if (printed) + printk("\n"); + return ebp; } -- cgit v1.1 From 9a0b5817ad97bb718ab85322759d19a238712b47 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Thu, 23 Mar 2006 02:59:32 -0800 Subject: [PATCH] x86: SMP alternatives Implement SMP alternatives, i.e. switching at runtime between different code versions for UP and SMP. The code can patch both SMP->UP and UP->SMP. The UP->SMP case is useful for CPU hotplug. With CONFIG_CPU_HOTPLUG enabled the code switches to UP at boot time and when the number of CPUs goes down to 1, and switches to SMP when the number of CPUs goes up to 2. Without CONFIG_CPU_HOTPLUG or on non-SMP-capable systems the code is patched once at boot time (if needed) and the tables are released afterwards. The changes in detail: * The current alternatives bits are moved to a separate file, the SMP alternatives code is added there. * The patch adds some new elf sections to the kernel: .smp_altinstructions like .altinstructions, also contains a list of alt_instr structs. .smp_altinstr_replacement like .altinstr_replacement, but also has some space to save original instruction before replaving it. .smp_locks list of pointers to lock prefixes which can be nop'ed out on UP. The first two are used to replace more complex instruction sequences such as spinlocks and semaphores. It would be possible to deal with the lock prefixes with that as well, but by handling them as special case the table sizes become much smaller. * The sections are page-aligned and padded up to page size, so they can be free if they are not needed. * Splitted the code to release init pages to a separate function and use it to release the elf sections if they are unused. Signed-off-by: Gerd Hoffmann Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/Makefile | 2 +- arch/i386/kernel/alternative.c | 321 +++++++++++++++++++++++++++++++++++++++++ arch/i386/kernel/cpu/proc.c | 2 +- arch/i386/kernel/module.c | 32 ++-- arch/i386/kernel/semaphore.c | 8 +- arch/i386/kernel/setup.c | 95 ------------ arch/i386/kernel/smpboot.c | 3 + arch/i386/kernel/vmlinux.lds.S | 20 +++ 8 files changed, 372 insertions(+), 111 deletions(-) create mode 100644 arch/i386/kernel/alternative.c (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 65656c0..5b9ed21 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ - quirks.o i8237.o topology.o + quirks.o i8237.o topology.o alternative.o obj-y += cpu/ obj-y += timers/ diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c new file mode 100644 index 0000000..5cbd6f9 --- /dev/null +++ b/arch/i386/kernel/alternative.c @@ -0,0 +1,321 @@ +#include +#include +#include +#include +#include + +#define DEBUG 0 +#if DEBUG +# define DPRINTK(fmt, args...) printk(fmt, args) +#else +# define DPRINTK(fmt, args...) +#endif + +/* Use inline assembly to define this because the nops are defined + as inline assembly strings in the include files and we cannot + get them easily into strings. */ +asm("\t.data\nintelnops: " + GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 + GENERIC_NOP7 GENERIC_NOP8); +asm("\t.data\nk8nops: " + K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 + K8_NOP7 K8_NOP8); +asm("\t.data\nk7nops: " + K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 + K7_NOP7 K7_NOP8); + +extern unsigned char intelnops[], k8nops[], k7nops[]; +static unsigned char *intel_nops[ASM_NOP_MAX+1] = { + NULL, + intelnops, + intelnops + 1, + intelnops + 1 + 2, + intelnops + 1 + 2 + 3, + intelnops + 1 + 2 + 3 + 4, + intelnops + 1 + 2 + 3 + 4 + 5, + intelnops + 1 + 2 + 3 + 4 + 5 + 6, + intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, +}; +static unsigned char *k8_nops[ASM_NOP_MAX+1] = { + NULL, + k8nops, + k8nops + 1, + k8nops + 1 + 2, + k8nops + 1 + 2 + 3, + k8nops + 1 + 2 + 3 + 4, + k8nops + 1 + 2 + 3 + 4 + 5, + k8nops + 1 + 2 + 3 + 4 + 5 + 6, + k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, +}; +static unsigned char *k7_nops[ASM_NOP_MAX+1] = { + NULL, + k7nops, + k7nops + 1, + k7nops + 1 + 2, + k7nops + 1 + 2 + 3, + k7nops + 1 + 2 + 3 + 4, + k7nops + 1 + 2 + 3 + 4 + 5, + k7nops + 1 + 2 + 3 + 4 + 5 + 6, + k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, +}; +static struct nop { + int cpuid; + unsigned char **noptable; +} noptypes[] = { + { X86_FEATURE_K8, k8_nops }, + { X86_FEATURE_K7, k7_nops }, + { -1, NULL } +}; + + +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; +extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[]; +extern u8 *__smp_locks[], *__smp_locks_end[]; + +extern u8 __smp_alt_begin[], __smp_alt_end[]; + + +static unsigned char** find_nop_table(void) +{ + unsigned char **noptable = intel_nops; + int i; + + for (i = 0; noptypes[i].cpuid >= 0; i++) { + if (boot_cpu_has(noptypes[i].cpuid)) { + noptable = noptypes[i].noptable; + break; + } + } + return noptable; +} + +/* Replace instructions with better alternatives for this CPU type. + This runs before SMP is initialized to avoid SMP problems with + self modifying code. This implies that assymetric systems where + APs have less capabilities than the boot processor are not handled. + Tough. Make sure you disable such features by hand. */ + +void apply_alternatives(struct alt_instr *start, struct alt_instr *end) +{ + unsigned char **noptable = find_nop_table(); + struct alt_instr *a; + int diff, i, k; + + DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); + for (a = start; a < end; a++) { + BUG_ON(a->replacementlen > a->instrlen); + if (!boot_cpu_has(a->cpuid)) + continue; + memcpy(a->instr, a->replacement, a->replacementlen); + diff = a->instrlen - a->replacementlen; + /* Pad the rest with nops */ + for (i = a->replacementlen; diff > 0; diff -= k, i += k) { + k = diff; + if (k > ASM_NOP_MAX) + k = ASM_NOP_MAX; + memcpy(a->instr + i, noptable[k], k); + } + } +} + +static void alternatives_smp_save(struct alt_instr *start, struct alt_instr *end) +{ + struct alt_instr *a; + + DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end); + for (a = start; a < end; a++) { + memcpy(a->replacement + a->replacementlen, + a->instr, + a->instrlen); + } +} + +static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end) +{ + struct alt_instr *a; + + for (a = start; a < end; a++) { + memcpy(a->instr, + a->replacement + a->replacementlen, + a->instrlen); + } +} + +static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) +{ + u8 **ptr; + + for (ptr = start; ptr < end; ptr++) { + if (*ptr < text) + continue; + if (*ptr > text_end) + continue; + **ptr = 0xf0; /* lock prefix */ + }; +} + +static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) +{ + unsigned char **noptable = find_nop_table(); + u8 **ptr; + + for (ptr = start; ptr < end; ptr++) { + if (*ptr < text) + continue; + if (*ptr > text_end) + continue; + **ptr = noptable[1][0]; + }; +} + +struct smp_alt_module { + /* what is this ??? */ + struct module *mod; + char *name; + + /* ptrs to lock prefixes */ + u8 **locks; + u8 **locks_end; + + /* .text segment, needed to avoid patching init code ;) */ + u8 *text; + u8 *text_end; + + struct list_head next; +}; +static LIST_HEAD(smp_alt_modules); +static DEFINE_SPINLOCK(smp_alt); + +static int smp_alt_once = 0; +static int __init bootonly(char *str) +{ + smp_alt_once = 1; + return 1; +} +__setup("smp-alt-boot", bootonly); + +void alternatives_smp_module_add(struct module *mod, char *name, + void *locks, void *locks_end, + void *text, void *text_end) +{ + struct smp_alt_module *smp; + unsigned long flags; + + if (smp_alt_once) { + if (boot_cpu_has(X86_FEATURE_UP)) + alternatives_smp_unlock(locks, locks_end, + text, text_end); + return; + } + + smp = kzalloc(sizeof(*smp), GFP_KERNEL); + if (NULL == smp) + return; /* we'll run the (safe but slow) SMP code then ... */ + + smp->mod = mod; + smp->name = name; + smp->locks = locks; + smp->locks_end = locks_end; + smp->text = text; + smp->text_end = text_end; + DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n", + __FUNCTION__, smp->locks, smp->locks_end, + smp->text, smp->text_end, smp->name); + + spin_lock_irqsave(&smp_alt, flags); + list_add_tail(&smp->next, &smp_alt_modules); + if (boot_cpu_has(X86_FEATURE_UP)) + alternatives_smp_unlock(smp->locks, smp->locks_end, + smp->text, smp->text_end); + spin_unlock_irqrestore(&smp_alt, flags); +} + +void alternatives_smp_module_del(struct module *mod) +{ + struct smp_alt_module *item; + unsigned long flags; + + if (smp_alt_once) + return; + + spin_lock_irqsave(&smp_alt, flags); + list_for_each_entry(item, &smp_alt_modules, next) { + if (mod != item->mod) + continue; + list_del(&item->next); + spin_unlock_irqrestore(&smp_alt, flags); + DPRINTK("%s: %s\n", __FUNCTION__, item->name); + kfree(item); + return; + } + spin_unlock_irqrestore(&smp_alt, flags); +} + +void alternatives_smp_switch(int smp) +{ + struct smp_alt_module *mod; + unsigned long flags; + + if (smp_alt_once) + return; + BUG_ON(!smp && (num_online_cpus() > 1)); + + spin_lock_irqsave(&smp_alt, flags); + if (smp) { + printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); + clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); + clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + alternatives_smp_apply(__smp_alt_instructions, + __smp_alt_instructions_end); + list_for_each_entry(mod, &smp_alt_modules, next) + alternatives_smp_lock(mod->locks, mod->locks_end, + mod->text, mod->text_end); + } else { + printk(KERN_INFO "SMP alternatives: switching to UP code\n"); + set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); + set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + apply_alternatives(__smp_alt_instructions, + __smp_alt_instructions_end); + list_for_each_entry(mod, &smp_alt_modules, next) + alternatives_smp_unlock(mod->locks, mod->locks_end, + mod->text, mod->text_end); + } + spin_unlock_irqrestore(&smp_alt, flags); +} + +void __init alternative_instructions(void) +{ + apply_alternatives(__alt_instructions, __alt_instructions_end); + + /* switch to patch-once-at-boottime-only mode and free the + * tables in case we know the number of CPUs will never ever + * change */ +#ifdef CONFIG_HOTPLUG_CPU + if (num_possible_cpus() < 2) + smp_alt_once = 1; +#else + smp_alt_once = 1; +#endif + + if (smp_alt_once) { + if (1 == num_possible_cpus()) { + printk(KERN_INFO "SMP alternatives: switching to UP code\n"); + set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); + set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + apply_alternatives(__smp_alt_instructions, + __smp_alt_instructions_end); + alternatives_smp_unlock(__smp_locks, __smp_locks_end, + _text, _etext); + } + free_init_pages("SMP alternatives", + (unsigned long)__smp_alt_begin, + (unsigned long)__smp_alt_end); + } else { + alternatives_smp_save(__smp_alt_instructions, + __smp_alt_instructions_end); + alternatives_smp_module_add(NULL, "core kernel", + __smp_locks, __smp_locks_end, + _text, _etext); + alternatives_smp_switch(0); + } +} diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 89a85af..5cfbd80 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -40,7 +40,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* Other (Linux-defined) */ "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL, - "constant_tsc", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "constant_tsc", "up", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, diff --git a/arch/i386/kernel/module.c b/arch/i386/kernel/module.c index 5149c8a..470cf97 100644 --- a/arch/i386/kernel/module.c +++ b/arch/i386/kernel/module.c @@ -104,26 +104,38 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, return -ENOEXEC; } -extern void apply_alternatives(void *start, void *end); - int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - const Elf_Shdr *s; + const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - /* look for .altinstructions to patch */ for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - void *seg; - if (strcmp(".altinstructions", secstrings + s->sh_name)) - continue; - seg = (void *)s->sh_addr; - apply_alternatives(seg, seg + s->sh_size); - } + if (!strcmp(".text", secstrings + s->sh_name)) + text = s; + if (!strcmp(".altinstructions", secstrings + s->sh_name)) + alt = s; + if (!strcmp(".smp_locks", secstrings + s->sh_name)) + locks= s; + } + + if (alt) { + /* patch .altinstructions */ + void *aseg = (void *)alt->sh_addr; + apply_alternatives(aseg, aseg + alt->sh_size); + } + if (locks && text) { + void *lseg = (void *)locks->sh_addr; + void *tseg = (void *)text->sh_addr; + alternatives_smp_module_add(me, me->name, + lseg, lseg + locks->sh_size, + tseg, tseg + text->sh_size); + } return 0; } void module_arch_cleanup(struct module *mod) { + alternatives_smp_module_del(mod); } diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c index 7455ab6..967dc74 100644 --- a/arch/i386/kernel/semaphore.c +++ b/arch/i386/kernel/semaphore.c @@ -110,11 +110,11 @@ asm( ".align 4\n" ".globl __write_lock_failed\n" "__write_lock_failed:\n\t" - LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" + LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" "1: rep; nop\n\t" "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" "jne 1b\n\t" - LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" + LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" "jnz __write_lock_failed\n\t" "ret" ); @@ -124,11 +124,11 @@ asm( ".align 4\n" ".globl __read_lock_failed\n" "__read_lock_failed:\n\t" - LOCK "incl (%eax)\n" + LOCK_PREFIX "incl (%eax)\n" "1: rep; nop\n\t" "cmpl $1,(%eax)\n\t" "js 1b\n\t" - LOCK "decl (%eax)\n\t" + LOCK_PREFIX "decl (%eax)\n\t" "js __read_lock_failed\n\t" "ret" ); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index ab62a9f..5f58f8c 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1377,101 +1377,6 @@ static void __init register_memory(void) pci_mem_start, gapstart, gapsize); } -/* Use inline assembly to define this because the nops are defined - as inline assembly strings in the include files and we cannot - get them easily into strings. */ -asm("\t.data\nintelnops: " - GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 - GENERIC_NOP7 GENERIC_NOP8); -asm("\t.data\nk8nops: " - K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 - K8_NOP7 K8_NOP8); -asm("\t.data\nk7nops: " - K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 - K7_NOP7 K7_NOP8); - -extern unsigned char intelnops[], k8nops[], k7nops[]; -static unsigned char *intel_nops[ASM_NOP_MAX+1] = { - NULL, - intelnops, - intelnops + 1, - intelnops + 1 + 2, - intelnops + 1 + 2 + 3, - intelnops + 1 + 2 + 3 + 4, - intelnops + 1 + 2 + 3 + 4 + 5, - intelnops + 1 + 2 + 3 + 4 + 5 + 6, - intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -static unsigned char *k8_nops[ASM_NOP_MAX+1] = { - NULL, - k8nops, - k8nops + 1, - k8nops + 1 + 2, - k8nops + 1 + 2 + 3, - k8nops + 1 + 2 + 3 + 4, - k8nops + 1 + 2 + 3 + 4 + 5, - k8nops + 1 + 2 + 3 + 4 + 5 + 6, - k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -static unsigned char *k7_nops[ASM_NOP_MAX+1] = { - NULL, - k7nops, - k7nops + 1, - k7nops + 1 + 2, - k7nops + 1 + 2 + 3, - k7nops + 1 + 2 + 3 + 4, - k7nops + 1 + 2 + 3 + 4 + 5, - k7nops + 1 + 2 + 3 + 4 + 5 + 6, - k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -static struct nop { - int cpuid; - unsigned char **noptable; -} noptypes[] = { - { X86_FEATURE_K8, k8_nops }, - { X86_FEATURE_K7, k7_nops }, - { -1, NULL } -}; - -/* Replace instructions with better alternatives for this CPU type. - - This runs before SMP is initialized to avoid SMP problems with - self modifying code. This implies that assymetric systems where - APs have less capabilities than the boot processor are not handled. - Tough. Make sure you disable such features by hand. */ -void apply_alternatives(void *start, void *end) -{ - struct alt_instr *a; - int diff, i, k; - unsigned char **noptable = intel_nops; - for (i = 0; noptypes[i].cpuid >= 0; i++) { - if (boot_cpu_has(noptypes[i].cpuid)) { - noptable = noptypes[i].noptable; - break; - } - } - for (a = start; (void *)a < end; a++) { - if (!boot_cpu_has(a->cpuid)) - continue; - BUG_ON(a->replacementlen > a->instrlen); - memcpy(a->instr, a->replacement, a->replacementlen); - diff = a->instrlen - a->replacementlen; - /* Pad the rest with nops */ - for (i = a->replacementlen; diff > 0; diff -= k, i += k) { - k = diff; - if (k > ASM_NOP_MAX) - k = ASM_NOP_MAX; - memcpy(a->instr + i, noptable[k], k); - } - } -} - -void __init alternative_instructions(void) -{ - extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; - apply_alternatives(__alt_instructions, __alt_instructions_end); -} - static char * __init machine_specific_memory_setup(void); #ifdef CONFIG_MCA diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 7007e17..4c470e9 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -899,6 +899,7 @@ static int __devinit do_boot_cpu(int apicid, int cpu) unsigned short nmi_high = 0, nmi_low = 0; ++cpucount; + alternatives_smp_switch(1); /* * We can't use kernel_thread since we must avoid to @@ -1368,6 +1369,8 @@ void __cpu_die(unsigned int cpu) /* They ack this in play_dead by setting CPU_DEAD */ if (per_cpu(cpu_state, cpu) == CPU_DEAD) { printk ("CPU %d is now offline\n", cpu); + if (1 == num_online_cpus()) + alternatives_smp_switch(0); return; } msleep(100); diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 4710195..3f21c6f 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -68,6 +68,26 @@ SECTIONS *(.data.init_task) } + /* might get freed after init */ + . = ALIGN(4096); + __smp_alt_begin = .; + __smp_alt_instructions = .; + .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) { + *(.smp_altinstructions) + } + __smp_alt_instructions_end = .; + . = ALIGN(4); + __smp_locks = .; + .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { + *(.smp_locks) + } + __smp_locks_end = .; + .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) { + *(.smp_altinstr_replacement) + } + . = ALIGN(4096); + __smp_alt_end = .; + /* will be freed after init */ . = ALIGN(4096); /* Init code and data */ __init_begin = .; -- cgit v1.1 From 3bc9b76bede9b3c72088258c7e72eb823f3351d4 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Thu, 23 Mar 2006 02:59:33 -0800 Subject: [PATCH] i386: __devinit should be __cpuinit Several places in arch/i386/kernel/cpu and kernel/cpu were using __devinit when they should have been __cpuinit. Fixing that saves ~4K when CONFIG_HOTPLUG && !CONFIG_HOTPLUG_CPU. Noticed by Andrew Morton. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/common.c | 32 ++++++++++++++++---------------- arch/i386/kernel/cpu/intel.c | 12 ++++++------ arch/i386/kernel/cpu/intel_cacheinfo.c | 2 +- 3 files changed, 23 insertions(+), 23 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index e6bd095..f63dcfb 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -25,9 +25,9 @@ EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); -static int cachesize_override __devinitdata = -1; -static int disable_x86_fxsr __devinitdata = 0; -static int disable_x86_serial_nr __devinitdata = 1; +static int cachesize_override __cpuinitdata = -1; +static int disable_x86_fxsr __cpuinitdata = 0; +static int disable_x86_serial_nr __cpuinitdata = 1; struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; @@ -59,7 +59,7 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -int __devinit get_model_name(struct cpuinfo_x86 *c) +int __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; char *p, *q; @@ -89,7 +89,7 @@ int __devinit get_model_name(struct cpuinfo_x86 *c) } -void __devinit display_cacheinfo(struct cpuinfo_x86 *c) +void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ecx, edx, l2size; @@ -130,7 +130,7 @@ void __devinit display_cacheinfo(struct cpuinfo_x86 *c) /* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ /* Look up CPU names by table lookup. */ -static char __devinit *table_lookup_model(struct cpuinfo_x86 *c) +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) { struct cpu_model_info *info; @@ -151,7 +151,7 @@ static char __devinit *table_lookup_model(struct cpuinfo_x86 *c) } -static void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) { char *v = c->x86_vendor_id; int i; @@ -210,7 +210,7 @@ static inline int flag_is_changeable_p(u32 flag) /* Probe for the CPUID instruction */ -static int __devinit have_cpuid_p(void) +static int __cpuinit have_cpuid_p(void) { return flag_is_changeable_p(X86_EFLAGS_ID); } @@ -254,7 +254,7 @@ static void __init early_cpu_detect(void) } } -void __devinit generic_identify(struct cpuinfo_x86 * c) +void __cpuinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; int junk; @@ -307,7 +307,7 @@ void __devinit generic_identify(struct cpuinfo_x86 * c) #endif } -static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { /* Disable processor serial number */ @@ -335,7 +335,7 @@ __setup("serialnumber", x86_serial_nr_setup); /* * This does the hard work of actually picking apart the CPU stuff... */ -void __devinit identify_cpu(struct cpuinfo_x86 *c) +void __cpuinit identify_cpu(struct cpuinfo_x86 *c) { int i; @@ -453,7 +453,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) } #ifdef CONFIG_X86_HT -void __devinit detect_ht(struct cpuinfo_x86 *c) +void __cpuinit detect_ht(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; int index_msb, core_bits; @@ -500,7 +500,7 @@ void __devinit detect_ht(struct cpuinfo_x86 *c) } #endif -void __devinit print_cpu_info(struct cpuinfo_x86 *c) +void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { char *vendor = NULL; @@ -523,7 +523,7 @@ void __devinit print_cpu_info(struct cpuinfo_x86 *c) printk("\n"); } -cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE; +cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; /* This is hacky. :) * We're emulating future behavior. @@ -570,7 +570,7 @@ void __init early_cpu_init(void) * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. */ -void __devinit cpu_init(void) +void __cpuinit cpu_init(void) { int cpu = smp_processor_id(); struct tss_struct * t = &per_cpu(init_tss, cpu); @@ -670,7 +670,7 @@ void __devinit cpu_init(void) } #ifdef CONFIG_HOTPLUG_CPU -void __devinit cpu_uninit(void) +void __cpuinit cpu_uninit(void) { int cpu = raw_smp_processor_id(); cpu_clear(cpu, cpu_initialized); diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 8c01201..5386b29 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -29,7 +29,7 @@ extern int trap_init_f00f_bug(void); struct movsl_mask movsl_mask __read_mostly; #endif -void __devinit early_intel_workaround(struct cpuinfo_x86 *c) +void __cpuinit early_intel_workaround(struct cpuinfo_x86 *c) { if (c->x86_vendor != X86_VENDOR_INTEL) return; @@ -44,7 +44,7 @@ void __devinit early_intel_workaround(struct cpuinfo_x86 *c) * This is called before we do cpu ident work */ -int __devinit ppro_with_ram_bug(void) +int __cpuinit ppro_with_ram_bug(void) { /* Uses data from early_cpu_detect now */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && @@ -62,7 +62,7 @@ int __devinit ppro_with_ram_bug(void) * P4 Xeon errata 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. */ -static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c) +static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) { unsigned long lo, hi; @@ -81,7 +81,7 @@ static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c) /* * find out the number of processor cores on the die */ -static int __devinit num_cpu_cores(struct cpuinfo_x86 *c) +static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) { unsigned int eax, ebx, ecx, edx; @@ -96,7 +96,7 @@ static int __devinit num_cpu_cores(struct cpuinfo_x86 *c) return 1; } -static void __devinit init_intel(struct cpuinfo_x86 *c) +static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; char *p = NULL; @@ -205,7 +205,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) return size; } -static struct cpu_dev intel_cpu_dev __devinitdata = { +static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, .c_models = { diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index ffe58ce..36c9b37 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -330,7 +330,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) } } } -static void __devinit cache_remove_shared_cpu_map(unsigned int cpu, int index) +static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) { struct _cpuid4_info *this_leaf, *sibling_leaf; int sibling; -- cgit v1.1 From 4f88651125e2ca8b106b6f65b65ea45776517bf3 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Thu, 23 Mar 2006 02:59:34 -0800 Subject: [PATCH] i386: allow disabling X86_FEATURE_SEP at boot Allow the x86 "sep" feature to be disabled at bootup. This forces use of the int80 vsyscall. Mainly for testing or benchmarking the int80 vsyscall code. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/common.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index f63dcfb..bbfc278 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -26,8 +26,9 @@ DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); static int cachesize_override __cpuinitdata = -1; -static int disable_x86_fxsr __cpuinitdata = 0; +static int disable_x86_fxsr __cpuinitdata; static int disable_x86_serial_nr __cpuinitdata = 1; +static int disable_x86_sep __cpuinitdata; struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; @@ -187,6 +188,14 @@ static int __init x86_fxsr_setup(char * s) __setup("nofxsr", x86_fxsr_setup); +static int __init x86_sep_setup(char * s) +{ + disable_x86_sep = 1; + return 1; +} +__setup("nosep", x86_sep_setup); + + /* Standard macro to see if a specific flag is changeable */ static inline int flag_is_changeable_p(u32 flag) { @@ -405,6 +414,10 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) clear_bit(X86_FEATURE_XMM, c->x86_capability); } + /* SEP disabled? */ + if (disable_x86_sep) + clear_bit(X86_FEATURE_SEP, c->x86_capability); + if (disable_pse) clear_bit(X86_FEATURE_PSE, c->x86_capability); -- cgit v1.1 From 54a20f8c5d778ed3603130de4b92f64405228611 Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Thu, 23 Mar 2006 02:59:36 -0800 Subject: [PATCH] i386: fall back to sensible CPU model name When vendor-specific i386 initialization code is unavailable the kernel falls back to a default CPU model name. Make that model name reflect the CPU family instead of an internal vendor index. Tested on Pentium II (family 6 model 5). /proc/cpuinfo before: model name : ff/05 after: model name : 06/05 Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Acked-by: "Seth, Rohit" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index bbfc278..7e3d6b6 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -430,7 +430,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) else /* Last resort... */ sprintf(c->x86_model_id, "%02x/%02x", - c->x86_vendor, c->x86_model); + c->x86, c->x86_model); } /* Now the feature flags better reflect actual CPU features! */ -- cgit v1.1 From e5428ede94179ddccaa56308e0f194fa299edbb4 Mon Sep 17 00:00:00 2001 From: "Natalie.Protasevich@unisys.com" Date: Thu, 23 Mar 2006 02:59:36 -0800 Subject: [PATCH] Compilation fix for ES7000 when no ACPI is specified in config (i386) ES7000 platform code clean up for compilation errors and a warning. Ifdef'd the ACPI related parts in the ES7000 platform code. They were causing compile errors in certain configuration (without ACPI defined). I think this approach would be best (as opposed to Kconfig changes) since it only touches the subarch... Signed-off-by: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/mpparse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index e6e2f43..e85e463 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -828,6 +828,8 @@ void __init find_smp_config (void) smp_scan_config(address, 0x400); } +int es7000_plat; + /* -------------------------------------------------------------------------- ACPI-based MP Configuration -------------------------------------------------------------------------- */ @@ -1011,8 +1013,6 @@ void __init mp_override_legacy_irq ( return; } -int es7000_plat; - void __init mp_config_acpi_legacy_irqs (void) { struct mpc_config_intsrc intsrc; -- cgit v1.1 From cc04ee9cc527e314e6906849ee931c33764f861e Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Thu, 23 Mar 2006 02:59:38 -0800 Subject: [PATCH] i386 traps: merge printk calls Merge a few printk calls in i386 traps. Signed-off-by: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/traps.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index ee61988..f20797b 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -188,8 +188,7 @@ static void show_trace_log_lvl(struct task_struct *task, stack = (unsigned long*)context->previous_esp; if (!stack) break; - printk(log_lvl); - printk(" =======================\n"); + printk("%s =======================\n", log_lvl); } } @@ -218,14 +217,12 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp, break; if (i && ((i % 8) == 0)) { printk("\n"); - printk(log_lvl); - printk(" "); + printk("%s ", log_lvl); } printk("%08lx ", *stack++); } printk("\n"); - printk(log_lvl); - printk("Call Trace:\n"); + printk("%sCall Trace:\n", log_lvl); show_trace_log_lvl(task, esp, log_lvl); } -- cgit v1.1 From 3c36c6aa4aa1b3483c6a0370028608367426f58e Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Thu, 23 Mar 2006 02:59:39 -0800 Subject: [PATCH] i386: Don't let ptrace set the nested task bit There's no good reason for allowing ptrace to set the NT bit in EFLAGS, so mask it off. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/ptrace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 5c1fb6a..506462e 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -34,10 +34,10 @@ /* * Determines which flags the user has access to [1 = access, 0 = no access]. - * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9). + * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), NT(14), IOPL(12-13), IF(9). * Also masks reserved bits (31-22, 15, 5, 3, 1). */ -#define FLAG_MASK 0x00054dd5 +#define FLAG_MASK 0x00050dd5 /* set's the trap flag. */ #define TRAP_FLAG 0x100 -- cgit v1.1 From 8bed51cd17464433a0c77afc8a5150e51d3da37d Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Thu, 23 Mar 2006 02:59:40 -0800 Subject: [PATCH] i386: let signal handlers set the resume flag Allow signal handlers to set the RF bit in EFLAGS. This lets a simple debugger using SIGTRAP skip one instruction after returning from a signal. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/signal.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 963616d..608eac4 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -123,7 +123,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax err |= __get_user(tmp, &sc->seg); \ loadsegment(seg,tmp); } -#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | X86_EFLAGS_DF | \ +#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_RF | \ + X86_EFLAGS_OF | X86_EFLAGS_DF | \ X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \ X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF) -- cgit v1.1 From 99b7de33477882b86d54ce8ecbf90147f9d106d7 Mon Sep 17 00:00:00 2001 From: Stas Sergeev Date: Thu, 23 Mar 2006 02:59:41 -0800 Subject: [PATCH] x86: early printk handling fixes The history is that -mm kernels do not work for me for a few months already. The things started from crashing somewhere after starting init, and for the last month - no boot at all, just "Uncompressing... OK, booting kernel", and silence. Early console didn't work too. With the latest releases this degraded into an infinite stream of the "Unknown interrupt or fault" messages. So today my patience ran out and I started to think how can I collect at least some info for the bug-report. Attached is the patch that allows to gather some valueable debug info on the problem by making an early console more useable. I can't properly test the patch, as the kernel still doesn't boot, so I'll explain it in details in a hope someone else can justify the intrusive changes. arch_hooks.h: added prototypes for setup_early_printk() and early_printk(). setup.c: killed wrong setup_early_printk() prototype. Moved setup_early_printk() a bit earlier, as it was not "early enough" to cover the bug I was fighting with. early_printk.c: made it to start printing from the bottom of the screen, otherwise the messages interfere with the ones of the boot-loader, so you can't read them. Signed-off-by: Stas Sergeev Cc: Andi Kleen Cc: Zwane Mwaikambo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/setup.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 5f58f8c..2d87829 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1459,6 +1459,16 @@ void __init setup_arch(char **cmdline_p) parse_cmdline_early(cmdline_p); +#ifdef CONFIG_EARLY_PRINTK + { + char *s = strstr(*cmdline_p, "earlyprintk="); + if (s) { + setup_early_printk(strchr(s, '=') + 1); + printk("early console enabled\n"); + } + } +#endif + max_low_pfn = setup_memory(); /* @@ -1483,19 +1493,6 @@ void __init setup_arch(char **cmdline_p) * NOTE: at this point the bootmem allocator is fully available. */ -#ifdef CONFIG_EARLY_PRINTK - { - char *s = strstr(*cmdline_p, "earlyprintk="); - if (s) { - extern void setup_early_printk(char *); - - setup_early_printk(strchr(s, '=') + 1); - printk("early console enabled\n"); - } - } -#endif - - dmi_scan_machine(); #ifdef CONFIG_X86_GENERICARCH -- cgit v1.1 From 101f12af16fb12f8da8100899a13ee1b1b576a0a Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 23 Mar 2006 02:59:45 -0800 Subject: [PATCH] i386: actively synchronize vmalloc area when registering certain callbacks Registering a callback handler through register_die_notifier() is obviously primarily intended for use by modules. However, the way these currently get called it is basically impossible for them to actually be used by modules, as there is, on non-PAE configurationes, a good chance (the larger the module, the better) for the system to crash as a result. This is because the callback gets invoked (a) in the page fault path before the top level page table propagation gets carried out (hence a fault to propagate the top level page table entry/entries mapping to module's code/data would nest infinitly) and (b) in the NMI path, where nested faults must absolutely not happen, since otherwise the IRET from the nested fault re-enables NMIs, potentially resulting in nested NMI occurences. Besides the modular aspect, similar problems would even arise for in- kernel consumers of the API if they touched ioremap()ed or vmalloc()ed memory inside their handlers. Signed-off-by: Jan Beulich Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/traps.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index f20797b..d510de7 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -99,6 +99,8 @@ int register_die_notifier(struct notifier_block *nb) { int err = 0; unsigned long flags; + + vmalloc_sync_all(); spin_lock_irqsave(&die_notifier_lock, flags); err = notifier_chain_register(&i386die_chain, nb); spin_unlock_irqrestore(&die_notifier_lock, flags); @@ -713,6 +715,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) void set_nmi_callback(nmi_callback_t callback) { + vmalloc_sync_all(); rcu_assign_pointer(nmi_callback, callback); } EXPORT_SYMBOL_GPL(set_nmi_callback); -- cgit v1.1 From db753bdfc24c31228996799d508ce3bf7cbe3b99 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 23 Mar 2006 02:59:46 -0800 Subject: [PATCH] i386: fix uses of user_mode() vs. user_mode_vm() >commit 76381fee7e8feb4c22be636aa5d4765dbe4fbf9e >Author: Vincent Hanquez >Date: Thu Jun 23 00:08:46 2005 -0700 > > [PATCH] xen: x86_64: use more usermode macro > > Make use of the user_mode macro where it's possible. This is useful for Xen > because it will need only to redefine only the macro to a hypervisor call. I am of the opinion that the above changeset is incomplete, i.e. it missed converting some previous uses of user_mode to user_mode_vm. While most of them could be considered just cosmetical, at least the one in die_nmi doesn't appear to be. Signed-off-by: Jan Beulich Cc: Vincent Hanquez Cc: Zachary Amsden Cc: James Bottomley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/crash.c | 2 +- arch/i386/kernel/process.c | 2 +- arch/i386/kernel/traps.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index d49dbe8..e3c5fca0 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -105,7 +105,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) return 1; local_irq_disable(); - if (!user_mode(regs)) { + if (!user_mode_vm(regs)) { crash_fixup_ss_esp(&fixed_regs, regs); regs = &fixed_regs; } diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 0480454..299e616 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -295,7 +295,7 @@ void show_regs(struct pt_regs * regs) printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); - if (user_mode(regs)) + if (user_mode_vm(regs)) printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); printk(" EFLAGS: %08lx %s (%s %.*s)\n", regs->eflags, print_tainted(), system_utsname.release, diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index d510de7..a807a2d 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -254,7 +254,7 @@ void show_registers(struct pt_regs *regs) esp = (unsigned long) (®s->esp); savesegment(ss, ss); - if (user_mode(regs)) { + if (user_mode_vm(regs)) { in_kernel = 0; esp = regs->esp; ss = regs->xss & 0xffff; @@ -644,7 +644,7 @@ void die_nmi (struct pt_regs *regs, const char *msg) /* If we are in kernel we are probably nested up pretty bad * and might aswell get out now while we still can. */ - if (!user_mode(regs)) { + if (!user_mode_vm(regs)) { current->thread.trap_no = 2; crash_kexec(regs); } -- cgit v1.1 From 635cf99a80f4ebee59d70eb64bb85ce829e4591f Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Thu, 23 Mar 2006 02:59:48 -0800 Subject: [PATCH] i386: fix singlestep through an int80 syscall Using PTRACE_SINGLESTEP on a child that does an int80 syscall misses the SIGTRAP that should be delivered upon syscall exit. Fix that by setting TIF_SINGLESTEP when entering the kernel via int80 with TF set. /* Test whether singlestep through an int80 syscall works. */ #define _GNU_SOURCE #include #include #include #include #include #include #include static int child, status; static struct user_regs_struct regs; static void do_child() { ptrace(PTRACE_TRACEME, 0, 0, 0); kill(getpid(), SIGUSR1); asm ("int $0x80" : : "a" (20)); /* getpid */ } static void do_parent() { unsigned long eip, expected = 0; again: waitpid(child, &status, 0); if (WIFEXITED(status) || WIFSIGNALED(status)) return; if (WIFSTOPPED(status)) { ptrace(PTRACE_GETREGS, child, 0, ®s); eip = regs.eip; if (expected) fprintf(stderr, "child stop @ %08x, expected %08x %s\n", eip, expected, eip == expected ? "" : " <== ERROR"); if (*(unsigned short *)eip == 0x80cd) { fprintf(stderr, "int 0x80 at %08x\n", (unsigned int)eip); expected = eip + 2; } else expected = 0; ptrace(PTRACE_SINGLESTEP, child, NULL, NULL); } goto again; } int main(int argc, char * const argv[]) { child = fork(); if (child) do_parent(); else do_child(); return 0; } Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/entry.S | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 4d70472..cfc683f 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -226,6 +226,10 @@ ENTRY(system_call) pushl %eax # save orig_eax SAVE_ALL GET_THREAD_INFO(%ebp) + testl $TF_MASK,EFLAGS(%esp) + jz no_singlestep + orl $_TIF_SINGLESTEP,TI_flags(%ebp) +no_singlestep: # system call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) -- cgit v1.1 From be0a39120cb6b88bd085a3a280f5c769a3ed85ad Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Thu, 23 Mar 2006 02:59:48 -0800 Subject: [PATCH] i386: more vsyscall documentation Document a limitation of vsyscall-sysenter, since patches to fix it have been rejected. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/vsyscall-sysenter.S | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/vsyscall-sysenter.S b/arch/i386/kernel/vsyscall-sysenter.S index 76b7281..3b62baa 100644 --- a/arch/i386/kernel/vsyscall-sysenter.S +++ b/arch/i386/kernel/vsyscall-sysenter.S @@ -21,6 +21,9 @@ * instruction clobbers %esp, the user's %esp won't even survive entry * into the kernel. We store %esp in %ebp. Code in entry.S must fetch * arg6 from the stack. + * + * You can not use this vsyscall for the clone() syscall because the + * three dwords on the parent stack do not get copied to the child. */ .text .globl __kernel_vsyscall -- cgit v1.1 From 382dbd07c9cb4f255c28f680996ad819a8ce9bfe Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Thu, 23 Mar 2006 02:59:49 -0800 Subject: [PATCH] fix implicit declaration of GET_APIC_ID in arch/i386/kernel/apic.c arch/i386/kernel/apic.c:840: warning: implicit declaration of function `GET_APIC_ID' Signed-off-by: Jesper Juhl Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/apic.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 776c909..eb5279d 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -38,6 +38,7 @@ #include #include +#include #include #include "io_ports.h" -- cgit v1.1 From 52f4a91afd9316fb4f0f3a77c5ff56b9c98632ea Mon Sep 17 00:00:00 2001 From: Jesper Juhl Date: Thu, 23 Mar 2006 02:59:50 -0800 Subject: [PATCH] Fix the imlicit declaration of mtrr_centaur_report_mcr in arch/i386/kernel/cpu/centaur.c arch/i386/kernel/cpu/centaur.c: In function `centaur_mcr_insert': arch/i386/kernel/cpu/centaur.c:33: warning: implicit declaration of function `mtrr_centaur_report_mcr' Signed-off-by: Jesper Juhl Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/centaur.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c index f52669e..bd75629 100644 --- a/arch/i386/kernel/cpu/centaur.c +++ b/arch/i386/kernel/cpu/centaur.c @@ -4,6 +4,7 @@ #include #include #include +#include #include "cpu.h" #ifdef CONFIG_X86_OOSTORE -- cgit v1.1 From 4ef0652a74d9c460299b00566d033bd45d60da98 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Thu, 23 Mar 2006 02:59:51 -0800 Subject: [PATCH] i386: cleanup after cpu_gdt_descr conversion to per-cpu data With cpu_gdt_descr having been converted to per-CPU data, the old object (in head.S) no longer needs to reserve space for each CPU's instance. With cpu_gdt_table not being used for CPU 0 anymore, it doesn't seem to need page alignment (or if in fact there is a need for it to retain that alignment, the whole object should go into .data.page_align). Signed-off-by: Jan Beulich Acked-by: Zachary Amsden Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/head.S | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index e0b7c63..3debc2e 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -450,7 +450,6 @@ int_msg: .globl boot_gdt_descr .globl idt_descr -.globl cpu_gdt_descr ALIGN # early boot GDT descriptor (must use 1:1 address mapping) @@ -470,8 +469,6 @@ cpu_gdt_descr: .word GDT_ENTRIES*8-1 .long cpu_gdt_table - .fill NR_CPUS-1,8,0 # space for the other GDT descriptors - /* * The boot_gdt_table must mirror the equivalent in setup.S and is * used only for booting. @@ -485,7 +482,7 @@ ENTRY(boot_gdt_table) /* * The Global Descriptor Table contains 28 quadwords, per-CPU. */ - .align PAGE_SIZE_asm + .align L1_CACHE_BYTES ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ .quad 0x0000000000000000 /* 0x0b reserved */ -- cgit v1.1 From 75874d5cc8efef22457072e14103dc55f164e64e Mon Sep 17 00:00:00 2001 From: Chuck Ebbert <76306.1226@compuserve.com> Date: Thu, 23 Mar 2006 02:59:52 -0800 Subject: [PATCH] i386: fix dump_stack() i386 has a small bug in the stack dump code where it prints an extra log level code. Remove that and fix the alignment of normal stack dump printout. Also remove some unnecessary printk() calls. Signed-off-by: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/traps.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index a807a2d..1b7ad41 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -217,19 +217,17 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp, for(i = 0; i < kstack_depth_to_print; i++) { if (kstack_end(stack)) break; - if (i && ((i % 8) == 0)) { - printk("\n"); - printk("%s ", log_lvl); - } + if (i && ((i % 8) == 0)) + printk("\n%s ", log_lvl); printk("%08lx ", *stack++); } - printk("\n"); - printk("%sCall Trace:\n", log_lvl); + printk("\n%sCall Trace:\n", log_lvl); show_trace_log_lvl(task, esp, log_lvl); } void show_stack(struct task_struct *task, unsigned long *esp) { + printk(" "); show_stack_log_lvl(task, esp, ""); } -- cgit v1.1 From f2d0d263b51d13efe55f0aae9254b69ba2855f1c Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 23 Mar 2006 02:59:52 -0800 Subject: [PATCH] x86: cpuid.4 doesn't need cpu level 5 Detecting cache line using cpuid.4, cpuid level 4 is enough. Signed-off-by: Shaohua Li Cc: Dave Jones Cc: "Seth, Rohit" Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/intel_cacheinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 36c9b37..ce61921 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -174,7 +174,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ - if (c->cpuid_level > 4) { + if (c->cpuid_level > 3) { static int is_initialized; if (is_initialized == 0) { -- cgit v1.1 From 7c5c1e427b5e83807fd05419d1cf6991b9d87247 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 23 Mar 2006 02:59:53 -0800 Subject: [PATCH] x86: deterine xapic using apic version Checking APIC version instead of CPU family to determine XAPIC. Family 6 CPU could have xapic as well. Signed-off-by: Shaohua Li Cc: Dave Jones Cc: "Seth, Rohit" Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/io_apic.c | 3 ++- arch/i386/kernel/mpparse.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 39d9a5f..fd1c60c 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -1761,7 +1761,8 @@ static void __init setup_ioapic_ids_from_mpc(void) * Don't check I/O APIC IDs for xAPIC systems. They have * no meaning without the serial APIC bus. */ - if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 < 15)) + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) return; /* * This is broken; anything with a real cpu count has to diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index e85e463..8d8aa9d 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -937,7 +937,8 @@ void __init mp_register_ioapic ( mp_ioapics[idx].mpc_apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15)) + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) tmpid = io_apic_get_unique_id(idx, id); else tmpid = id; -- cgit v1.1 From bdaff4a331db46f3bd953f413316c4603c4004b4 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Thu, 23 Mar 2006 02:59:57 -0800 Subject: [PATCH] x86 topology: don;t create a control file for BSP that cannot be removed Don't create "online" control file for BSP (i386/x86_64) since its not removable. We originally added this to support ppc64 if the kernel has support but BIOS indicated no offline support, we just didnt create online files for them. We used the same method in ia64 as well, if we have a cpu taking platform interrupts but cannot be removed if those interrupts cannot be re-targeted to another cpu. Signed-off-by: Ashok Raj Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/topology.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c index 67a0e1b..2963552 100644 --- a/arch/i386/kernel/topology.c +++ b/arch/i386/kernel/topology.c @@ -41,6 +41,15 @@ int arch_register_cpu(int num){ parent = &node_devices[node].node; #endif /* CONFIG_NUMA */ + /* + * CPU0 cannot be offlined due to several + * restrictions and assumptions in kernel. This basically + * doesnt add a control file, one cannot attempt to offline + * BSP. + */ + if (!num) + cpu_devices[num].cpu.no_control = 1; + return register_cpu(&cpu_devices[num].cpu, num, parent); } -- cgit v1.1 From fc558a7496bfab3d29a68953b07a95883fdcfbb1 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 23 Mar 2006 03:00:05 -0800 Subject: [PATCH] swsusp: finally solve mysqld problem This patch from Pavel moves userland freeze signals handling into more logical place. It now hits even with mysqld running. Signed-off-by: Rafael J. Wysocki Signed-off-by: Pavel Machek Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/signal.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 608eac4..5c352c3 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -583,9 +583,6 @@ static void fastcall do_signal(struct pt_regs *regs) if (!user_mode(regs)) return; - if (try_to_freeze()) - goto no_signal; - if (test_thread_flag(TIF_RESTORE_SIGMASK)) oldset = ¤t->saved_sigmask; else @@ -614,7 +611,6 @@ static void fastcall do_signal(struct pt_regs *regs) return; } -no_signal: /* Did we come from a system call? */ if (regs->orig_eax >= 0) { /* Restart the system call - no handlers present */ -- cgit v1.1 From 7a7d1cf95408863a657035701606b13644c9f55e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Mar 2006 03:00:35 -0800 Subject: [PATCH] sem2mutex: kprobes Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. Signed-off-by: Ingo Molnar Acked-by: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 694a139..7a59050 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -84,9 +84,9 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - down(&kprobe_mutex); + mutex_lock(&kprobe_mutex); free_insn_slot(p->ainsn.insn); - up(&kprobe_mutex); + mutex_unlock(&kprobe_mutex); } static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) -- cgit v1.1 From 91368d73e4b60d577ad171e5bd315b564265fcdb Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 23 Mar 2006 03:00:54 -0800 Subject: [PATCH] make bug messages more consistent Consolidate all kernel bug printouts to begin with the "BUG: " string. Makes it easier to find them in large bootup logs. Signed-off-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/nmi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index be87c5e..1db34ef 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -543,7 +543,7 @@ void nmi_watchdog_tick (struct pt_regs * regs) /* * die_nmi will return ONLY if NOTIFY_STOP happens.. */ - die_nmi(regs, "NMI Watchdog detected LOCKUP"); + die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP"); } else { last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; -- cgit v1.1 From dd287796d608fcdc3fe5e8fdb5bf762a8f1bc32a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 23 Mar 2006 03:00:57 -0800 Subject: [PATCH] pause_on_oops command line option Attempt to fix the problem wherein people's oops reports scroll off the screen due to repeated oopsing or to oopses on other CPUs. If this happens the user can reboot with the `pause_on_oops=' option. It will allow the first oopsing CPU to print an oops record just a single time. Second oopsing attempts, or oopses on other CPUs will cause those CPUs to enter a tight loop until the specified number of seconds have elapsed. The patch implements the infrastructure generically in the expectation that architectures other than x86 will find it useful. Cc: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/traps.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 1b7ad41..de5386b 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -352,6 +352,8 @@ void die(const char * str, struct pt_regs * regs, long err) static int die_counter; unsigned long flags; + oops_enter(); + if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); spin_lock_irqsave(&die.lock, flags); @@ -404,6 +406,7 @@ void die(const char * str, struct pt_regs * regs, long err) ssleep(5); panic("Fatal exception"); } + oops_exit(); do_exit(SIGSEGV); } -- cgit v1.1 From 394e3902c55e667945f6f1c2bdbc59842cce70f7 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 23 Mar 2006 03:01:05 -0800 Subject: [PATCH] more for_each_cpu() conversions When we stop allocating percpu memory for not-possible CPUs we must not touch the percpu data for not-possible CPUs at all. The correct way of doing this is to test cpu_possible() or to use for_each_cpu(). This patch is a kernel-wide sweep of all instances of NR_CPUS. I found very few instances of this bug, if any. But the patch converts lots of open-coded test to use the preferred helper macros. Cc: Mikael Starvik Cc: David Howells Acked-by: Kyle McMartin Cc: Anton Blanchard Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Paul Mundt Cc: "David S. Miller" Cc: William Lee Irwin III Cc: Andi Kleen Cc: Christian Zankel Cc: Philippe Elie Cc: Nathan Scott Cc: Jens Axboe Cc: Eric Dumazet Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 4 +--- arch/i386/kernel/io_apic.c | 22 +++++++++------------- arch/i386/kernel/nmi.c | 4 ++-- 3 files changed, 12 insertions(+), 18 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index e11a092..3d5110b 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -1145,9 +1145,7 @@ static int __cpuinit powernowk8_init(void) { unsigned int i, supported_cpus = 0; - for (i=0; i CPU_IRQ(i)) { @@ -441,9 +439,7 @@ tryanothercpu: */ tmp_cpu_irq = 0; tmp_loaded = -1; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_online(i)) - continue; + for_each_online_cpu(i) { if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (max_cpu_irq <= CPU_IRQ(i)) @@ -619,9 +615,7 @@ static int __init balanced_irq_init(void) if (smp_num_siblings > 1 && !cpus_empty(tmp)) physical_balance = 1; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_online(i)) - continue; + for_each_online_cpu(i) { irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { @@ -638,9 +632,11 @@ static int __init balanced_irq_init(void) else printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); failed: - for (i = 0; i < NR_CPUS; i++) { + for_each_cpu(i) { kfree(irq_cpu_data[i].irq_delta); + irq_cpu_data[i].irq_delta = NULL; kfree(irq_cpu_data[i].last_irq); + irq_cpu_data[i].last_irq = NULL; } return 0; } diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 1db34ef..9074818 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -143,7 +143,7 @@ static int __init check_nmi_watchdog(void) local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for_each_cpu(cpu) { #ifdef CONFIG_SMP /* Check cpu_callin_map here because that is set after the timer is started. */ @@ -510,7 +510,7 @@ void touch_nmi_watchdog (void) * Just reset the alert counters, (other CPUs might be * spinning on locks we hold): */ - for (i = 0; i < NR_CPUS; i++) + for_each_cpu(i) alert_counter[i] = 0; /* -- cgit v1.1 From b408cbc704352eccee301e1103b23203ba1c3a0e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 22 Feb 2006 15:50:30 -0800 Subject: [PATCH] PCI: resource address mismatch On Tue, 21 Feb 2006, Ivan Kokshaysky wrote: > There are two bogus entries in the BIOS memory map table which are > conflicting with a prefetchable memory range of the AGP bridge: > > BIOS-e820: 00000000fec00000 - 00000000fec01000 (reserved) > BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved) > > 0000:00:02.0 PCI bridge: Silicon Integrated Systems [SiS] Virtual PCI-to-PCI bridge (AGP) (prog-if 00 [Normal decode]) > Flags: bus master, fast devsel, latency 0 > Bus: primary=00, secondary=01, subordinate=01, sec-latency=0 > I/O behind bridge: 0000c000-0000cfff > Memory behind bridge: e7e00000-e7efffff > Prefetchable memory behind bridge: fec00000-ffcfffff > ^^^^^^^^^^^^^^^^^ Yes. However, it's pretty clear that the e820 entries are there for a reason. Probably they are a hack by the BIOS maintainers to keep Windows from stomping/moving that region, exactly because they want to keep the bridge where it is (or, it's actually for the BIOS itself - the BIOS tables are a horrid mess, and BIOS engineers are pretty hacky people: they'll add random entries to make their own broken algorithms do the "right thing"). > Starting from 2.6.13, kernel tries to resolve that sort of conflicts, > so that prefetch window of the bridge and the framebuffer memory behind > it get moved to 0x10000000. I think we could (and probably should) solve this another way: consider the ACPI "reserved regions" from the e820 map exactly the same way that we do other ACPI hints - they should restrict _new_ allocations, but not impact stuff we figure out on our own. Basically, right now we assign _unassigned_ resources at "fs_initcall" time. If we were to add in the e820 "reserved region" stuff before that (but after we've done PCI discovery), we'd probably do the right thing. Right now we do the e820 reserved regions very early indeed: we call "register_memory()" from setup_arch(). We could move at least part of it (the part that registers the resources) down a bit. Here's a test-patch. I'm not saying we should absolutely do this, but it might be interesting to try... Cc: "Antonino A. Daplas" Cc: Ivan Kokshaysky Cc: Signed-off-by: Andrew Morton Signed-off-by: Greg Kroah-Hartman --- arch/i386/kernel/efi.c | 2 +- arch/i386/kernel/setup.c | 22 +++++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index aeabb41..7ec6cfa 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -543,7 +543,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) > 0x100000000ULL) continue; - res = alloc_bootmem_low(sizeof(struct resource)); + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); switch (md->type) { case EFI_RESERVED_TYPE: res->name = "Reserved Memory"; diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 2d87829..d313a11 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1288,7 +1288,7 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat struct resource *res; if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) continue; - res = alloc_bootmem_low(sizeof(struct resource)); + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); switch (e820.map[i].type) { case E820_RAM: res->name = "System RAM"; break; case E820_ACPI: res->name = "ACPI Tables"; break; @@ -1316,13 +1316,15 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat /* * Request address space for all standard resources + * + * This is called just before pcibios_assign_resources(), which is also + * an fs_initcall, but is linked in later (in arch/i386/pci/i386.c). */ -static void __init register_memory(void) +static int __init request_standard_resources(void) { - unsigned long gapstart, gapsize, round; - unsigned long long last; - int i; + int i; + printk("Setting up standard PCI resources\n"); if (efi_enabled) efi_initialize_iomem_resources(&code_resource, &data_resource); else @@ -1334,6 +1336,16 @@ static void __init register_memory(void) /* request I/O space for devices used on all i[345]86 PCs */ for (i = 0; i < STANDARD_IO_RESOURCES; i++) request_resource(&ioport_resource, &standard_io_resources[i]); + return 0; +} + +fs_initcall(request_standard_resources); + +static void __init register_memory(void) +{ + unsigned long gapstart, gapsize, round; + unsigned long long last; + int i; /* * Search for the bigest gap in the low 32 bits of the e820 -- cgit v1.1 From a72011567812cbd93788cc5facda160a3cba5905 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 24 Mar 2006 03:15:07 -0800 Subject: [PATCH] more-for_each_cpu-conversions fix I screwed up this conversion - we should be iterating across online CPUs, not possible ones. Spotted by Joe Perches Cc: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index 3d5110b..798da7c 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -1145,14 +1145,14 @@ static int __cpuinit powernowk8_init(void) { unsigned int i, supported_cpus = 0; - for_each_cpu(i) { + for_each_online_cpu(i) { if (check_supported_cpu(i)) supported_cpus++; } if (supported_cpus == num_online_cpus()) { - printk(KERN_INFO PFX "Found %d AMD Athlon 64 / Opteron processors (" VERSION ")\n", - supported_cpus); + printk(KERN_INFO PFX "Found %d AMD Athlon 64 / Opteron " + "processors (" VERSION ")\n", supported_cpus); return cpufreq_register_driver(&cpufreq_amd64_driver); } -- cgit v1.1 From cdb0452789d365695b5b173542af9c7e3d24f185 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 24 Mar 2006 03:15:57 -0800 Subject: [PATCH] kill include/linux/platform.h, default_idle() cleanup include/linux/platform.h contained nothing that was actually used except the default_idle() prototype, and is therefore removed by this patch. This patch does the following with the platform specific default_idle() functions on different architectures: - remove the unused function: - parisc - sparc64 - make the needlessly global function static: - arm - h8300 - m68k - m68knommu - s390 - v850 - x86_64 - add a prototype in asm/system.h: - cris - i386 - ia64 Signed-off-by: Adrian Bunk Acked-by: Patrick Mochel Acked-by: Kyle McMartin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/apm.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 05312a8..da30a37 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -824,8 +824,6 @@ static void apm_do_busy(void) static void (*original_pm_idle)(void); -extern void default_idle(void); - /** * apm_cpu_idle - cpu idling for APM capable Linux * -- cgit v1.1 From bc83455bc8336f57cd74d6e86b0f8fcad187d179 Mon Sep 17 00:00:00 2001 From: Andrey Panin Date: Sat, 25 Mar 2006 03:06:31 -0800 Subject: [PATCH] fix DMI onboard device discovery Attached patch fixes invalid pointer arithmetic in DMI code to make onboard device discovery working again. akpm: bug has been present since dmi_find_device() was added in 2.6.14. Affects ipmi only (I think) - the symptoms weren't described. akpm: changed to use pointer arithmetic rather than open-coded sizeof. Signed-off-by: Andrey Panin Cc: Corey Minyard Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/dmi_scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index 6a93d75..ca2a0cb 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -106,7 +106,7 @@ static void __init dmi_save_devices(struct dmi_header *dm) struct dmi_device *dev; for (i = 0; i < count; i++) { - char *d = ((char *) dm) + (i * 2); + char *d = (char *)(dm + 1) + (i * 2); /* Skip disabled device */ if ((*d & 0x80) == 0) -- cgit v1.1 From 34f361ade2fb4a869f6a7714d01c04ce4cfa75d9 Mon Sep 17 00:00:00 2001 From: Ashok Raj Date: Sat, 25 Mar 2006 03:08:18 -0800 Subject: [PATCH] Check if cpu can be onlined before calling smp_prepare_cpu() - Moved check for online cpu out of smp_prepare_cpu() - Moved default declaration of smp_prepare_cpu() to kernel/cpu.c - Removed lock_cpu_hotplug() from smp_prepare_cpu() to around it, since its called from cpu_up() as well now. - Removed clearing from cpu_present_map during cpu_offline as it breaks using cpu_up() directly during a subsequent online operation. Signed-off-by: Ashok Raj Cc: Srivatsa Vaddagiri Cc: "Li, Shaohua" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/smpboot.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 4c470e9..82371d8 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -1003,7 +1003,6 @@ void cpu_exit_clear(void) cpu_clear(cpu, cpu_callout_map); cpu_clear(cpu, cpu_callin_map); - cpu_clear(cpu, cpu_present_map); cpu_clear(cpu, smp_commenced_mask); unmap_cpu_to_logical_apicid(cpu); @@ -1015,31 +1014,20 @@ struct warm_boot_cpu_info { int cpu; }; -static void __devinit do_warm_boot_cpu(void *p) +static void __cpuinit do_warm_boot_cpu(void *p) { struct warm_boot_cpu_info *info = p; do_boot_cpu(info->apicid, info->cpu); complete(info->complete); } -int __devinit smp_prepare_cpu(int cpu) +static int __cpuinit __smp_prepare_cpu(int cpu) { DECLARE_COMPLETION(done); struct warm_boot_cpu_info info; struct work_struct task; int apicid, ret; - lock_cpu_hotplug(); - - /* - * On x86, CPU0 is never offlined. Trying to bring up an - * already-booted CPU will hang. So check for that case. - */ - if (cpu_online(cpu)) { - ret = -EINVAL; - goto exit; - } - apicid = x86_cpu_to_apicid[cpu]; if (apicid == BAD_APICID) { ret = -ENODEV; @@ -1064,7 +1052,6 @@ int __devinit smp_prepare_cpu(int cpu) zap_low_mappings(); ret = 0; exit: - unlock_cpu_hotplug(); return ret; } #endif @@ -1392,6 +1379,22 @@ void __cpu_die(unsigned int cpu) int __devinit __cpu_up(unsigned int cpu) { +#ifdef CONFIG_HOTPLUG_CPU + int ret=0; + + /* + * We do warm boot only on cpus that had booted earlier + * Otherwise cold boot is all handled from smp_boot_cpus(). + * cpu_callin_map is set during AP kickstart process. Its reset + * when a cpu is taken offline from cpu_exit_clear(). + */ + if (!cpu_isset(cpu, cpu_callin_map)) + ret = __smp_prepare_cpu(cpu); + + if (ret) + return -EIO; +#endif + /* In case one didn't come up */ if (!cpu_isset(cpu, cpu_callin_map)) { printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu); -- cgit v1.1 From f081a529f808ed450c22553de7b3275e0ffde9a0 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Sat, 25 Mar 2006 01:51:51 -0800 Subject: [PATCH] cpufreq: speedstep-smi asm fix Fix bug identified by Linus Torvalds : the `out' instruction depends upon the state of memory_data[], so we need to tell gcc that before executing it. (The opcode, not gcc). Fixes http://bugzilla.kernel.org/show_bug.cgi?id=5553 Thanks to Antonio Ospite for testing. Cc: Dave Jones Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/cpufreq/speedstep-smi.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c index 28cc5d5..cfc4276e 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c @@ -75,7 +75,9 @@ static int speedstep_smi_ownership (void) __asm__ __volatile__( "out %%al, (%%dx)\n" : "=D" (result) - : "a" (command), "b" (function), "c" (0), "d" (smi_port), "D" (0), "S" (magic) + : "a" (command), "b" (function), "c" (0), "d" (smi_port), + "D" (0), "S" (magic) + : "memory" ); dprintk("result is %x\n", result); -- cgit v1.1 From f083a329e63d471a5e9238e837772b1b76c218db Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:30:19 +0100 Subject: [PATCH] x86_64: Clean up and tweak ACPI blacklist year code - Move the core parser into dmi_scan.c. It can be useful for other subsystems too. - Differentiate between field doesn't exist and field is 0 or unparseable. The first case is likely an old BIOS with broken ACPI, the later is likely a slightly buggy BIOS where someone forget to edit the date. Don't blacklist in the later case. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/dmi_scan.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index ca2a0cb..d2dfd9c 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -299,3 +299,33 @@ struct dmi_device * dmi_find_device(int type, const char *name, return NULL; } EXPORT_SYMBOL(dmi_find_device); + +/** + * dmi_get_year - Return year of a DMI date + * @field: data index (like dmi_get_system_info) + * + * Returns -1 when the field doesn't exist. 0 when it is broken. + */ +int dmi_get_year(int field) +{ + int year; + char *s = dmi_get_system_info(field); + + if (!s) + return -1; + if (*s == '\0') + return 0; + s = strrchr(s, '/'); + if (!s) + return 0; + + s += 1; + year = simple_strtoul(s, NULL, 0); + if (year && year < 100) { /* 2-digit year */ + year += 1900; + if (year < 1996) /* no dates < spec 1.0 */ + year += 100; + } + + return year; +} -- cgit v1.1 From f2d3efedbecc04dc348d723e4c90b46731b3bb48 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:30:22 +0100 Subject: [PATCH] x86_64: Implement early DMI scanning There are more and more cases where we need to know DMI information early to work around bugs. i386 already had early DMI scanning, but x86-64 didn't. Implement this now. This required some cleanup in the i386 code. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/dmi_scan.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index d2dfd9c..ebc8dc116 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -5,6 +5,7 @@ #include #include #include +#include static char * __init dmi_string(struct dmi_header *dm, u8 s) { -- cgit v1.1 From 2ab7f1833baf0f0a0ca9868ee21f8273e2858132 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:30:52 +0100 Subject: [PATCH] x86_64: Quieten down microcode update driver Only log data in microcode driver when something is changed Otherwise it was far too noisy on large systems. Also remove the printk when it is unloaded. Cc: tigran@veritas.com Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/microcode.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index 5390b52..55bc365 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -202,8 +202,6 @@ static inline void mark_microcode_update (int cpu_num, microcode_header_t *mc_he } else if (mc_header->rev == uci->rev) { /* notify the caller of success on this cpu */ uci->err = MC_SUCCESS; - printk(KERN_ERR "microcode: CPU%d already at revision" - " 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev); goto out; } @@ -369,7 +367,6 @@ static void do_update_one (void * unused) struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; if (uci->mc == NULL) { - printk(KERN_INFO "microcode: No new microcode data for CPU%d\n", cpu_num); return; } @@ -511,7 +508,6 @@ static int __init microcode_init (void) static void __exit microcode_exit (void) { misc_deregister(µcode_dev); - printk(KERN_INFO "IA-32 Microcode Update Driver v" MICROCODE_VERSION " unregistered\n"); } module_init(microcode_init) -- cgit v1.1 From 9d95dd849ccc43c4b21504e1829b5bed68cdb1bc Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:31:22 +0100 Subject: [PATCH] i386/x86-64: List Intel LaGrange AKA SMX in /proc/cpuinfo Spec just got published so we know the CPUID bit. Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/proc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 5cfbd80..f94cdb7 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -45,7 +45,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est", + "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, -- cgit v1.1 From ad90573f93533ddf1035b0468ed27b4453e50c46 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Sat, 25 Mar 2006 16:31:52 +0100 Subject: [PATCH] x86_64: Initialize powernow_data[] for all siblings I got an oops on a dual core system because the lost tick handler called cpufreq_get() on core 1 and powernow tried to follow a NULL powernow_data[] pointer there. Initialize powernow_data for all cores of a CPU. Cc: Jacob Shin Cc: Dave Jones Signed-off-by: Andi Kleen Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index d09bec7..e5bc064 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -54,7 +54,7 @@ static DECLARE_MUTEX(fidvid_sem); static struct powernow_k8_data *powernow_data[NR_CPUS]; #ifndef CONFIG_SMP -static cpumask_t cpu_core_map[1]; +static cpumask_t cpu_core_map[1] = { CPU_MASK_ALL }; #endif /* Return a frequency in MHz, given an input fid */ @@ -976,7 +976,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) { struct powernow_k8_data *data; cpumask_t oldmask = CPU_MASK_ALL; - int rc; + int rc, i; if (!cpu_online(pol->cpu)) return -ENODEV; @@ -1062,7 +1062,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) printk("cpu_init done, current fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); - powernow_data[pol->cpu] = data; + for_each_cpu_mask(i, cpu_core_map[pol->cpu]) + powernow_data[i] = data; return 0; -- cgit v1.1 From 20c0d2d4402973050e7aad8a481ec6f847ee40d8 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Sun, 26 Mar 2006 01:37:01 -0800 Subject: [PATCH] i386: pass proper trap numbers to die chain handlers Pass the trap number causing the call to notify_die() to the die notification handler chain in a number of instances. Also, honor the return value from the handler chain invocation in die() as, through a debugger, the fault may have been fixed. Signed-off-by: Jan Beulich Acked-By: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/traps.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index de5386b..4624f8c 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -386,8 +386,12 @@ void die(const char * str, struct pt_regs * regs, long err) #endif if (nl) printk("\n"); - notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); - show_registers(regs); + if (notify_die(DIE_OOPS, str, regs, err, + current->thread.trap_no, SIGSEGV) != + NOTIFY_STOP) + show_registers(regs); + else + regs = NULL; } else printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); @@ -395,6 +399,9 @@ void die(const char * str, struct pt_regs * regs, long err) die.lock_owner = -1; spin_unlock_irqrestore(&die.lock, flags); + if (!regs) + return; + if (kexec_should_crash(current)) crash_kexec(regs); @@ -623,7 +630,7 @@ static DEFINE_SPINLOCK(nmi_print_lock); void die_nmi (struct pt_regs *regs, const char *msg) { - if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) == + if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) return; @@ -662,7 +669,7 @@ static void default_do_nmi(struct pt_regs * regs) reason = get_nmi_reason(); if (!(reason & 0xc0)) { - if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT) + if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) == NOTIFY_STOP) return; #ifdef CONFIG_X86_LOCAL_APIC @@ -678,7 +685,7 @@ static void default_do_nmi(struct pt_regs * regs) unknown_nmi_error(reason, regs); return; } - if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP) + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) return; if (reason & 0x80) mem_parity_error(reason, regs); -- cgit v1.1 From 10dbe196a8da6b3196881269c6639c0ec11c36cb Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Sun, 26 Mar 2006 01:37:02 -0800 Subject: [PATCH] i386: export: memory more than 4G through /proc/iomem Currently /proc/iomem exports physical memory also apart from io device memory. But on i386, it truncates any memory more than 4GB. This leads to problems for kexec/kdump. Kexec reads /proc/iomem to determine the system memory layout and prepares a memory map based on that and passes it to the kernel being kexeced. Given the fact that memory more than 4GB has been truncated, new kernel never gets to see and use that memory. Kdump also reads /proc/iomem to determine the physical memory layout of the system and encodes this informaiton in ELF headers. After a crash new kernel parses these ELF headers being used by previous kernel and vmcore is prepared accordingly. As memory more than 4GB has been truncated, kdump never sees that memory and never prepares ELF headers for it. Hence vmcore is truncated and limited to 4GB even if there is more physical memory in the system. This patch exports memory more than 4GB through /proc/iomem on i386. Signed-off-by: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/setup.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index d313a11..cec3c92 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1286,8 +1286,6 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat probe_roms(); for (i = 0; i < e820.nr_map; i++) { struct resource *res; - if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) - continue; res = kzalloc(sizeof(struct resource), GFP_ATOMIC); switch (e820.map[i].type) { case E820_RAM: res->name = "System RAM"; break; -- cgit v1.1 From 3ed3bce846abc7ef460104b461cac793e41afe5e Mon Sep 17 00:00:00 2001 From: Matt Domsch Date: Sun, 26 Mar 2006 01:37:03 -0800 Subject: [PATCH] ia64: use i386 dmi_scan.c Enable DMI table parsing on ia64. Andi Kleen has a patch in his x86_64 tree which enables the use of i386 dmi_scan.c on x86_64. dmi_scan.c functions are being used by the drivers/char/ipmi/ipmi_si_intf.c driver for autodetecting the ports or memory spaces where the IPMI controllers may be found. This patch adds equivalent changes for ia64 as to what is in the x86_64 tree. In addition, I reworked the DMI detection, such that on EFI-capable systems, it uses the efi.smbios pointer to find the table, rather than brute-force searching from 0xF0000. On non-EFI systems, it continues the brute-force search. My test system, an Intel S870BN4 'Tiger4', aka Dell PowerEdge 7250, with latest BIOS, does not list the IPMI controller in the ACPI namespace, nor does it have an ACPI SPMI table. Also note, currently shipping Dell x8xx EM64T servers don't have these either, so DMI is the only method for obtaining the address of the IPMI controller. Signed-off-by: Matt Domsch Acked-by: "Luck, Tony" Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/dmi_scan.c | 90 +++++++++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 32 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index ebc8dc116..bfecbd4 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -185,47 +186,72 @@ static void __init dmi_decode(struct dmi_header *dm) } } -void __init dmi_scan_machine(void) +static int __init dmi_present(char __iomem *p) { u8 buf[15]; - char __iomem *p, *q; + memcpy_fromio(buf, p, 15); + if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) { + u16 num = (buf[13] << 8) | buf[12]; + u16 len = (buf[7] << 8) | buf[6]; + u32 base = (buf[11] << 24) | (buf[10] << 16) | + (buf[9] << 8) | buf[8]; - /* - * no iounmap() for that ioremap(); it would be a no-op, but it's - * so early in setup that sucker gets confused into doing what - * it shouldn't if we actually call it. - */ - p = ioremap(0xF0000, 0x10000); - if (p == NULL) - goto out; - - for (q = p; q < p + 0x10000; q += 16) { - memcpy_fromio(buf, q, 15); - if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) { - u16 num = (buf[13] << 8) | buf[12]; - u16 len = (buf[7] << 8) | buf[6]; - u32 base = (buf[11] << 24) | (buf[10] << 16) | - (buf[9] << 8) | buf[8]; - - /* - * DMI version 0.0 means that the real version is taken from - * the SMBIOS version, which we don't know at this point. - */ - if (buf[14] != 0) - printk(KERN_INFO "DMI %d.%d present.\n", - buf[14] >> 4, buf[14] & 0xF); - else - printk(KERN_INFO "DMI present.\n"); + /* + * DMI version 0.0 means that the real version is taken from + * the SMBIOS version, which we don't know at this point. + */ + if (buf[14] != 0) + printk(KERN_INFO "DMI %d.%d present.\n", + buf[14] >> 4, buf[14] & 0xF); + else + printk(KERN_INFO "DMI present.\n"); + if (dmi_table(base,len, num, dmi_decode) == 0) + return 0; + } + return 1; +} - if (dmi_table(base,len, num, dmi_decode) == 0) +void __init dmi_scan_machine(void) +{ + char __iomem *p, *q; + int rc; + + if (efi_enabled) { + if (!efi.smbios) + goto out; + + /* This is called as a core_initcall() because it isn't + * needed during early boot. This also means we can + * iounmap the space when we're done with it. + */ + p = dmi_ioremap((unsigned long)efi.smbios, 0x10000); + if (p == NULL) + goto out; + + rc = dmi_present(p + 0x10); /* offset of _DMI_ string */ + iounmap(p); + if (!rc) + return; + } + else { + /* + * no iounmap() for that ioremap(); it would be a no-op, but + * it's so early in setup that sucker gets confused into doing + * what it shouldn't if we actually call it. + */ + p = dmi_ioremap(0xF0000, 0x10000); + if (p == NULL) + goto out; + + for (q = p; q < p + 0x10000; q += 16) { + rc = dmi_present(q); + if (!rc) return; } } - -out: printk(KERN_INFO "DMI not present or invalid.\n"); + out: printk(KERN_INFO "DMI not present or invalid.\n"); } - /** * dmi_check_system - check system DMI data * @list: array of dmi_system_id structures to match against -- cgit v1.1 From 27d8e3d15bcf9d7cd99bf6ca910ea9e34328c7fb Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Sun, 26 Mar 2006 01:37:07 -0800 Subject: [PATCH] DMI: only ioremap stuff we actually need dmi_scan_machine() tries to ioremap 0x10000 (64K) bytes, even though it only looks at the first 32 bytes or so. If the SMBIOS table is near the end of a memory region, the ioremap() may fail when it shouldn't. This is in the efi_enabled path, so it really only affects ia64 at the moment. Signed-off-by: Bjorn Helgaas Cc: Matt Domsch Cc: "Tolentino, Matthew E" Cc: "Brown, Len" Cc: Andi Kleen Acked-by: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/dmi_scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index bfecbd4..c032f9e 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -224,7 +224,7 @@ void __init dmi_scan_machine(void) * needed during early boot. This also means we can * iounmap the space when we're done with it. */ - p = dmi_ioremap((unsigned long)efi.smbios, 0x10000); + p = dmi_ioremap((unsigned long)efi.smbios, 32); if (p == NULL) goto out; -- cgit v1.1 From b2c99e3c70d77fb194df5aa1642030080d28ea48 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Sun, 26 Mar 2006 01:37:08 -0800 Subject: [PATCH] EFI: keep physical table addresses in efi structure Almost all users of the table addresses from the EFI system table want physical addresses. So rather than doing the pa->va->pa conversion, just keep physical addresses in struct efi. This fixes a DMI bug: the efi structure contained the physical SMBIOS address on x86 but the virtual address on ia64, so dmi_scan_machine() used ioremap() on a virtual address on ia64. This is essentially the same as an earlier patch by Matt Tolentino: http://marc.theaimsgroup.com/?l=linux-kernel&m=112130292316281&w=2 except that this changes all table addresses, not just ACPI addresses. Matt's original patch was backed out because it caused MCAs on HP sx1000 systems. That problem is resolved by the ioremap() attribute checking added for ia64. Signed-off-by: Bjorn Helgaas Cc: Matt Domsch Cc: "Tolentino, Matthew E" Cc: "Brown, Len" Cc: Andi Kleen Acked-by: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/acpi/boot.c | 8 ++++---- arch/i386/kernel/dmi_scan.c | 4 ++-- arch/i386/kernel/efi.c | 21 +++++++++++++++------ 3 files changed, 21 insertions(+), 12 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index f1a2194..0330661 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -668,10 +668,10 @@ unsigned long __init acpi_find_rsdp(void) unsigned long rsdp_phys = 0; if (efi_enabled) { - if (efi.acpi20) - return __pa(efi.acpi20); - else if (efi.acpi) - return __pa(efi.acpi); + if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) + return efi.acpi20; + else if (efi.acpi != EFI_INVALID_TABLE_ADDR) + return efi.acpi; } /* * Scan memory looking for the RSDP signature. First search EBDA (low diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index c032f9e..170d4c9 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -217,14 +217,14 @@ void __init dmi_scan_machine(void) int rc; if (efi_enabled) { - if (!efi.smbios) + if (efi.smbios == EFI_INVALID_TABLE_ADDR) goto out; /* This is called as a core_initcall() because it isn't * needed during early boot. This also means we can * iounmap the space when we're done with it. */ - p = dmi_ioremap((unsigned long)efi.smbios, 32); + p = dmi_ioremap(efi.smbios, 32); if (p == NULL) goto out; diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index 7ec6cfa..c224c2a 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -381,29 +381,38 @@ void __init efi_init(void) if (config_tables == NULL) printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n"); + efi.mps = EFI_INVALID_TABLE_ADDR; + efi.acpi = EFI_INVALID_TABLE_ADDR; + efi.acpi20 = EFI_INVALID_TABLE_ADDR; + efi.smbios = EFI_INVALID_TABLE_ADDR; + efi.sal_systab = EFI_INVALID_TABLE_ADDR; + efi.boot_info = EFI_INVALID_TABLE_ADDR; + efi.hcdp = EFI_INVALID_TABLE_ADDR; + efi.uga = EFI_INVALID_TABLE_ADDR; + for (i = 0; i < num_config_tables; i++) { if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { - efi.mps = (void *)config_tables[i].table; + efi.mps = config_tables[i].table; printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) { - efi.acpi20 = __va(config_tables[i].table); + efi.acpi20 = config_tables[i].table; printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { - efi.acpi = __va(config_tables[i].table); + efi.acpi = config_tables[i].table; printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { - efi.smbios = (void *) config_tables[i].table; + efi.smbios = config_tables[i].table; printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { - efi.hcdp = (void *)config_tables[i].table; + efi.hcdp = config_tables[i].table; printk(KERN_INFO " HCDP=0x%lx ", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) { - efi.uga = (void *)config_tables[i].table; + efi.uga = config_tables[i].table; printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table); } } -- cgit v1.1 From 23dd842c0033dbb05248c42929c3c526c55386de Mon Sep 17 00:00:00 2001 From: "Tolentino, Matthew E" Date: Sun, 26 Mar 2006 01:37:09 -0800 Subject: [PATCH] EFI fixes Here's a patch that fixes EFI boot for x86 on 2.6.16-rc5-mm3. The off-by-one is admittedly my fault, but the other two fix up the rest. Cc: Bjorn Helgaas Cc: Matt Domsch Cc: "Tolentino, Matthew E" Cc: "Brown, Len" Cc: Andi Kleen Cc: "Luck, Tony" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/dmi_scan.c | 2 +- arch/i386/kernel/setup.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index 170d4c9..5efceeb 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -229,7 +229,7 @@ void __init dmi_scan_machine(void) goto out; rc = dmi_present(p + 0x10); /* offset of _DMI_ string */ - iounmap(p); + dmi_iounmap(p, 32); if (!rc) return; } diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index cec3c92..6917daa 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1058,10 +1058,10 @@ static int __init free_available_memory(unsigned long start, unsigned long end, void *arg) { /* check max_low_pfn */ - if (start >= ((max_low_pfn + 1) << PAGE_SHIFT)) + if (start >= (max_low_pfn << PAGE_SHIFT)) return 0; - if (end >= ((max_low_pfn + 1) << PAGE_SHIFT)) - end = (max_low_pfn + 1) << PAGE_SHIFT; + if (end >= (max_low_pfn << PAGE_SHIFT)) + end = max_low_pfn << PAGE_SHIFT; if (start < end) free_bootmem(start, end - start); -- cgit v1.1 From 14cc3e2b633bb64063698980974df4535368e98f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 26 Mar 2006 01:37:14 -0800 Subject: [PATCH] sem2mutex: misc static one-file mutexes Semaphore to mutex conversion. The conversion was generated via scripts, and the result was validated automatically via a script as well. Signed-off-by: Ingo Molnar Cc: Dave Jones Cc: Paul Mackerras Cc: Ralf Baechle Cc: Jens Axboe Cc: Neil Brown Acked-by: Alasdair G Kergon Cc: Greg KH Cc: Dominik Brodowski Cc: Adam Belay Cc: Martin Schwidefsky Cc: "David S. Miller" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/cpufreq/powernow-k8.c | 9 +++++---- arch/i386/kernel/cpu/mtrr/main.c | 13 +++++++------ arch/i386/kernel/microcode.c | 7 ++++--- 3 files changed, 16 insertions(+), 13 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index e5bc064..1e70823 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -40,6 +40,7 @@ #ifdef CONFIG_X86_POWERNOW_K8_ACPI #include +#include #include #endif @@ -49,7 +50,7 @@ #include "powernow-k8.h" /* serialize freq changes */ -static DECLARE_MUTEX(fidvid_sem); +static DEFINE_MUTEX(fidvid_mutex); static struct powernow_k8_data *powernow_data[NR_CPUS]; @@ -943,17 +944,17 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate)) goto err_out; - down(&fidvid_sem); + mutex_lock(&fidvid_mutex); powernow_k8_acpi_pst_values(data, newstate); if (transition_frequency(data, newstate)) { printk(KERN_ERR PFX "transition frequency failed\n"); ret = 1; - up(&fidvid_sem); + mutex_unlock(&fidvid_mutex); goto err_out; } - up(&fidvid_sem); + mutex_unlock(&fidvid_mutex); pol->cur = find_khz_freq_from_fid(data->currfid); ret = 0; diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index 3b4618b..fff90bd 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -36,6 +36,7 @@ #include #include #include +#include #include @@ -47,7 +48,7 @@ u32 num_var_ranges = 0; unsigned int *usage_table; -static DECLARE_MUTEX(mtrr_sem); +static DEFINE_MUTEX(mtrr_mutex); u32 size_or_mask, size_and_mask; @@ -333,7 +334,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, /* No CPU hotplug when we change MTRR entries */ lock_cpu_hotplug(); /* Search for existing MTRR */ - down(&mtrr_sem); + mutex_lock(&mtrr_mutex); for (i = 0; i < num_var_ranges; ++i) { mtrr_if->get(i, &lbase, &lsize, <ype); if (base >= lbase + lsize) @@ -371,7 +372,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, printk(KERN_INFO "mtrr: no more MTRRs available\n"); error = i; out: - up(&mtrr_sem); + mutex_unlock(&mtrr_mutex); unlock_cpu_hotplug(); return error; } @@ -464,7 +465,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) max = num_var_ranges; /* No CPU hotplug when we change MTRR entries */ lock_cpu_hotplug(); - down(&mtrr_sem); + mutex_lock(&mtrr_mutex); if (reg < 0) { /* Search for existing MTRR */ for (i = 0; i < max; ++i) { @@ -503,7 +504,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) set_mtrr(reg, 0, 0, 0); error = reg; out: - up(&mtrr_sem); + mutex_unlock(&mtrr_mutex); unlock_cpu_hotplug(); return error; } @@ -685,7 +686,7 @@ void mtrr_ap_init(void) if (!mtrr_if || !use_intel()) return; /* - * Ideally we should hold mtrr_sem here to avoid mtrr entries changed, + * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed, * but this routine will be called in cpu boot time, holding the lock * breaks it. This routine is called in two cases: 1.very earily time * of software resume, when there absolutely isn't mtrr entry changes; diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index 55bc365..dd780a0 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -81,6 +81,7 @@ #include #include #include +#include #include #include @@ -114,7 +115,7 @@ MODULE_LICENSE("GPL"); static DEFINE_SPINLOCK(microcode_update_lock); /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -static DECLARE_MUTEX(microcode_sem); +static DEFINE_MUTEX(microcode_mutex); static void __user *user_buffer; /* user area microcode data buffer */ static unsigned int user_buffer_size; /* it's size */ @@ -444,7 +445,7 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_ return -EINVAL; } - down(µcode_sem); + mutex_lock(µcode_mutex); user_buffer = (void __user *) buf; user_buffer_size = (int) len; @@ -453,7 +454,7 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_ if (!ret) ret = (ssize_t)len; - up(µcode_sem); + mutex_unlock(µcode_mutex); return ret; } -- cgit v1.1 From d6d21dfdd305bf94300df13ff472141d3411ea17 Mon Sep 17 00:00:00 2001 From: Darren Jenkins Date: Sun, 26 Mar 2006 01:37:34 -0800 Subject: [PATCH] fix array overrun in efi.c Coverity found an over-run @ line 364 of efi.c This is due to the loop checking the size correctly, then adding a '\0' after possibly hitting the end of the array. Ensure the loop exits with one space left in the array. Signed-off-by: Darren Jenkins Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index c224c2a..9202b67 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -361,7 +361,7 @@ void __init efi_init(void) */ c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2); if (c16) { - for (i = 0; i < sizeof(vendor) && *c16; ++i) + for (i = 0; i < (sizeof(vendor) - 1) && *c16; ++i) vendor[i] = *c16++; vendor[i] = '\0'; } else -- cgit v1.1 From b50ea74c7bc3ebe3d88a357893f0b96ae9092f13 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 26 Mar 2006 01:38:13 -0800 Subject: [PATCH] kprobes: clean up resume_execute() Clean up kprobe's resume_execute() for i386 arch. Signed-off-by: Masami Hiramatsu Cc: Prasanna S Panchamukhi Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 7a59050..b40614f 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -362,10 +362,10 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { unsigned long *tos = (unsigned long *)®s->esp; - unsigned long next_eip = 0; unsigned long copy_eip = (unsigned long)p->ainsn.insn; unsigned long orig_eip = (unsigned long)p->addr; + regs->eflags &= ~TF_MASK; switch (p->ainsn.insn[0]) { case 0x9c: /* pushfl */ *tos &= ~(TF_MASK | IF_MASK); @@ -375,9 +375,9 @@ static void __kprobes resume_execution(struct kprobe *p, case 0xcb: case 0xc2: case 0xca: - regs->eflags &= ~TF_MASK; - /* eip is already adjusted, no more changes required*/ - return; + case 0xea: /* jmp absolute -- eip is correct */ + /* eip is already adjusted, no more changes required */ + goto no_change; case 0xe8: /* call relative - Fix return addr */ *tos = orig_eip + (*tos - copy_eip); break; @@ -385,27 +385,21 @@ static void __kprobes resume_execution(struct kprobe *p, if ((p->ainsn.insn[1] & 0x30) == 0x10) { /* call absolute, indirect */ /* Fix return addr; eip is correct. */ - next_eip = regs->eip; *tos = orig_eip + (*tos - copy_eip); + goto no_change; } else if (((p->ainsn.insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ ((p->ainsn.insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ /* eip is correct. */ - next_eip = regs->eip; + goto no_change; } - break; - case 0xea: /* jmp absolute -- eip is correct */ - next_eip = regs->eip; - break; default: break; } - regs->eflags &= ~TF_MASK; - if (next_eip) { - regs->eip = next_eip; - } else { - regs->eip = orig_eip + (regs->eip - copy_eip); - } + regs->eip = orig_eip + (regs->eip - copy_eip); + +no_change: + return; } /* -- cgit v1.1 From 311ac88fd2d4194a95e9e38d2fe08917be98723c Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 26 Mar 2006 01:38:17 -0800 Subject: [PATCH] x86: kprobes-booster Current kprobe copies the original instruction at the probe point and replaces it with a breakpoint instruction (int3). When the kernel hits the probe point, kprobe handler is invoked. And the copied instruction is single-step executed on the copied buffer (not on the original address) by kprobe. After that, the kprobe checks registers and modify it (if need) as if the instructions was executed on the original address. My proposal is based on the fact there are many instructions which do NOT require the register modification after the single-step execution. When the copied instruction is a kind of them, kprobe just jumps back to the next instruction after single-step execution. If so, why don't we execute those instructions directly? With kprobe-booster patch, kprobes will execute a copied instruction directly and (if need) jump back to original code. This direct execution is executed when the kprobe don't have both post_handler and break_handler, and the copied instruction can be executed directly. I sorted instructions which can be executed directly or not; - Call instructions are NG(can not be executed directly). We should correct the return address pushed into top of stack. - Indirect instructions except for absolute indirect-jumps are NG. Those instructions changes EIP randomly. We should check EIP and correct it. - Instructions that change EIP beyond the range of the instruction buffer are NG. - Instructions that change EIP to tail 5 bytes of the instruction buffer (it is the size of a jump instruction). We must write a jump instruction which backs to original kernel code in the instruction buffer. - Break point instruction is NG. We should not touch EIP and pass to other handlers. - Absolute direct/indirect jumps are OK.- Conditional Jumps are NG. - Halt and software-interruptions are NG. Because it will stay on the instruction buffer of kprobes. - Prefixes are NG. - Unknown/reserved opcode is NG. - Other 1 byte instructions are OK. But those instructions need a jump back code. - 2 bytes instructions are mapped sparsely. So, in this release, this patch don't boost those instructions. >From Intel's IA-32 opcode map described in IA-32 Intel Architecture Software Developer's Manual Vol.2 B, I determined that following opcodes are not boostable. - 0FH (2byte escape) - 70H - 7FH (Jump on condition) - 9AH (Call) and 9CH (Pushf) - C0H-C1H (Grp 2: includes reserved opcode) - C6H-C7H (Grp11: includes reserved opcode) - CCH-CEH (Software-interrupt) - D0H-D3H (Grp2: includes reserved opcode) - D6H (Reserved) - D8H-DFH (Coprocessor) - E0H-E3H (loop/conditional jump) - E8H (Call) - F0H-F3H (Prefixes and reserved) - F4H (Halt) - F6H-F7H (Grp3: includes reserved opcode) - FEH-FFH(Grp4,5: includes reserved opcode) Kprobe-booster checks whether target instruction can be boosted (can be executed directly) at arch_copy_kprobe() function. If the target instruction can be boosted, it clears "boostable" flag. If not, it sets "boostable" flag -1. This is disabled status. In resume_execution() function, If "boostable" flag is cleared, kprobe-booster measures the size of the target instruction and sets "boostable" flag 1. In kprobe_handler(), kprobe checks the "boostable" flag. If the flag is 1, it resets current kprobe and executes instruction buffer directly instead of single stepping. When unregistering a boosted kprobe, it calls synchronize_sched() after "int3" is removed. So we can ensure followings after the synchronize_sched() called. - interrupt handlers are finished on all CPUs. - instruction buffer is not executed on all CPUs. And we can release the boosted kprobe safely. And also, on preemptible kernel, the booster is not enabled where the kernel preemption is enabled. So, there are no preempted threads on the instruction buffer. The description of kretprobe-booster: ==================================== In the normal operation, kretprobe make a target function return to trampoline code. And a kprobe (called trampoline_probe) have been inserted at the trampoline code. When the kernel hits this kprobe, it calls kretprobe's handler and it returns to original return address. Kretprobe-booster patch removes the trampoline_probe. It allows the trampoline code to call kretprobe's handler directly instead of invoking kprobe. And tranpoline code returns to original return address. This new trampoline code stores and restores registers, so the kretprobe handler is still able to access those registers. Current kprobe has about 1.3 usec/probe(*) overhead, and kprobe-booster patch reduces it to 0.6 usec/probe(*). Also current kretprobe has about 2.0 usec/probe(*) overhead. Kprobe-booster patch reduces it to 1.3 usec/probe(*), and the combination of both kprobe-booster patch and kretprobe-booster patch reduces it to 0.9 usec/probe(*). I expect the combination of both patches can reduce half of a probing overhead. Performance numbers strongly depend on the processor model. Andrew Morton wrote: > These preempt tricks look rather nasty. Can you please describe what the > problem is, precisely? And how this code avoids it? Perhaps we can find > something cleaner. The problem is how to remove the copied instructions of the kprobe *safely* on the preemptable kernel (CONFIG_PREEMPT=y). Kprobes basically executes the following actions; (1)int3 (2)preempt_disable() (3)kprobe_prehandler() (4)copied instructioin(single step) (5)kprobe_posthandler() (6)preempt_enable() (7)return to the original code During the execution of copied instruction, preemption is disabled (from step (2) to (6)). When unregistering the probes, Kprobe waits for RCU quiescent state by using synchronize_sched() after removing int3 instruction. Thus we can ensure the copied instruction is not executed. On the other hand, kprobe-booster executes the following actions; (1)int3 (2)preempt_disable() (3)kprobe_prehandler() (4)preempt_enable() <-- this one is added by my patch (5)copied instruction(direct execution) (6)jmp back to the original code The problem is that we have no way to prevent preemption on step (5) or (6). We cannot call preempt_disable() after step (6), because there are no rooms to do that. Thus, some other processes may be preempted at step(5) or (6) on preemptable kernel. And I couldn't find the easy way to ensure that other processes' stack do *not* have the address of them. (I thought some way to do that, but those are very costly.) So currently, I simply boost the kprobe only when the probe point is already preemption disabled. > Also, the patch adds a preempt_enable() but I don't see a corresponding > preempt_disable(). Am I missing something? It is corresponding to the preempt_disable() in the top of kprobe_handler(). I copied the code of kprobe_handler() here: static int __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0; kprobe_opcode_t *addr = NULL; unsigned long *lp; struct kprobe_ctlblk *kcb; /* * We don't want to be preempted for the entire * duration of kprobe processing */ preempt_disable(); <-- HERE kcb = get_kprobe_ctlblk(); Signed-off-by: Masami Hiramatsu Cc: Prasanna S Panchamukhi Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 92 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 2 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index b40614f..137bf61 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -41,6 +41,49 @@ void jprobe_return_end(void); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); +/* insert a jmp code */ +static inline void set_jmp_op(void *from, void *to) +{ + struct __arch_jmp_op { + char op; + long raddr; + } __attribute__((packed)) *jop; + jop = (struct __arch_jmp_op *)from; + jop->raddr = (long)(to) - ((long)(from) + 5); + jop->op = RELATIVEJUMP_INSTRUCTION; +} + +/* + * returns non-zero if opcodes can be boosted. + */ +static inline int can_boost(kprobe_opcode_t opcode) +{ + switch (opcode & 0xf0 ) { + case 0x70: + return 0; /* can't boost conditional jump */ + case 0x90: + /* can't boost call and pushf */ + return opcode != 0x9a && opcode != 0x9c; + case 0xc0: + /* can't boost undefined opcodes and soft-interruptions */ + return (0xc1 < opcode && opcode < 0xc6) || + (0xc7 < opcode && opcode < 0xcc) || opcode == 0xcf; + case 0xd0: + /* can boost AA* and XLAT */ + return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); + case 0xe0: + /* can boost in/out and (may be) jmps */ + return (0xe3 < opcode && opcode != 0xe8); + case 0xf0: + /* clear and set flags can be boost */ + return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); + default: + /* currently, can't boost 2 bytes opcodes */ + return opcode != 0x0f; + } +} + + /* * returns non-zero if opcode modifies the interrupt flag. */ @@ -65,6 +108,11 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); p->opcode = *p->addr; + if (can_boost(p->opcode)) { + p->ainsn.boostable = 0; + } else { + p->ainsn.boostable = -1; + } return 0; } @@ -158,6 +206,9 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) kprobe_opcode_t *addr = NULL; unsigned long *lp; struct kprobe_ctlblk *kcb; +#ifdef CONFIG_PREEMPT + unsigned pre_preempt_count = preempt_count(); +#endif /* CONFIG_PREEMPT */ /* * We don't want to be preempted for the entire @@ -252,6 +303,21 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) /* handler has already set things up, so skip ss setup */ return 1; + if (p->ainsn.boostable == 1 && +#ifdef CONFIG_PREEMPT + !(pre_preempt_count) && /* + * This enables booster when the direct + * execution path aren't preempted. + */ +#endif /* CONFIG_PREEMPT */ + !p->post_handler && !p->break_handler ) { + /* Boost up -- we can execute copied instructions directly */ + reset_current_kprobe(); + regs->eip = (unsigned long)p->ainsn.insn; + preempt_enable_no_resched(); + return 1; + } + ss_probe: prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_HIT_SS; @@ -357,6 +423,8 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * 2) If the single-stepped instruction was a call, the return address * that is atop the stack is the address following the copied instruction. * We need to make it the address following the original instruction. + * + * This function also checks instruction size for preparing direct execution. */ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) @@ -377,6 +445,7 @@ static void __kprobes resume_execution(struct kprobe *p, case 0xca: case 0xea: /* jmp absolute -- eip is correct */ /* eip is already adjusted, no more changes required */ + p->ainsn.boostable = 1; goto no_change; case 0xe8: /* call relative - Fix return addr */ *tos = orig_eip + (*tos - copy_eip); @@ -384,18 +453,37 @@ static void __kprobes resume_execution(struct kprobe *p, case 0xff: if ((p->ainsn.insn[1] & 0x30) == 0x10) { /* call absolute, indirect */ - /* Fix return addr; eip is correct. */ + /* + * Fix return addr; eip is correct. + * But this is not boostable + */ *tos = orig_eip + (*tos - copy_eip); goto no_change; } else if (((p->ainsn.insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ ((p->ainsn.insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ - /* eip is correct. */ + /* eip is correct. And this is boostable */ + p->ainsn.boostable = 1; goto no_change; } default: break; } + if (p->ainsn.boostable == 0) { + if ((regs->eip > copy_eip) && + (regs->eip - copy_eip) + 5 < MAX_INSN_SIZE) { + /* + * These instructions can be executed directly if it + * jumps back to correct address. + */ + set_jmp_op((void *)regs->eip, + (void *)orig_eip + (regs->eip - copy_eip)); + p->ainsn.boostable = 1; + } else { + p->ainsn.boostable = -1; + } + } + regs->eip = orig_eip + (regs->eip - copy_eip); no_change: -- cgit v1.1 From c9becf58d935265919bf1cb348b2c04492c8949d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sun, 26 Mar 2006 01:38:19 -0800 Subject: [PATCH] kretprobe: kretprobe-booster In normal operation, kretprobe makes a target function return to trampoline code. A kprobe (called trampoline_probe) has been inserted in the trampoline code. When the kernel hits this kprobe, it calls kretprobe's handler and it returns to the original return address. Kretprobe-booster removes the trampoline_probe. It allows the trampoline code to call kretprobe's handler directly instead of invoking kprobe. The trampoline code returns to the original return address. (changelog from Chuck Ebbert <76306.1226@compuserve.com> - thanks ;)) Signed-off-by: Masami Hiramatsu Cc: Prasanna S Panchamukhi Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: David S. Miller Cc: Chuck Ebbert <76306.1226@compuserve.com> Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 61 +++++++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 22 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 137bf61..acdcc64 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -333,17 +333,44 @@ no_kprobe: * here. When a retprobed function returns, this probe is hit and * trampoline_probe_handler() runs, calling the kretprobe's handler. */ - void kretprobe_trampoline_holder(void) + void __kprobes kretprobe_trampoline_holder(void) { - asm volatile ( ".global kretprobe_trampoline\n" + asm volatile ( ".global kretprobe_trampoline\n" "kretprobe_trampoline: \n" - "nop\n"); - } + " pushf\n" + /* skip cs, eip, orig_eax, es, ds */ + " subl $20, %esp\n" + " pushl %eax\n" + " pushl %ebp\n" + " pushl %edi\n" + " pushl %esi\n" + " pushl %edx\n" + " pushl %ecx\n" + " pushl %ebx\n" + " movl %esp, %eax\n" + " call trampoline_handler\n" + /* move eflags to cs */ + " movl 48(%esp), %edx\n" + " movl %edx, 44(%esp)\n" + /* save true return address on eflags */ + " movl %eax, 48(%esp)\n" + " popl %ebx\n" + " popl %ecx\n" + " popl %edx\n" + " popl %esi\n" + " popl %edi\n" + " popl %ebp\n" + " popl %eax\n" + /* skip eip, orig_eax, es, ds */ + " addl $16, %esp\n" + " popf\n" + " ret\n"); +} /* - * Called when we hit the probe point at kretprobe_trampoline + * Called from kretprobe_trampoline */ -int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +fastcall void *__kprobes trampoline_handler(struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head; @@ -372,8 +399,11 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) /* another task is sharing our hash bucket */ continue; - if (ri->rp && ri->rp->handler) + if (ri->rp && ri->rp->handler){ + __get_cpu_var(current_kprobe) = &ri->rp->kp; ri->rp->handler(ri, regs); + __get_cpu_var(current_kprobe) = NULL; + } orig_ret_address = (unsigned long)ri->ret_addr; recycle_rp_inst(ri); @@ -388,18 +418,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) } BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); - regs->eip = orig_ret_address; - reset_current_kprobe(); spin_unlock_irqrestore(&kretprobe_lock, flags); - preempt_enable_no_resched(); - /* - * By returning a non-zero value, we are telling - * kprobe_handler() that we don't want the post_handler - * to run (and have re-enabled preemption) - */ - return 1; + return (void*)orig_ret_address; } /* @@ -646,12 +668,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } -static struct kprobe trampoline_p = { - .addr = (kprobe_opcode_t *) &kretprobe_trampoline, - .pre_handler = trampoline_probe_handler -}; - int __init arch_init_kprobes(void) { - return register_kprobe(&trampoline_p); + return 0; } -- cgit v1.1 From c6fd91f0bdcd294a0ae0ba2b2a7f7456ef4b7144 Mon Sep 17 00:00:00 2001 From: bibo mao Date: Sun, 26 Mar 2006 01:38:20 -0800 Subject: [PATCH] kretprobe instance recycled by parent process When kretprobe probes the schedule() function, if the probed process exits then schedule() will never return, so some kretprobe instances will never be recycled. In this patch the parent process will recycle retprobe instances of the probed function and there will be no memory leak of kretprobe instances. Signed-off-by: bibo mao Cc: Masami Hiramatsu Cc: Prasanna S Panchamukhi Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/process.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 299e616..24b3e74 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -38,7 +38,6 @@ #include #include #include -#include #include #include @@ -364,13 +363,6 @@ void exit_thread(void) struct task_struct *tsk = current; struct thread_struct *t = &tsk->thread; - /* - * Remove function-return probe instances associated with this task - * and put them back on the free list. Do not insert an exit probe for - * this function, it will be disabled by kprobe_flush_task if you do. - */ - kprobe_flush_task(tsk); - /* The process may have allocated an io port bitmap... nuke it. */ if (unlikely(NULL != t->io_bitmap_ptr)) { int cpu = get_cpu(); -- cgit v1.1 From 2326c77017c79fd6d55c69d8a49a57a252921bcd Mon Sep 17 00:00:00 2001 From: "bibo,mao" Date: Sun, 26 Mar 2006 01:38:21 -0800 Subject: [PATCH] kprobe handler: discard user space trap Currently kprobe handler traps only happen in kernel space, so function kprobe_exceptions_notify should skip traps which happen in user space. This patch modifies this, and it is based on 2.6.16-rc4. Signed-off-by: bibo mao Cc: Ananth N Mavinakayanahalli Cc: "Keshavamurthy, Anil S" Cc: Signed-off-by: Prasanna S Panchamukhi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index acdcc64..df1b346 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -203,13 +203,14 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0; - kprobe_opcode_t *addr = NULL; - unsigned long *lp; + kprobe_opcode_t *addr; struct kprobe_ctlblk *kcb; #ifdef CONFIG_PREEMPT unsigned pre_preempt_count = preempt_count(); #endif /* CONFIG_PREEMPT */ + addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); + /* * We don't want to be preempted for the entire * duration of kprobe processing @@ -217,17 +218,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) preempt_disable(); kcb = get_kprobe_ctlblk(); - /* Check if the application is using LDT entry for its code segment and - * calculate the address by reading the base address from the LDT entry. - */ - if ((regs->xcs & 4) && (current->mm)) { - lp = (unsigned long *) ((unsigned long)((regs->xcs >> 3) * 8) - + (char *) current->mm->context.ldt); - addr = (kprobe_opcode_t *) (get_desc_base(lp) + regs->eip - - sizeof(kprobe_opcode_t)); - } else { - addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); - } /* Check we're not actually recursing */ if (kprobe_running()) { p = get_kprobe(addr); @@ -579,6 +569,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; + if (args->regs && user_mode(args->regs)) + return ret; + switch (val) { case DIE_INT3: if (kprobe_handler(args->regs)) -- cgit v1.1 From b4026513b88e7eaa52a31117e2b7bafdc1e40ef1 Mon Sep 17 00:00:00 2001 From: Prasanna S Panchamukhi Date: Sun, 26 Mar 2006 01:38:22 -0800 Subject: [PATCH] kprobes: fix broken fault handling for i386 Provide proper kprobes fault handling, if a user-specified pre/post handlers tries to access user address space, through copy_from_user(), get_user() etc. The user-specified fault handler gets called only if the fault occurs while executing user-specified handlers. In such a case user-specified handler is allowed to fix it first, later if the user-specifed fault handler does not fix it, we try to fix it by calling fix_exception(). The user-specified handler will not be called if the fault happens when single stepping the original instruction, instead we reset the current probe and allow the system page fault handler to fix it up. Signed-off-by: Prasanna S Panchamukhi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/kprobes.c | 57 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 7 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index df1b346..f197687 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -35,6 +35,7 @@ #include #include #include +#include void jprobe_return_end(void); @@ -547,15 +548,57 @@ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - - if (kcb->kprobe_status & KPROBE_HIT_SS) { - resume_execution(cur, regs, kcb); + switch(kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the eip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + regs->eip = (unsigned long)cur->addr; regs->eflags |= kcb->kprobe_old_eflags; - - reset_current_kprobe(); + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accouting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if (fixup_exception(regs)) + return 1; + + /* + * fixup_exception() could not handle it, + * Let do_page_fault() fix it. + */ + break; + default: + break; } return 0; } -- cgit v1.1 From dbffa471611d3fc4b401ebabf7bb63ac0e0272b1 Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi Date: Mon, 27 Mar 2006 01:14:26 -0800 Subject: [PATCH] PM-Timer: don't use workaround if chipset is not buggy Current timer_pm.c reads I/O port triple times, in order to avoid the bug of chipset. But I/O port is slow. 2.6.16 (pmtmr) Simple gettimeofday: 3.6532 microseconds 2.6.16+patch (pmtmr) Simple gettimeofday: 1.4582 microseconds [if chip is buggy, probably it will be 7us or more in 4.2% of probability.] This patch adds blacklist of buggy chip, and if chip is not buggy, this uses fast normal version instead of slow workaround version. If chip is buggy, warnings "pmtmr is slow". But sounds like there is gray zone. I found the PIIX4 errata, but I couldn't find the ICH4 errata. But some motherboard seems to have problem. So, if we found a ICH4, generate warnings, and use a workaround version. If user's ICH4 is good, the user can specify the "pmtmr_good" boot parameter to use fast version. Acked-by: John Stultz Signed-off-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/timers/timer_pm.c | 104 +++++++++++++++++++++++++++++++------ 1 file changed, 89 insertions(+), 15 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c index 264edaa..144e94a 100644 --- a/arch/i386/kernel/timers/timer_pm.c +++ b/arch/i386/kernel/timers/timer_pm.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -45,24 +46,31 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ +static int pmtmr_need_workaround __read_mostly = 1; + /*helper function to safely read acpi pm timesource*/ static inline u32 read_pmtmr(void) { - u32 v1=0,v2=0,v3=0; - /* It has been reported that because of various broken - * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time - * source is not latched, so you must read it multiple - * times to insure a safe value is read. - */ - do { - v1 = inl(pmtmr_ioport); - v2 = inl(pmtmr_ioport); - v3 = inl(pmtmr_ioport); - } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) - || (v3 > v1 && v3 < v2)); - - /* mask the output to 24 bits */ - return v2 & ACPI_PM_MASK; + if (pmtmr_need_workaround) { + u32 v1, v2, v3; + + /* It has been reported that because of various broken + * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time + * source is not latched, so you must read it multiple + * times to insure a safe value is read. + */ + do { + v1 = inl(pmtmr_ioport); + v2 = inl(pmtmr_ioport); + v3 = inl(pmtmr_ioport); + } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) + || (v3 > v1 && v3 < v2)); + + /* mask the output to 24 bits */ + return v2 & ACPI_PM_MASK; + } + + return inl(pmtmr_ioport) & ACPI_PM_MASK; } @@ -263,6 +271,72 @@ struct init_timer_opts __initdata timer_pmtmr_init = { .opts = &timer_pmtmr, }; +#ifdef CONFIG_PCI +/* + * PIIX4 Errata: + * + * The power management timer may return improper results when read. + * Although the timer value settles properly after incrementing, + * while incrementing there is a 3 ns window every 69.8 ns where the + * timer value is indeterminate (a 4.2% chance that the data will be + * incorrect when read). As a result, the ACPI free running count up + * timer specification is violated due to erroneous reads. + */ +static int __init pmtmr_bug_check(void) +{ + static struct pci_device_id gray_list[] __initdata = { + /* these chipsets may have bug. */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82801DB_0) }, + { }, + }; + struct pci_dev *dev; + int pmtmr_has_bug = 0; + u8 rev; + + if (cur_timer != &timer_pmtmr || !pmtmr_need_workaround) + return 0; + + dev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82371AB_3, NULL); + if (dev) { + pci_read_config_byte(dev, PCI_REVISION_ID, &rev); + /* the bug has been fixed in PIIX4M */ + if (rev < 3) { + printk(KERN_WARNING "* Found PM-Timer Bug on this " + "chipset. Due to workarounds for a bug,\n" + "* this time source is slow. Consider trying " + "other time sources (clock=)\n"); + pmtmr_has_bug = 1; + } + pci_dev_put(dev); + } + + if (pci_dev_present(gray_list)) { + printk(KERN_WARNING "* This chipset may have PM-Timer Bug. Due" + " to workarounds for a bug,\n" + "* this time source is slow. If you are sure your timer" + " does not have\n" + "* this bug, please use \"pmtmr_good\" to disable the " + "workaround\n"); + pmtmr_has_bug = 1; + } + + if (!pmtmr_has_bug) + pmtmr_need_workaround = 0; + + return 0; +} +device_initcall(pmtmr_bug_check); +#endif + +static int __init pmtr_good_setup(char *__str) +{ + pmtmr_need_workaround = 0; + return 1; +} +__setup("pmtmr_good", pmtr_good_setup); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Dominik Brodowski "); MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86"); -- cgit v1.1 From 1e9f28fa1eb9773bf65bae08288c6a0a38eef4a7 Mon Sep 17 00:00:00 2001 From: "Siddha, Suresh B" Date: Mon, 27 Mar 2006 01:15:22 -0800 Subject: [PATCH] sched: new sched domain for representing multi-core Add a new sched domain for representing multi-core with shared caches between cores. Consider a dual package system, each package containing two cores and with last level cache shared between cores with in a package. If there are two runnable processes, with this appended patch those two processes will be scheduled on different packages. On such systems, with this patch we have observed 8% perf improvement with specJBB(2 warehouse) benchmark and 35% improvement with CFP2000 rate(with 2 users). This new domain will come into play only on multi-core systems with shared caches. On other systems, this sched domain will be removed by domain degeneration code. This new domain can be also used for implementing power savings policy (see OLS 2005 CMP kernel scheduler paper for more details.. I will post another patch for power savings policy soon) Most of the arch/* file changes are for cpu_coregroup_map() implementation. Signed-off-by: Suresh Siddha Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/common.c | 10 +++++++--- arch/i386/kernel/cpu/intel_cacheinfo.c | 22 ++++++++++++++++++++-- arch/i386/kernel/smpboot.c | 24 ++++++++++++++++++++++++ 3 files changed, 51 insertions(+), 5 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 7e3d6b6..a06a490 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -266,7 +266,7 @@ static void __init early_cpu_detect(void) void __cpuinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; - int junk; + int ebx; if (have_cpuid_p()) { /* Get vendor name */ @@ -282,7 +282,7 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c) /* Intel-defined flags: level 0x00000001 */ if ( c->cpuid_level >= 0x00000001 ) { u32 capability, excap; - cpuid(0x00000001, &tfms, &junk, &excap, &capability); + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); c->x86_capability[0] = capability; c->x86_capability[4] = excap; c->x86 = (tfms >> 8) & 15; @@ -292,6 +292,11 @@ void __cpuinit generic_identify(struct cpuinfo_x86 * c) if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; c->x86_mask = tfms & 15; +#ifdef CONFIG_SMP + c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); +#else + c->apicid = (ebx >> 24) & 0xFF; +#endif } else { /* Have CPUID level 0 only - unheard of */ c->x86 = 4; @@ -474,7 +479,6 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) cpuid(1, &eax, &ebx, &ecx, &edx); - c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index ce61921..7e7fd4e 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -173,6 +173,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ + unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; +#ifdef CONFIG_SMP + unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); +#endif if (c->cpuid_level > 3) { static int is_initialized; @@ -205,9 +209,15 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) break; case 2: new_l2 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l2_id = c->apicid >> index_msb; break; case 3: new_l3 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l3_id = c->apicid >> index_msb; break; default: break; @@ -273,11 +283,19 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) if (new_l1i) l1i = new_l1i; - if (new_l2) + if (new_l2) { l2 = new_l2; +#ifdef CONFIG_SMP + cpu_llc_id[cpu] = l2_id; +#endif + } - if (new_l3) + if (new_l3) { l3 = new_l3; +#ifdef CONFIG_SMP + cpu_llc_id[cpu] = l3_id; +#endif + } if ( trace ) printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 82371d8..a696990 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -72,6 +72,9 @@ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; /* Core ID of each logical CPU */ int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; +/* Last level cache ID of each logical CPU */ +int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; + /* representing HT siblings of each logical CPU */ cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); @@ -440,6 +443,18 @@ static void __devinit smp_callin(void) static int cpucount; +/* maps the cpu to the sched domain representing multi-core */ +cpumask_t cpu_coregroup_map(int cpu) +{ + struct cpuinfo_x86 *c = cpu_data + cpu; + /* + * For perf, we return last level cache shared map. + * TBD: when power saving sched policy is added, we will return + * cpu_core_map when power saving policy is enabled + */ + return c->llc_shared_map; +} + /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -459,12 +474,16 @@ set_cpu_sibling_map(int cpu) cpu_set(cpu, cpu_sibling_map[i]); cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); } } } else { cpu_set(cpu, cpu_sibling_map[cpu]); } + cpu_set(cpu, c[cpu].llc_shared_map); + if (current_cpu_data.x86_max_cores == 1) { cpu_core_map[cpu] = cpu_sibling_map[cpu]; c[cpu].booted_cores = 1; @@ -472,6 +491,11 @@ set_cpu_sibling_map(int cpu) } for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (cpu_llc_id[cpu] != BAD_APICID && + cpu_llc_id[cpu] == cpu_llc_id[i]) { + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); + } if (phys_proc_id[cpu] == phys_proc_id[i]) { cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); -- cgit v1.1 From b06be912a3ad68c69dba0ed6e92723140020e392 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 27 Mar 2006 01:15:24 -0800 Subject: [PATCH] x86: don't use cpuid.2 to determine cache info if cpuid.4 is supported Don't use cpuid.2 to determine cache info if cpuid.4 is supported. The exception is P4 trace cache. We always use cpuid.2 to get trace cache under P4. Signed-off-by: Shaohua Li Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/cpu/intel_cacheinfo.c | 69 ++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 28 deletions(-) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index 7e7fd4e..9df87b0 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -225,11 +225,19 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) } } } - if (c->cpuid_level > 1) { + /* + * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for + * trace cache + */ + if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { /* supports eax=2 call */ int i, j, n; int regs[4]; unsigned char *dp = (unsigned char *)regs; + int only_trace = 0; + + if (num_cache_leaves != 0 && c->x86 == 15) + only_trace = 1; /* Number of times to iterate */ n = cpuid_eax(2) & 0xFF; @@ -251,6 +259,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) while (cache_table[k].descriptor != 0) { if (cache_table[k].descriptor == des) { + if (only_trace && cache_table[k].cache_type != LVL_TRACE) + break; switch (cache_table[k].cache_type) { case LVL_1_INST: l1i += cache_table[k].size; @@ -276,43 +286,46 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) } } } + } - if (new_l1d) - l1d = new_l1d; + if (new_l1d) + l1d = new_l1d; - if (new_l1i) - l1i = new_l1i; + if (new_l1i) + l1i = new_l1i; - if (new_l2) { - l2 = new_l2; + if (new_l2) { + l2 = new_l2; #ifdef CONFIG_SMP - cpu_llc_id[cpu] = l2_id; + cpu_llc_id[cpu] = l2_id; #endif - } + } - if (new_l3) { - l3 = new_l3; + if (new_l3) { + l3 = new_l3; #ifdef CONFIG_SMP - cpu_llc_id[cpu] = l3_id; + cpu_llc_id[cpu] = l3_id; #endif - } - - if ( trace ) - printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); - else if ( l1i ) - printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); - if ( l1d ) - printk(", L1 D cache: %dK\n", l1d); - else - printk("\n"); - if ( l2 ) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); - if ( l3 ) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); - - c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); } + if (trace) + printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); + else if ( l1i ) + printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); + + if (l1d) + printk(", L1 D cache: %dK\n", l1d); + else + printk("\n"); + + if (l2) + printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); + + if (l3) + printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); + + c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); + return l2; } -- cgit v1.1 From 22a9835c350782a5c3257343713932af3ac92ee0 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 27 Mar 2006 01:16:04 -0800 Subject: [PATCH] unify PFN_* macros Just about every architecture defines some macros to do operations on pfns. They're all virtually identical. This patch consolidates all of them. One minor glitch is that at least i386 uses them in a very skeletal header file. To keep away from #include dependency hell, I stuck the new definitions in a new, isolated header. Of all of the implementations, sh64 is the only one that varied by a bit. It used some masks to ensure that any sign-extension got ripped away before the arithmetic is done. This has been posted to that sh64 maintainers and the development list. Compiles on x86, x86_64, ia64 and ppc64. Signed-off-by: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/i386/kernel') diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 6917daa..8c08660 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -46,6 +46,7 @@ #include #include #include +#include #include