diff options
Diffstat (limited to 'arch/i386/kernel')
55 files changed, 1421 insertions, 790 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 65656c0..5b9ed21 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ - quirks.o i8237.o topology.o + quirks.o i8237.o topology.o alternative.o obj-y += cpu/ obj-y += timers/ diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index f1a2194..0330661 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -668,10 +668,10 @@ unsigned long __init acpi_find_rsdp(void) unsigned long rsdp_phys = 0; if (efi_enabled) { - if (efi.acpi20) - return __pa(efi.acpi20); - else if (efi.acpi) - return __pa(efi.acpi); + if (efi.acpi20 != EFI_INVALID_TABLE_ADDR) + return efi.acpi20; + else if (efi.acpi != EFI_INVALID_TABLE_ADDR) + return efi.acpi; } /* * Scan memory looking for the RSDP signature. First search EBDA (low diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c new file mode 100644 index 0000000..5cbd6f9 --- /dev/null +++ b/arch/i386/kernel/alternative.c @@ -0,0 +1,321 @@ +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <asm/alternative.h> +#include <asm/sections.h> + +#define DEBUG 0 +#if DEBUG +# define DPRINTK(fmt, args...) printk(fmt, args) +#else +# define DPRINTK(fmt, args...) +#endif + +/* Use inline assembly to define this because the nops are defined + as inline assembly strings in the include files and we cannot + get them easily into strings. */ +asm("\t.data\nintelnops: " + GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 + GENERIC_NOP7 GENERIC_NOP8); +asm("\t.data\nk8nops: " + K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 + K8_NOP7 K8_NOP8); +asm("\t.data\nk7nops: " + K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 + K7_NOP7 K7_NOP8); + +extern unsigned char intelnops[], k8nops[], k7nops[]; +static unsigned char *intel_nops[ASM_NOP_MAX+1] = { + NULL, + intelnops, + intelnops + 1, + intelnops + 1 + 2, + intelnops + 1 + 2 + 3, + intelnops + 1 + 2 + 3 + 4, + intelnops + 1 + 2 + 3 + 4 + 5, + intelnops + 1 + 2 + 3 + 4 + 5 + 6, + intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, +}; +static unsigned char *k8_nops[ASM_NOP_MAX+1] = { + NULL, + k8nops, + k8nops + 1, + k8nops + 1 + 2, + k8nops + 1 + 2 + 3, + k8nops + 1 + 2 + 3 + 4, + k8nops + 1 + 2 + 3 + 4 + 5, + k8nops + 1 + 2 + 3 + 4 + 5 + 6, + k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, +}; +static unsigned char *k7_nops[ASM_NOP_MAX+1] = { + NULL, + k7nops, + k7nops + 1, + k7nops + 1 + 2, + k7nops + 1 + 2 + 3, + k7nops + 1 + 2 + 3 + 4, + k7nops + 1 + 2 + 3 + 4 + 5, + k7nops + 1 + 2 + 3 + 4 + 5 + 6, + k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, +}; +static struct nop { + int cpuid; + unsigned char **noptable; +} noptypes[] = { + { X86_FEATURE_K8, k8_nops }, + { X86_FEATURE_K7, k7_nops }, + { -1, NULL } +}; + + +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; +extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[]; +extern u8 *__smp_locks[], *__smp_locks_end[]; + +extern u8 __smp_alt_begin[], __smp_alt_end[]; + + +static unsigned char** find_nop_table(void) +{ + unsigned char **noptable = intel_nops; + int i; + + for (i = 0; noptypes[i].cpuid >= 0; i++) { + if (boot_cpu_has(noptypes[i].cpuid)) { + noptable = noptypes[i].noptable; + break; + } + } + return noptable; +} + +/* Replace instructions with better alternatives for this CPU type. + This runs before SMP is initialized to avoid SMP problems with + self modifying code. This implies that assymetric systems where + APs have less capabilities than the boot processor are not handled. + Tough. Make sure you disable such features by hand. */ + +void apply_alternatives(struct alt_instr *start, struct alt_instr *end) +{ + unsigned char **noptable = find_nop_table(); + struct alt_instr *a; + int diff, i, k; + + DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); + for (a = start; a < end; a++) { + BUG_ON(a->replacementlen > a->instrlen); + if (!boot_cpu_has(a->cpuid)) + continue; + memcpy(a->instr, a->replacement, a->replacementlen); + diff = a->instrlen - a->replacementlen; + /* Pad the rest with nops */ + for (i = a->replacementlen; diff > 0; diff -= k, i += k) { + k = diff; + if (k > ASM_NOP_MAX) + k = ASM_NOP_MAX; + memcpy(a->instr + i, noptable[k], k); + } + } +} + +static void alternatives_smp_save(struct alt_instr *start, struct alt_instr *end) +{ + struct alt_instr *a; + + DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end); + for (a = start; a < end; a++) { + memcpy(a->replacement + a->replacementlen, + a->instr, + a->instrlen); + } +} + +static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end) +{ + struct alt_instr *a; + + for (a = start; a < end; a++) { + memcpy(a->instr, + a->replacement + a->replacementlen, + a->instrlen); + } +} + +static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) +{ + u8 **ptr; + + for (ptr = start; ptr < end; ptr++) { + if (*ptr < text) + continue; + if (*ptr > text_end) + continue; + **ptr = 0xf0; /* lock prefix */ + }; +} + +static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) +{ + unsigned char **noptable = find_nop_table(); + u8 **ptr; + + for (ptr = start; ptr < end; ptr++) { + if (*ptr < text) + continue; + if (*ptr > text_end) + continue; + **ptr = noptable[1][0]; + }; +} + +struct smp_alt_module { + /* what is this ??? */ + struct module *mod; + char *name; + + /* ptrs to lock prefixes */ + u8 **locks; + u8 **locks_end; + + /* .text segment, needed to avoid patching init code ;) */ + u8 *text; + u8 *text_end; + + struct list_head next; +}; +static LIST_HEAD(smp_alt_modules); +static DEFINE_SPINLOCK(smp_alt); + +static int smp_alt_once = 0; +static int __init bootonly(char *str) +{ + smp_alt_once = 1; + return 1; +} +__setup("smp-alt-boot", bootonly); + +void alternatives_smp_module_add(struct module *mod, char *name, + void *locks, void *locks_end, + void *text, void *text_end) +{ + struct smp_alt_module *smp; + unsigned long flags; + + if (smp_alt_once) { + if (boot_cpu_has(X86_FEATURE_UP)) + alternatives_smp_unlock(locks, locks_end, + text, text_end); + return; + } + + smp = kzalloc(sizeof(*smp), GFP_KERNEL); + if (NULL == smp) + return; /* we'll run the (safe but slow) SMP code then ... */ + + smp->mod = mod; + smp->name = name; + smp->locks = locks; + smp->locks_end = locks_end; + smp->text = text; + smp->text_end = text_end; + DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n", + __FUNCTION__, smp->locks, smp->locks_end, + smp->text, smp->text_end, smp->name); + + spin_lock_irqsave(&smp_alt, flags); + list_add_tail(&smp->next, &smp_alt_modules); + if (boot_cpu_has(X86_FEATURE_UP)) + alternatives_smp_unlock(smp->locks, smp->locks_end, + smp->text, smp->text_end); + spin_unlock_irqrestore(&smp_alt, flags); +} + +void alternatives_smp_module_del(struct module *mod) +{ + struct smp_alt_module *item; + unsigned long flags; + + if (smp_alt_once) + return; + + spin_lock_irqsave(&smp_alt, flags); + list_for_each_entry(item, &smp_alt_modules, next) { + if (mod != item->mod) + continue; + list_del(&item->next); + spin_unlock_irqrestore(&smp_alt, flags); + DPRINTK("%s: %s\n", __FUNCTION__, item->name); + kfree(item); + return; + } + spin_unlock_irqrestore(&smp_alt, flags); +} + +void alternatives_smp_switch(int smp) +{ + struct smp_alt_module *mod; + unsigned long flags; + + if (smp_alt_once) + return; + BUG_ON(!smp && (num_online_cpus() > 1)); + + spin_lock_irqsave(&smp_alt, flags); + if (smp) { + printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); + clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); + clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + alternatives_smp_apply(__smp_alt_instructions, + __smp_alt_instructions_end); + list_for_each_entry(mod, &smp_alt_modules, next) + alternatives_smp_lock(mod->locks, mod->locks_end, + mod->text, mod->text_end); + } else { + printk(KERN_INFO "SMP alternatives: switching to UP code\n"); + set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); + set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + apply_alternatives(__smp_alt_instructions, + __smp_alt_instructions_end); + list_for_each_entry(mod, &smp_alt_modules, next) + alternatives_smp_unlock(mod->locks, mod->locks_end, + mod->text, mod->text_end); + } + spin_unlock_irqrestore(&smp_alt, flags); +} + +void __init alternative_instructions(void) +{ + apply_alternatives(__alt_instructions, __alt_instructions_end); + + /* switch to patch-once-at-boottime-only mode and free the + * tables in case we know the number of CPUs will never ever + * change */ +#ifdef CONFIG_HOTPLUG_CPU + if (num_possible_cpus() < 2) + smp_alt_once = 1; +#else + smp_alt_once = 1; +#endif + + if (smp_alt_once) { + if (1 == num_possible_cpus()) { + printk(KERN_INFO "SMP alternatives: switching to UP code\n"); + set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); + set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + apply_alternatives(__smp_alt_instructions, + __smp_alt_instructions_end); + alternatives_smp_unlock(__smp_locks, __smp_locks_end, + _text, _etext); + } + free_init_pages("SMP alternatives", + (unsigned long)__smp_alt_begin, + (unsigned long)__smp_alt_end); + } else { + alternatives_smp_save(__smp_alt_instructions, + __smp_alt_instructions_end); + alternatives_smp_module_add(NULL, "core kernel", + __smp_locks, __smp_locks_end, + _text, _etext); + alternatives_smp_switch(0); + } +} diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index f39e09e..6273bf7 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -38,6 +38,7 @@ #include <asm/i8253.h> #include <mach_apic.h> +#include <mach_apicdef.h> #include <mach_ipi.h> #include "io_ports.h" @@ -414,6 +415,7 @@ void __init init_bsp_APIC(void) void __devinit setup_local_APIC(void) { unsigned long oldvalue, value, ver, maxlvt; + int i, j; /* Pound the ESR really hard over the head with a big hammer - mbligh */ if (esr_disable) { @@ -451,6 +453,25 @@ void __devinit setup_local_APIC(void) apic_write_around(APIC_TASKPRI, value); /* + * After a crash, we no longer service the interrupts and a pending + * interrupt from previous kernel might still have ISR bit set. + * + * Most probably by now CPU has serviced that pending interrupt and + * it might not have done the ack_APIC_irq() because it thought, + * interrupt came from i8259 as ExtInt. LAPIC did not get EOI so it + * does not clear the ISR bit and cpu thinks it has already serivced + * the interrupt. Hence a vector might get locked. It was noticed + * for timer irq (vector 0x31). Issue an extra EOI to clear ISR. + */ + for (i = APIC_ISR_NR - 1; i >= 0; i--) { + value = apic_read(APIC_ISR + i*0x10); + for (j = 31; j >= 0; j--) { + if (value & (1<<j)) + ack_APIC_irq(); + } + } + + /* * Now that we are all set up, enable the APIC */ value = apic_read(APIC_SPIV); @@ -570,16 +591,18 @@ void __devinit setup_local_APIC(void) */ void lapic_shutdown(void) { + unsigned long flags; + if (!cpu_has_apic) return; - local_irq_disable(); + local_irq_save(flags); clear_local_APIC(); if (enabled_via_apicbase) disable_local_APIC(); - local_irq_enable(); + local_irq_restore(flags); } #ifdef CONFIG_PM @@ -729,7 +752,7 @@ static int __init apic_set_verbosity(char *str) printk(KERN_WARNING "APIC Verbosity level %s not recognised" " use apic=verbose or apic=debug\n", str); - return 0; + return 1; } __setup("apic=", apic_set_verbosity); diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 05312a8..da30a37 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c @@ -824,8 +824,6 @@ static void apm_do_busy(void) static void (*original_pm_idle)(void); -extern void default_idle(void); - /** * apm_cpu_idle - cpu idling for APM capable Linux * diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c index f52669e..bd75629 100644 --- a/arch/i386/kernel/cpu/centaur.c +++ b/arch/i386/kernel/cpu/centaur.c @@ -4,6 +4,7 @@ #include <asm/processor.h> #include <asm/msr.h> #include <asm/e820.h> +#include <asm/mtrr.h> #include "cpu.h" #ifdef CONFIG_X86_OOSTORE diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index e6bd095..a06a490 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -25,9 +25,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); -static int cachesize_override __devinitdata = -1; -static int disable_x86_fxsr __devinitdata = 0; -static int disable_x86_serial_nr __devinitdata = 1; +static int cachesize_override __cpuinitdata = -1; +static int disable_x86_fxsr __cpuinitdata; +static int disable_x86_serial_nr __cpuinitdata = 1; +static int disable_x86_sep __cpuinitdata; struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; @@ -59,7 +60,7 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -int __devinit get_model_name(struct cpuinfo_x86 *c) +int __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; char *p, *q; @@ -89,7 +90,7 @@ int __devinit get_model_name(struct cpuinfo_x86 *c) } -void __devinit display_cacheinfo(struct cpuinfo_x86 *c) +void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ecx, edx, l2size; @@ -130,7 +131,7 @@ void __devinit display_cacheinfo(struct cpuinfo_x86 *c) /* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ /* Look up CPU names by table lookup. */ -static char __devinit *table_lookup_model(struct cpuinfo_x86 *c) +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) { struct cpu_model_info *info; @@ -151,7 +152,7 @@ static char __devinit *table_lookup_model(struct cpuinfo_x86 *c) } -static void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) { char *v = c->x86_vendor_id; int i; @@ -187,6 +188,14 @@ static int __init x86_fxsr_setup(char * s) __setup("nofxsr", x86_fxsr_setup); +static int __init x86_sep_setup(char * s) +{ + disable_x86_sep = 1; + return 1; +} +__setup("nosep", x86_sep_setup); + + /* Standard macro to see if a specific flag is changeable */ static inline int flag_is_changeable_p(u32 flag) { @@ -210,7 +219,7 @@ static inline int flag_is_changeable_p(u32 flag) /* Probe for the CPUID instruction */ -static int __devinit have_cpuid_p(void) +static int __cpuinit have_cpuid_p(void) { return flag_is_changeable_p(X86_EFLAGS_ID); } @@ -254,10 +263,10 @@ static void __init early_cpu_detect(void) } } -void __devinit generic_identify(struct cpuinfo_x86 * c) +void __cpuinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; - int junk; + int ebx; if (have_cpuid_p()) { /* Get vendor name */ @@ -273,7 +282,7 @@ void __devinit generic_identify(struct cpuinfo_x86 * c) /* Intel-defined flags: level 0x00000001 */ if ( c->cpuid_level >= 0x00000001 ) { u32 capability, excap; - cpuid(0x00000001, &tfms, &junk, &excap, &capability); + cpuid(0x00000001, &tfms, &ebx, &excap, &capability); c->x86_capability[0] = capability; c->x86_capability[4] = excap; c->x86 = (tfms >> 8) & 15; @@ -283,6 +292,11 @@ void __devinit generic_identify(struct cpuinfo_x86 * c) if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; c->x86_mask = tfms & 15; +#ifdef CONFIG_SMP + c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); +#else + c->apicid = (ebx >> 24) & 0xFF; +#endif } else { /* Have CPUID level 0 only - unheard of */ c->x86 = 4; @@ -307,7 +321,7 @@ void __devinit generic_identify(struct cpuinfo_x86 * c) #endif } -static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { /* Disable processor serial number */ @@ -335,7 +349,7 @@ __setup("serialnumber", x86_serial_nr_setup); /* * This does the hard work of actually picking apart the CPU stuff... */ -void __devinit identify_cpu(struct cpuinfo_x86 *c) +void __cpuinit identify_cpu(struct cpuinfo_x86 *c) { int i; @@ -405,6 +419,10 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) clear_bit(X86_FEATURE_XMM, c->x86_capability); } + /* SEP disabled? */ + if (disable_x86_sep) + clear_bit(X86_FEATURE_SEP, c->x86_capability); + if (disable_pse) clear_bit(X86_FEATURE_PSE, c->x86_capability); @@ -417,7 +435,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) else /* Last resort... */ sprintf(c->x86_model_id, "%02x/%02x", - c->x86_vendor, c->x86_model); + c->x86, c->x86_model); } /* Now the feature flags better reflect actual CPU features! */ @@ -453,7 +471,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) } #ifdef CONFIG_X86_HT -void __devinit detect_ht(struct cpuinfo_x86 *c) +void __cpuinit detect_ht(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; int index_msb, core_bits; @@ -461,7 +479,6 @@ void __devinit detect_ht(struct cpuinfo_x86 *c) cpuid(1, &eax, &ebx, &ecx, &edx); - c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; @@ -500,7 +517,7 @@ void __devinit detect_ht(struct cpuinfo_x86 *c) } #endif -void __devinit print_cpu_info(struct cpuinfo_x86 *c) +void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { char *vendor = NULL; @@ -523,7 +540,7 @@ void __devinit print_cpu_info(struct cpuinfo_x86 *c) printk("\n"); } -cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE; +cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; /* This is hacky. :) * We're emulating future behavior. @@ -570,7 +587,7 @@ void __init early_cpu_init(void) * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. */ -void __devinit cpu_init(void) +void __cpuinit cpu_init(void) { int cpu = smp_processor_id(); struct tss_struct * t = &per_cpu(init_tss, cpu); @@ -670,7 +687,7 @@ void __devinit cpu_init(void) } #ifdef CONFIG_HOTPLUG_CPU -void __devinit cpu_uninit(void) +void __cpuinit cpu_uninit(void) { int cpu = raw_smp_processor_id(); cpu_clear(cpu, cpu_initialized); diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig index 26892d2..e44a4c6 100644 --- a/arch/i386/kernel/cpu/cpufreq/Kconfig +++ b/arch/i386/kernel/cpu/cpufreq/Kconfig @@ -96,7 +96,6 @@ config X86_POWERNOW_K8_ACPI config X86_GX_SUSPMOD tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" - depends on PCI help This add the CPUFreq driver for NatSemi Geode processors which support suspend modulation. @@ -115,9 +114,9 @@ config X86_SPEEDSTEP_CENTRINO you also need to say Y to "Use ACPI tables to decode..." below [which might imply enabling ACPI] if you want to use this driver on non-Banias CPUs. - + For details, take a look at <file:Documentation/cpu-freq/>. - + If in doubt, say N. config X86_SPEEDSTEP_CENTRINO_ACPI @@ -148,7 +147,7 @@ config X86_SPEEDSTEP_ICH help This adds the CPUFreq driver for certain mobile Intel Pentium III (Coppermine), all mobile Intel Pentium III-M (Tualatin) and all - mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2, + mobile Intel Pentium 4 P4-M on systems which have an Intel ICH2, ICH3 or ICH4 southbridge. For details, take a look at <file:Documentation/cpu-freq/>. @@ -161,7 +160,7 @@ config X86_SPEEDSTEP_SMI depends on EXPERIMENTAL help This adds the CPUFreq driver for certain mobile Intel Pentium III - (Coppermine), all mobile Intel Pentium III-M (Tualatin) + (Coppermine), all mobile Intel Pentium III-M (Tualatin) on systems which have an Intel 440BX/ZX/MX southbridge. For details, take a look at <file:Documentation/cpu-freq/>. @@ -203,9 +202,10 @@ config X86_LONGRUN config X86_LONGHAUL tristate "VIA Cyrix III Longhaul" select CPU_FREQ_TABLE + depends on BROKEN help - This adds the CPUFreq driver for VIA Samuel/CyrixIII, - VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T + This adds the CPUFreq driver for VIA Samuel/CyrixIII, + VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T processors. For details, take a look at <file:Documentation/cpu-freq/>. @@ -215,11 +215,11 @@ config X86_LONGHAUL comment "shared options" config X86_ACPI_CPUFREQ_PROC_INTF - bool "/proc/acpi/processor/../performance interface (deprecated)" + bool "/proc/acpi/processor/../performance interface (deprecated)" depends on PROC_FS depends on X86_ACPI_CPUFREQ || X86_SPEEDSTEP_CENTRINO_ACPI || X86_POWERNOW_K7_ACPI || X86_POWERNOW_K8_ACPI help - This enables the deprecated /proc/acpi/processor/../performance + This enables the deprecated /proc/acpi/processor/../performance interface. While it is helpful for debugging, the generic, cross-architecture cpufreq interfaces should be used. @@ -233,9 +233,9 @@ config X86_SPEEDSTEP_RELAXED_CAP_CHECK bool "Relaxed speedstep capability checks" depends on (X86_SPEEDSTEP_SMI || X86_SPEEDSTEP_ICH) help - Don't perform all checks for a speedstep capable system which would - normally be done. Some ancient or strange systems, though speedstep - capable, don't always indicate that they are speedstep capable. This + Don't perform all checks for a speedstep capable system which would + normally be done. Some ancient or strange systems, though speedstep + capable, don't always indicate that they are speedstep capable. This option lets the probing code bypass some of those checks if the parameter "relaxed_check=1" is passed to the module. diff --git a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c index 2b62dee..f275e0d 100644 --- a/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c +++ b/arch/i386/kernel/cpu/cpufreq/cpufreq-nforce2.c @@ -39,7 +39,7 @@ static struct pci_dev *nforce2_chipset_dev; static int fid = 0; /* min_fsb, max_fsb: - * minimum and maximum FSB (= FSB at boot time) + * minimum and maximum FSB (= FSB at boot time) */ static int min_fsb = 0; static int max_fsb = 0; @@ -57,10 +57,10 @@ MODULE_PARM_DESC(min_fsb, #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "cpufreq-nforce2", msg) -/* +/** * nforce2_calc_fsb - calculate FSB * @pll: PLL value - * + * * Calculates FSB from PLL value */ static int nforce2_calc_fsb(int pll) @@ -76,10 +76,10 @@ static int nforce2_calc_fsb(int pll) return 0; } -/* +/** * nforce2_calc_pll - calculate PLL value * @fsb: FSB - * + * * Calculate PLL value for given FSB */ static int nforce2_calc_pll(unsigned int fsb) @@ -106,10 +106,10 @@ static int nforce2_calc_pll(unsigned int fsb) return NFORCE2_PLL(mul, div); } -/* +/** * nforce2_write_pll - write PLL value to chipset * @pll: PLL value - * + * * Writes new FSB PLL value to chipset */ static void nforce2_write_pll(int pll) @@ -121,15 +121,13 @@ static void nforce2_write_pll(int pll) pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLADR, temp); /* Now write the value in all 64 registers */ - for (temp = 0; temp <= 0x3f; temp++) { - pci_write_config_dword(nforce2_chipset_dev, - NFORCE2_PLLREG, pll); - } + for (temp = 0; temp <= 0x3f; temp++) + pci_write_config_dword(nforce2_chipset_dev, NFORCE2_PLLREG, pll); return; } -/* +/** * nforce2_fsb_read - Read FSB * * Read FSB from chipset @@ -140,39 +138,32 @@ static unsigned int nforce2_fsb_read(int bootfsb) struct pci_dev *nforce2_sub5; u32 fsb, temp = 0; - /* Get chipset boot FSB from subdevice 5 (FSB at boot-time) */ nforce2_sub5 = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, - 0x01EF, - PCI_ANY_ID, - PCI_ANY_ID, - NULL); - + 0x01EF,PCI_ANY_ID,PCI_ANY_ID,NULL); if (!nforce2_sub5) return 0; pci_read_config_dword(nforce2_sub5, NFORCE2_BOOTFSB, &fsb); fsb /= 1000000; - + /* Check if PLL register is already set */ - pci_read_config_byte(nforce2_chipset_dev, - NFORCE2_PLLENABLE, (u8 *)&temp); - + pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); + if(bootfsb || !temp) return fsb; /* Use PLL register FSB value */ - pci_read_config_dword(nforce2_chipset_dev, - NFORCE2_PLLREG, &temp); + pci_read_config_dword(nforce2_chipset_dev,NFORCE2_PLLREG, &temp); fsb = nforce2_calc_fsb(temp); return fsb; } -/* +/** * nforce2_set_fsb - set new FSB * @fsb: New FSB - * + * * Sets new FSB */ static int nforce2_set_fsb(unsigned int fsb) @@ -186,7 +177,7 @@ static int nforce2_set_fsb(unsigned int fsb) printk(KERN_ERR "cpufreq: FSB %d is out of range!\n", fsb); return -EINVAL; } - + tfsb = nforce2_fsb_read(0); if (!tfsb) { printk(KERN_ERR "cpufreq: Error while reading the FSB\n"); @@ -194,8 +185,7 @@ static int nforce2_set_fsb(unsigned int fsb) } /* First write? Then set actual value */ - pci_read_config_byte(nforce2_chipset_dev, - NFORCE2_PLLENABLE, (u8 *)&temp); + pci_read_config_byte(nforce2_chipset_dev,NFORCE2_PLLENABLE, (u8 *)&temp); if (!temp) { pll = nforce2_calc_pll(tfsb); @@ -223,7 +213,7 @@ static int nforce2_set_fsb(unsigned int fsb) /* Calculate the PLL reg. value */ if ((pll = nforce2_calc_pll(tfsb)) == -1) return -EINVAL; - + nforce2_write_pll(pll); #ifdef NFORCE2_DELAY mdelay(NFORCE2_DELAY); @@ -239,7 +229,7 @@ static int nforce2_set_fsb(unsigned int fsb) /** * nforce2_get - get the CPU frequency * @cpu: CPU number - * + * * Returns the CPU frequency */ static unsigned int nforce2_get(unsigned int cpu) @@ -354,10 +344,10 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy) printk(KERN_INFO "cpufreq: FSB currently at %i MHz, FID %d.%d\n", fsb, fid / 10, fid % 10); - + /* Set maximum FSB to FSB at boot time */ max_fsb = nforce2_fsb_read(1); - + if(!max_fsb) return -EIO; @@ -398,17 +388,15 @@ static struct cpufreq_driver nforce2_driver = { * nforce2_detect_chipset - detect the Southbridge which contains FSB PLL logic * * Detects nForce2 A2 and C1 stepping - * + * */ static unsigned int nforce2_detect_chipset(void) { u8 revision; nforce2_chipset_dev = pci_get_subsys(PCI_VENDOR_ID_NVIDIA, - PCI_DEVICE_ID_NVIDIA_NFORCE2, - PCI_ANY_ID, - PCI_ANY_ID, - NULL); + PCI_DEVICE_ID_NVIDIA_NFORCE2, + PCI_ANY_ID, PCI_ANY_ID, NULL); if (nforce2_chipset_dev == NULL) return -ENODEV; diff --git a/arch/i386/kernel/cpu/cpufreq/elanfreq.c b/arch/i386/kernel/cpu/cpufreq/elanfreq.c index 3f7caa4..f317276 100644 --- a/arch/i386/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/i386/kernel/cpu/cpufreq/elanfreq.c @@ -1,16 +1,16 @@ /* - * elanfreq: cpufreq driver for the AMD ELAN family + * elanfreq: cpufreq driver for the AMD ELAN family * * (c) Copyright 2002 Robert Schwebel <r.schwebel@pengutronix.de> * - * Parts of this code are (c) Sven Geggus <sven@geggus.net> + * Parts of this code are (c) Sven Geggus <sven@geggus.net> * - * All Rights Reserved. + * All Rights Reserved. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. + * 2 of the License, or (at your option) any later version. * * 2002-02-13: - initial revision for 2.4.18-pre9 by Robert Schwebel * @@ -28,7 +28,7 @@ #include <asm/timex.h> #include <asm/io.h> -#define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ +#define REG_CSCIR 0x22 /* Chip Setup and Control Index Register */ #define REG_CSCDR 0x23 /* Chip Setup and Control Data Register */ /* Module parameter */ @@ -41,7 +41,7 @@ struct s_elan_multiplier { }; /* - * It is important that the frequencies + * It is important that the frequencies * are listed in ascending order here! */ struct s_elan_multiplier elan_multiplier[] = { @@ -72,78 +72,79 @@ static struct cpufreq_frequency_table elanfreq_table[] = { * elanfreq_get_cpu_frequency: determine current cpu speed * * Finds out at which frequency the CPU of the Elan SOC runs - * at the moment. Frequencies from 1 to 33 MHz are generated + * at the moment. Frequencies from 1 to 33 MHz are generated * the normal way, 66 and 99 MHz are called "Hyperspeed Mode" - * and have the rest of the chip running with 33 MHz. + * and have the rest of the chip running with 33 MHz. */ static unsigned int elanfreq_get_cpu_frequency(unsigned int cpu) { - u8 clockspeed_reg; /* Clock Speed Register */ - + u8 clockspeed_reg; /* Clock Speed Register */ + local_irq_disable(); - outb_p(0x80,REG_CSCIR); - clockspeed_reg = inb_p(REG_CSCDR); + outb_p(0x80,REG_CSCIR); + clockspeed_reg = inb_p(REG_CSCDR); local_irq_enable(); - if ((clockspeed_reg & 0xE0) == 0xE0) { return 0; } + if ((clockspeed_reg & 0xE0) == 0xE0) + return 0; - /* Are we in CPU clock multiplied mode (66/99 MHz)? */ - if ((clockspeed_reg & 0xE0) == 0xC0) { - if ((clockspeed_reg & 0x01) == 0) { + /* Are we in CPU clock multiplied mode (66/99 MHz)? */ + if ((clockspeed_reg & 0xE0) == 0xC0) { + if ((clockspeed_reg & 0x01) == 0) return 66000; - } else { - return 99000; - } - } + else + return 99000; + } /* 33 MHz is not 32 MHz... */ if ((clockspeed_reg & 0xE0)==0xA0) return 33000; - return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000); + return ((1<<((clockspeed_reg & 0xE0) >> 5)) * 1000); } /** - * elanfreq_set_cpu_frequency: Change the CPU core frequency - * @cpu: cpu number + * elanfreq_set_cpu_frequency: Change the CPU core frequency + * @cpu: cpu number * @freq: frequency in kHz * - * This function takes a frequency value and changes the CPU frequency + * This function takes a frequency value and changes the CPU frequency * according to this. Note that the frequency has to be checked by * elanfreq_validatespeed() for correctness! - * - * There is no return value. + * + * There is no return value. */ -static void elanfreq_set_cpu_state (unsigned int state) { - +static void elanfreq_set_cpu_state (unsigned int state) +{ struct cpufreq_freqs freqs; freqs.old = elanfreq_get_cpu_frequency(0); freqs.new = elan_multiplier[state].clock; freqs.cpu = 0; /* elanfreq.c is UP only driver */ - + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); - printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n",elan_multiplier[state].clock); + printk(KERN_INFO "elanfreq: attempting to set frequency to %i kHz\n", + elan_multiplier[state].clock); - /* - * Access to the Elan's internal registers is indexed via - * 0x22: Chip Setup & Control Register Index Register (CSCI) - * 0x23: Chip Setup & Control Register Data Register (CSCD) + /* + * Access to the Elan's internal registers is indexed via + * 0x22: Chip Setup & Control Register Index Register (CSCI) + * 0x23: Chip Setup & Control Register Data Register (CSCD) * */ - /* - * 0x40 is the Power Management Unit's Force Mode Register. + /* + * 0x40 is the Power Management Unit's Force Mode Register. * Bit 6 enables Hyperspeed Mode (66/100 MHz core frequency) */ local_irq_disable(); - outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */ + outb_p(0x40,REG_CSCIR); /* Disable hyperspeed mode */ outb_p(0x00,REG_CSCDR); local_irq_enable(); /* wait till internal pipelines and */ udelay(1000); /* buffers have cleaned up */ @@ -166,10 +167,10 @@ static void elanfreq_set_cpu_state (unsigned int state) { /** * elanfreq_validatespeed: test if frequency range is valid - * @policy: the policy to validate + * @policy: the policy to validate * - * This function checks if a given frequency range in kHz is valid - * for the hardware supported by the driver. + * This function checks if a given frequency range in kHz is valid + * for the hardware supported by the driver. */ static int elanfreq_verify (struct cpufreq_policy *policy) @@ -177,11 +178,11 @@ static int elanfreq_verify (struct cpufreq_policy *policy) return cpufreq_frequency_table_verify(policy, &elanfreq_table[0]); } -static int elanfreq_target (struct cpufreq_policy *policy, - unsigned int target_freq, +static int elanfreq_target (struct cpufreq_policy *policy, + unsigned int target_freq, unsigned int relation) { - unsigned int newstate = 0; + unsigned int newstate = 0; if (cpufreq_frequency_table_target(policy, &elanfreq_table[0], target_freq, relation, &newstate)) return -EINVAL; @@ -212,7 +213,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) max_freq = elanfreq_get_cpu_frequency(0); /* table init */ - for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { + for (i=0; (elanfreq_table[i].frequency != CPUFREQ_TABLE_END); i++) { if (elanfreq_table[i].frequency > max_freq) elanfreq_table[i].frequency = CPUFREQ_ENTRY_INVALID; } @@ -226,8 +227,7 @@ static int elanfreq_cpu_init(struct cpufreq_policy *policy) if (result) return (result); - cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); - + cpufreq_frequency_table_get_attr(elanfreq_table, policy->cpu); return 0; } @@ -268,9 +268,9 @@ static struct freq_attr* elanfreq_attr[] = { static struct cpufreq_driver elanfreq_driver = { - .get = elanfreq_get_cpu_frequency, - .verify = elanfreq_verify, - .target = elanfreq_target, + .get = elanfreq_get_cpu_frequency, + .verify = elanfreq_verify, + .target = elanfreq_target, .init = elanfreq_cpu_init, .exit = elanfreq_cpu_exit, .name = "elanfreq", @@ -279,23 +279,21 @@ static struct cpufreq_driver elanfreq_driver = { }; -static int __init elanfreq_init(void) -{ +static int __init elanfreq_init(void) +{ struct cpuinfo_x86 *c = cpu_data; /* Test if we have the right hardware */ if ((c->x86_vendor != X86_VENDOR_AMD) || - (c->x86 != 4) || (c->x86_model!=10)) - { + (c->x86 != 4) || (c->x86_model!=10)) { printk(KERN_INFO "elanfreq: error: no Elan processor found!\n"); return -ENODEV; } - return cpufreq_register_driver(&elanfreq_driver); } -static void __exit elanfreq_exit(void) +static void __exit elanfreq_exit(void) { cpufreq_unregister_driver(&elanfreq_driver); } @@ -309,4 +307,3 @@ MODULE_DESCRIPTION("cpufreq driver for AMD's Elan CPUs"); module_init(elanfreq_init); module_exit(elanfreq_exit); - diff --git a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c index e86ea48..92afa3b 100644 --- a/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c +++ b/arch/i386/kernel/cpu/cpufreq/gx-suspmod.c @@ -6,12 +6,12 @@ * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation + * version 2 as published by the Free Software Foundation * * The author(s) of this software shall not be held liable for damages * of any nature resulting due to the use of this software. This * software is provided AS-IS with no warranties. - * + * * Theoritical note: * * (see Geode(tm) CS5530 manual (rev.4.1) page.56) @@ -21,18 +21,18 @@ * * Suspend Modulation works by asserting and de-asserting the SUSP# pin * to CPU(GX1/GXLV) for configurable durations. When asserting SUSP# - * the CPU enters an idle state. GX1 stops its core clock when SUSP# is + * the CPU enters an idle state. GX1 stops its core clock when SUSP# is * asserted then power consumption is reduced. * - * Suspend Modulation's OFF/ON duration are configurable + * Suspend Modulation's OFF/ON duration are configurable * with 'Suspend Modulation OFF Count Register' * and 'Suspend Modulation ON Count Register'. - * These registers are 8bit counters that represent the number of + * These registers are 8bit counters that represent the number of * 32us intervals which the SUSP# pin is asserted(ON)/de-asserted(OFF) * to the processor. * - * These counters define a ratio which is the effective frequency - * of operation of the system. + * These counters define a ratio which is the effective frequency + * of operation of the system. * * OFF Count * F_eff = Fgx * ---------------------- @@ -40,24 +40,24 @@ * * 0 <= On Count, Off Count <= 255 * - * From these limits, we can get register values + * From these limits, we can get register values * * off_duration + on_duration <= MAX_DURATION * on_duration = off_duration * (stock_freq - freq) / freq * - * off_duration = (freq * DURATION) / stock_freq - * on_duration = DURATION - off_duration + * off_duration = (freq * DURATION) / stock_freq + * on_duration = DURATION - off_duration * * *--------------------------------------------------------------------------- * * ChangeLog: - * Dec. 12, 2003 Hiroshi Miura <miura@da-cha.org> - * - fix on/off register mistake - * - fix cpu_khz calc when it stops cpu modulation. + * Dec. 12, 2003 Hiroshi Miura <miura@da-cha.org> + * - fix on/off register mistake + * - fix cpu_khz calc when it stops cpu modulation. * - * Dec. 11, 2002 Hiroshi Miura <miura@da-cha.org> - * - rewrite for Cyrix MediaGX Cx5510/5520 and + * Dec. 11, 2002 Hiroshi Miura <miura@da-cha.org> + * - rewrite for Cyrix MediaGX Cx5510/5520 and * NatSemi Geode Cs5530(A). * * Jul. ??, 2002 Zwane Mwaikambo <zwane@commfireservices.com> @@ -74,40 +74,40 @@ ************************************************************************/ #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/module.h> #include <linux/init.h> #include <linux/smp.h> #include <linux/cpufreq.h> #include <linux/pci.h> -#include <asm/processor.h> +#include <asm/processor.h> #include <asm/errno.h> /* PCI config registers, all at F0 */ -#define PCI_PMER1 0x80 /* power management enable register 1 */ -#define PCI_PMER2 0x81 /* power management enable register 2 */ -#define PCI_PMER3 0x82 /* power management enable register 3 */ -#define PCI_IRQTC 0x8c /* irq speedup timer counter register:typical 2 to 4ms */ -#define PCI_VIDTC 0x8d /* video speedup timer counter register: typical 50 to 100ms */ -#define PCI_MODOFF 0x94 /* suspend modulation OFF counter register, 1 = 32us */ -#define PCI_MODON 0x95 /* suspend modulation ON counter register */ -#define PCI_SUSCFG 0x96 /* suspend configuration register */ +#define PCI_PMER1 0x80 /* power management enable register 1 */ +#define PCI_PMER2 0x81 /* power management enable register 2 */ +#define PCI_PMER3 0x82 /* power management enable register 3 */ +#define PCI_IRQTC 0x8c /* irq speedup timer counter register:typical 2 to 4ms */ +#define PCI_VIDTC 0x8d /* video speedup timer counter register: typical 50 to 100ms */ +#define PCI_MODOFF 0x94 /* suspend modulation OFF counter register, 1 = 32us */ +#define PCI_MODON 0x95 /* suspend modulation ON counter register */ +#define PCI_SUSCFG 0x96 /* suspend configuration register */ /* PMER1 bits */ -#define GPM (1<<0) /* global power management */ -#define GIT (1<<1) /* globally enable PM device idle timers */ -#define GTR (1<<2) /* globally enable IO traps */ -#define IRQ_SPDUP (1<<3) /* disable clock throttle during interrupt handling */ -#define VID_SPDUP (1<<4) /* disable clock throttle during vga video handling */ +#define GPM (1<<0) /* global power management */ +#define GIT (1<<1) /* globally enable PM device idle timers */ +#define GTR (1<<2) /* globally enable IO traps */ +#define IRQ_SPDUP (1<<3) /* disable clock throttle during interrupt handling */ +#define VID_SPDUP (1<<4) /* disable clock throttle during vga video handling */ /* SUSCFG bits */ -#define SUSMOD (1<<0) /* enable/disable suspend modulation */ -/* the belows support only with cs5530 (after rev.1.2)/cs5530A */ -#define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */ - /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */ -#define SUSCFG (1<<2) /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */ -/* the belows support only with cs5530A */ -#define PWRSVE_ISA (1<<3) /* stop ISA clock */ -#define PWRSVE (1<<4) /* active idle */ +#define SUSMOD (1<<0) /* enable/disable suspend modulation */ +/* the belows support only with cs5530 (after rev.1.2)/cs5530A */ +#define SMISPDUP (1<<1) /* select how SMI re-enable suspend modulation: */ + /* IRQTC timer or read SMI speedup disable reg.(F1BAR[08-09h]) */ +#define SUSCFG (1<<2) /* enable powering down a GXLV processor. "Special 3Volt Suspend" mode */ +/* the belows support only with cs5530A */ +#define PWRSVE_ISA (1<<3) /* stop ISA clock */ +#define PWRSVE (1<<4) /* active idle */ struct gxfreq_params { u8 on_duration; @@ -128,7 +128,7 @@ module_param (pci_busclk, int, 0444); /* maximum duration for which the cpu may be suspended * (32us * MAX_DURATION). If no parameter is given, this defaults - * to 255. + * to 255. * Note that this leads to a maximum of 8 ms(!) where the CPU clock * is suspended -- processing power is just 0.39% of what it used to be, * though. 781.25 kHz(!) for a 200 MHz processor -- wow. */ @@ -144,17 +144,17 @@ module_param (max_duration, int, 0444); #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "gx-suspmod", msg) /** - * we can detect a core multipiler from dir0_lsb - * from GX1 datasheet p.56, - * MULT[3:0]: - * 0000 = SYSCLK multiplied by 4 (test only) - * 0001 = SYSCLK multiplied by 10 - * 0010 = SYSCLK multiplied by 4 - * 0011 = SYSCLK multiplied by 6 - * 0100 = SYSCLK multiplied by 9 - * 0101 = SYSCLK multiplied by 5 - * 0110 = SYSCLK multiplied by 7 - * 0111 = SYSCLK multiplied by 8 + * we can detect a core multipiler from dir0_lsb + * from GX1 datasheet p.56, + * MULT[3:0]: + * 0000 = SYSCLK multiplied by 4 (test only) + * 0001 = SYSCLK multiplied by 10 + * 0010 = SYSCLK multiplied by 4 + * 0011 = SYSCLK multiplied by 6 + * 0100 = SYSCLK multiplied by 9 + * 0101 = SYSCLK multiplied by 5 + * 0110 = SYSCLK multiplied by 7 + * 0111 = SYSCLK multiplied by 8 * of 33.3MHz **/ static int gx_freq_mult[16] = { @@ -164,17 +164,17 @@ static int gx_freq_mult[16] = { /**************************************************************** - * Low Level chipset interface * + * Low Level chipset interface * ****************************************************************/ static struct pci_device_id gx_chipset_tbl[] __initdata = { - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID }, - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID }, - { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID }, - { 0, }, + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5530_LEGACY, PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520, PCI_ANY_ID, PCI_ANY_ID }, + { PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510, PCI_ANY_ID, PCI_ANY_ID }, + { 0, }, }; /** - * gx_detect_chipset: + * gx_detect_chipset: * **/ static __init struct pci_dev *gx_detect_chipset(void) @@ -182,17 +182,16 @@ static __init struct pci_dev *gx_detect_chipset(void) struct pci_dev *gx_pci = NULL; /* check if CPU is a MediaGX or a Geode. */ - if ((current_cpu_data.x86_vendor != X86_VENDOR_NSC) && + if ((current_cpu_data.x86_vendor != X86_VENDOR_NSC) && (current_cpu_data.x86_vendor != X86_VENDOR_CYRIX)) { dprintk("error: no MediaGX/Geode processor found!\n"); - return NULL; + return NULL; } /* detect which companion chip is used */ while ((gx_pci = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, gx_pci)) != NULL) { - if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) { + if ((pci_match_id(gx_chipset_tbl, gx_pci)) != NULL) return gx_pci; - } } dprintk("error: no supported chipset found!\n"); @@ -200,24 +199,24 @@ static __init struct pci_dev *gx_detect_chipset(void) } /** - * gx_get_cpuspeed: + * gx_get_cpuspeed: * * Finds out at which efficient frequency the Cyrix MediaGX/NatSemi Geode CPU runs. */ static unsigned int gx_get_cpuspeed(unsigned int cpu) { - if ((gx_params->pci_suscfg & SUSMOD) == 0) + if ((gx_params->pci_suscfg & SUSMOD) == 0) return stock_freq; - return (stock_freq * gx_params->off_duration) + return (stock_freq * gx_params->off_duration) / (gx_params->on_duration + gx_params->off_duration); } /** * gx_validate_speed: * determine current cpu speed - * -**/ + * + **/ static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off_duration) { @@ -230,7 +229,7 @@ static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off *on_duration=0; for (i=max_duration; i>0; i--) { - tmp_off = ((khz * i) / stock_freq) & 0xff; + tmp_off = ((khz * i) / stock_freq) & 0xff; tmp_on = i - tmp_off; tmp_freq = (stock_freq * tmp_off) / i; /* if this relation is closer to khz, use this. If it's equal, @@ -247,18 +246,17 @@ static unsigned int gx_validate_speed(unsigned int khz, u8 *on_duration, u8 *off /** - * gx_set_cpuspeed: - * set cpu speed in khz. + * gx_set_cpuspeed: + * set cpu speed in khz. **/ static void gx_set_cpuspeed(unsigned int khz) { - u8 suscfg, pmer1; + u8 suscfg, pmer1; unsigned int new_khz; unsigned long flags; struct cpufreq_freqs freqs; - freqs.cpu = 0; freqs.old = gx_get_cpuspeed(0); @@ -303,18 +301,18 @@ static void gx_set_cpuspeed(unsigned int khz) pci_write_config_byte(gx_params->cs55x0, PCI_MODOFF, gx_params->off_duration); pci_write_config_byte(gx_params->cs55x0, PCI_MODON, gx_params->on_duration); - pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg); - pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg); + pci_write_config_byte(gx_params->cs55x0, PCI_SUSCFG, suscfg); + pci_read_config_byte(gx_params->cs55x0, PCI_SUSCFG, &suscfg); - local_irq_restore(flags); + local_irq_restore(flags); gx_params->pci_suscfg = suscfg; cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); - dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n", - gx_params->on_duration * 32, gx_params->off_duration * 32); - dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new); + dprintk("suspend modulation w/ duration of ON:%d us, OFF:%d us\n", + gx_params->on_duration * 32, gx_params->off_duration * 32); + dprintk("suspend modulation w/ clock speed: %d kHz.\n", freqs.new); } /**************************************************************** @@ -322,10 +320,10 @@ static void gx_set_cpuspeed(unsigned int khz) ****************************************************************/ /* - * cpufreq_gx_verify: test if frequency range is valid + * cpufreq_gx_verify: test if frequency range is valid * - * This function checks if a given frequency range in kHz is valid - * for the hardware supported by the driver. + * This function checks if a given frequency range in kHz is valid + * for the hardware supported by the driver. */ static int cpufreq_gx_verify(struct cpufreq_policy *policy) @@ -333,8 +331,8 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy) unsigned int tmp_freq = 0; u8 tmp1, tmp2; - if (!stock_freq || !policy) - return -EINVAL; + if (!stock_freq || !policy) + return -EINVAL; policy->cpu = 0; cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); @@ -342,14 +340,14 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy) /* it needs to be assured that at least one supported frequency is * within policy->min and policy->max. If it is not, policy->max * needs to be increased until one freuqency is supported. - * policy->min may not be decreased, though. This way we guarantee a + * policy->min may not be decreased, though. This way we guarantee a * specific processing capacity. */ tmp_freq = gx_validate_speed(policy->min, &tmp1, &tmp2); - if (tmp_freq < policy->min) + if (tmp_freq < policy->min) tmp_freq += stock_freq / max_duration; policy->min = tmp_freq; - if (policy->min > policy->max) + if (policy->min > policy->max) policy->max = tmp_freq; tmp_freq = gx_validate_speed(policy->max, &tmp1, &tmp2); if (tmp_freq > policy->max) @@ -358,12 +356,12 @@ static int cpufreq_gx_verify(struct cpufreq_policy *policy) if (policy->max < policy->min) policy->max = policy->min; cpufreq_verify_within_limits(policy, (stock_freq / max_duration), stock_freq); - + return 0; } /* - * cpufreq_gx_target: + * cpufreq_gx_target: * */ static int cpufreq_gx_target(struct cpufreq_policy *policy, @@ -373,8 +371,8 @@ static int cpufreq_gx_target(struct cpufreq_policy *policy, u8 tmp1, tmp2; unsigned int tmp_freq; - if (!stock_freq || !policy) - return -EINVAL; + if (!stock_freq || !policy) + return -EINVAL; policy->cpu = 0; @@ -431,7 +429,7 @@ static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy) return 0; } -/* +/* * cpufreq_gx_init: * MediaGX/Geode GX initialize cpufreq driver */ @@ -452,7 +450,7 @@ static int __init cpufreq_gx_init(void) u32 class_rev; /* Test if we have the right hardware */ - if ((gx_pci = gx_detect_chipset()) == NULL) + if ((gx_pci = gx_detect_chipset()) == NULL) return -ENODEV; /* check whether module parameters are sane */ @@ -461,10 +459,9 @@ static int __init cpufreq_gx_init(void) dprintk("geode suspend modulation available.\n"); - params = kmalloc(sizeof(struct gxfreq_params), GFP_KERNEL); + params = kzalloc(sizeof(struct gxfreq_params), GFP_KERNEL); if (params == NULL) return -ENOMEM; - memset(params, 0, sizeof(struct gxfreq_params)); params->cs55x0 = gx_pci; gx_params = params; @@ -478,7 +475,7 @@ static int __init cpufreq_gx_init(void) pci_read_config_dword(params->cs55x0, PCI_CLASS_REVISION, &class_rev); params->pci_rev = class_rev && 0xff; - if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { + if ((ret = cpufreq_register_driver(&gx_suspmod_driver))) { kfree(params); return ret; /* register error! */ } diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.h b/arch/i386/kernel/cpu/cpufreq/longhaul.h index 2a495c1..d3a95d77 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.h +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.h @@ -234,7 +234,7 @@ static int __initdata ezrat_eblcr[32] = { /* * VIA C3 Nehemiah */ - + static int __initdata nehemiah_a_clock_ratio[32] = { 100, /* 0000 -> 10.0x */ 160, /* 0001 -> 16.0x */ @@ -446,7 +446,7 @@ static int __initdata nehemiah_c_eblcr[32] = { /* end of table */ }; -/* +/* * Voltage scales. Div/Mod by 1000 to get actual voltage. * Which scale to use depends on the VRM type in use. */ diff --git a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c index cc73a7a..ab6504e 100644 --- a/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/i386/kernel/cpu/cpufreq/p4-clockmod.c @@ -14,7 +14,7 @@ * The author(s) of this software shall not be held liable for damages * of any nature resulting due to the use of this software. This * software is provided AS-IS with no warranties. - * + * * Date Errata Description * 20020525 N44, O17 12.5% or 25% DC causes lockup * @@ -22,7 +22,7 @@ #include <linux/config.h> #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/module.h> #include <linux/init.h> #include <linux/smp.h> #include <linux/cpufreq.h> @@ -30,7 +30,7 @@ #include <linux/cpumask.h> #include <linux/sched.h> /* current / set_cpus_allowed() */ -#include <asm/processor.h> +#include <asm/processor.h> #include <asm/msr.h> #include <asm/timex.h> @@ -79,7 +79,7 @@ static int cpufreq_p4_setdc(unsigned int cpu, unsigned int newstate) } else { dprintk("CPU#%d setting duty cycle to %d%%\n", cpu, ((125 * newstate) / 10)); - /* bits 63 - 5 : reserved + /* bits 63 - 5 : reserved * bit 4 : enable/disable * bits 3-1 : duty cycle * bit 0 : reserved @@ -132,7 +132,7 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, } /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software - * Developer's Manual, Volume 3 + * Developer's Manual, Volume 3 */ cpus_allowed = current->cpus_allowed; @@ -206,7 +206,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_P4D); } - + static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) { @@ -234,7 +234,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) dprintk("has errata -- disabling frequencies lower than 2ghz\n"); break; } - + /* get max frequency */ stock_freq = cpufreq_p4_get_frequency(c); if (!stock_freq) @@ -244,13 +244,13 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) for (i=1; (p4clockmod_table[i].frequency != CPUFREQ_TABLE_END); i++) { if ((i<2) && (has_N44_O17_errata[policy->cpu])) p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; - else if (has_N60_errata[policy->cpu] && p4clockmod_table[i].frequency < 2000000) + else if (has_N60_errata[policy->cpu] && ((stock_freq * i)/8) < 2000000) p4clockmod_table[i].frequency = CPUFREQ_ENTRY_INVALID; else p4clockmod_table[i].frequency = (stock_freq * i)/8; } cpufreq_frequency_table_get_attr(p4clockmod_table, policy->cpu); - + /* cpuinfo and default policy values */ policy->governor = CPUFREQ_DEFAULT_GOVERNOR; policy->cpuinfo.transition_latency = 1000000; /* assumed */ @@ -262,7 +262,7 @@ static int cpufreq_p4_cpu_init(struct cpufreq_policy *policy) static int cpufreq_p4_cpu_exit(struct cpufreq_policy *policy) { - cpufreq_frequency_table_put_attr(policy->cpu); + cpufreq_frequency_table_put_attr(policy->cpu); return 0; } @@ -298,7 +298,7 @@ static struct freq_attr* p4clockmod_attr[] = { }; static struct cpufreq_driver p4clockmod_driver = { - .verify = cpufreq_p4_verify, + .verify = cpufreq_p4_verify, .target = cpufreq_p4_target, .init = cpufreq_p4_cpu_init, .exit = cpufreq_p4_cpu_exit, @@ -310,12 +310,12 @@ static struct cpufreq_driver p4clockmod_driver = { static int __init cpufreq_p4_init(void) -{ +{ struct cpuinfo_x86 *c = cpu_data; int ret; /* - * THERM_CONTROL is architectural for IA32 now, so + * THERM_CONTROL is architectural for IA32 now, so * we can rely on the capability checks */ if (c->x86_vendor != X86_VENDOR_INTEL) diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k6.c b/arch/i386/kernel/cpu/cpufreq/powernow-k6.c index 222f8cf..f895240 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k6.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k6.c @@ -8,7 +8,7 @@ */ #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/module.h> #include <linux/init.h> #include <linux/cpufreq.h> #include <linux/ioport.h> @@ -50,7 +50,7 @@ static int powernow_k6_get_cpu_multiplier(void) { u64 invalue = 0; u32 msrval; - + msrval = POWERNOW_IOPORT + 0x1; wrmsr(MSR_K6_EPMR, msrval, 0); /* enable the PowerNow port */ invalue=inl(POWERNOW_IOPORT + 0x8); @@ -81,7 +81,7 @@ static void powernow_k6_set_state (unsigned int best_i) freqs.old = busfreq * powernow_k6_get_cpu_multiplier(); freqs.new = busfreq * clock_ratio[best_i].index; freqs.cpu = 0; /* powernow-k6.c is UP only driver */ - + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); /* we now need to transform best_i to the BVC format, see AMD#23446 */ @@ -152,7 +152,7 @@ static int powernow_k6_cpu_init(struct cpufreq_policy *policy) busfreq = cpu_khz / max_multiplier; /* table init */ - for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { + for (i=0; (clock_ratio[i].frequency != CPUFREQ_TABLE_END); i++) { if (clock_ratio[i].index > max_multiplier) clock_ratio[i].frequency = CPUFREQ_ENTRY_INVALID; else @@ -182,7 +182,7 @@ static int powernow_k6_cpu_exit(struct cpufreq_policy *policy) powernow_k6_set_state(i); } cpufreq_frequency_table_put_attr(policy->cpu); - return 0; + return 0; } static unsigned int powernow_k6_get(unsigned int cpu) @@ -196,8 +196,8 @@ static struct freq_attr* powernow_k6_attr[] = { }; static struct cpufreq_driver powernow_k6_driver = { - .verify = powernow_k6_verify, - .target = powernow_k6_target, + .verify = powernow_k6_verify, + .target = powernow_k6_target, .init = powernow_k6_cpu_init, .exit = powernow_k6_cpu_exit, .get = powernow_k6_get, @@ -215,7 +215,7 @@ static struct cpufreq_driver powernow_k6_driver = { * on success. */ static int __init powernow_k6_init(void) -{ +{ struct cpuinfo_x86 *c = cpu_data; if ((c->x86_vendor != X86_VENDOR_AMD) || (c->x86 != 5) || diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c index edcd626..2bf4237 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k7.c @@ -199,8 +199,8 @@ static int get_ranges (unsigned char *pst) powernow_table[j].index |= (vid << 8); /* upper 8 bits */ dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " - "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, - fid_codes[fid] % 10, speed/1000, vid, + "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, + fid_codes[fid] % 10, speed/1000, vid, mobile_vid_table[vid]/1000, mobile_vid_table[vid]%1000); } @@ -368,8 +368,8 @@ static int powernow_acpi_init(void) } dprintk (" FID: 0x%x (%d.%dx [%dMHz]) " - "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, - fid_codes[fid] % 10, speed/1000, vid, + "VID: 0x%x (%d.%03dV)\n", fid, fid_codes[fid] / 10, + fid_codes[fid] % 10, speed/1000, vid, mobile_vid_table[vid]/1000, mobile_vid_table[vid]%1000); @@ -460,7 +460,7 @@ static int powernow_decode_bios (int maxfid, int startvid) (maxfid==pst->maxfid) && (startvid==pst->startvid)) { dprintk ("PST:%d (@%p)\n", i, pst); - dprintk (" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", + dprintk (" cpuid: 0x%x fsb: %d maxFID: 0x%x startvid: 0x%x\n", pst->cpuid, pst->fsbspeed, pst->maxfid, pst->startvid); ret = get_ranges ((char *) pst + sizeof (struct pst_s)); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index e11a092..712a26b 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -40,21 +40,22 @@ #ifdef CONFIG_X86_POWERNOW_K8_ACPI #include <linux/acpi.h> +#include <linux/mutex.h> #include <acpi/processor.h> #endif #define PFX "powernow-k8: " #define BFX PFX "BIOS error: " -#define VERSION "version 1.60.0" +#define VERSION "version 1.60.1" #include "powernow-k8.h" /* serialize freq changes */ -static DECLARE_MUTEX(fidvid_sem); +static DEFINE_MUTEX(fidvid_mutex); static struct powernow_k8_data *powernow_data[NR_CPUS]; #ifndef CONFIG_SMP -static cpumask_t cpu_core_map[1]; +static cpumask_t cpu_core_map[1] = { CPU_MASK_ALL }; #endif /* Return a frequency in MHz, given an input fid */ @@ -83,11 +84,10 @@ static u32 find_millivolts_from_vid(struct powernow_k8_data *data, u32 vid) */ static u32 convert_fid_to_vco_fid(u32 fid) { - if (fid < HI_FID_TABLE_BOTTOM) { + if (fid < HI_FID_TABLE_BOTTOM) return 8 + (2 * fid); - } else { + else return fid; - } } /* @@ -177,7 +177,7 @@ static int write_new_fid(struct powernow_k8_data *data, u32 fid) if (i++ > 100) { printk(KERN_ERR PFX "internal error - pending bit very stuck - no further pstate changes possible\n"); return 1; - } + } } while (query_current_values_with_pending_wait(data)); count_off_irt(data); @@ -474,8 +474,10 @@ static int check_supported_cpu(unsigned int cpu) goto out; eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + if ((eax & CPUID_XFAM) != CPUID_XFAM_K8) + goto out; + if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || - ((eax & CPUID_XFAM) != CPUID_XFAM_K8) || ((eax & CPUID_XMOD) > CPUID_XMOD_REV_G)) { printk(KERN_INFO PFX "Processor cpuid %x not supported\n", eax); goto out; @@ -780,9 +782,7 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) /* verify only 1 entry from the lo frequency table */ if (fid < HI_FID_TABLE_BOTTOM) { if (cntlofreq) { - /* if both entries are the same, ignore this - * one... - */ + /* if both entries are the same, ignore this one ... */ if ((powernow_table[i].frequency != powernow_table[cntlofreq].frequency) || (powernow_table[i].index != powernow_table[cntlofreq].index)) { printk(KERN_ERR PFX "Too many lo freq table entries\n"); @@ -854,7 +854,7 @@ static int transition_frequency(struct powernow_k8_data *data, unsigned int inde dprintk("cpu %d transition to index %u\n", smp_processor_id(), index); /* fid are the lower 8 bits of the index we stored into - * the cpufreq frequency table in find_psb_table, vid are + * the cpufreq frequency table in find_psb_table, vid are * the upper 8 bits. */ @@ -909,7 +909,6 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi u32 checkvid = data->currvid; unsigned int newstate; int ret = -EIO; - int i; /* only run on specific CPU from here on */ oldmask = current->cpus_allowed; @@ -945,23 +944,17 @@ static int powernowk8_target(struct cpufreq_policy *pol, unsigned targfreq, unsi if (cpufreq_frequency_table_target(pol, data->powernow_table, targfreq, relation, &newstate)) goto err_out; - down(&fidvid_sem); + mutex_lock(&fidvid_mutex); powernow_k8_acpi_pst_values(data, newstate); if (transition_frequency(data, newstate)) { printk(KERN_ERR PFX "transition frequency failed\n"); ret = 1; - up(&fidvid_sem); + mutex_unlock(&fidvid_mutex); goto err_out; } - - /* Update all the fid/vids of our siblings */ - for_each_cpu_mask(i, cpu_core_map[pol->cpu]) { - powernow_data[i]->currvid = data->currvid; - powernow_data[i]->currfid = data->currfid; - } - up(&fidvid_sem); + mutex_unlock(&fidvid_mutex); pol->cur = find_khz_freq_from_fid(data->currfid); ret = 0; @@ -1048,7 +1041,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) pol->governor = CPUFREQ_DEFAULT_GOVERNOR; pol->cpus = cpu_core_map[pol->cpu]; - /* Take a crude guess here. + /* Take a crude guess here. * That guess was in microseconds, so multiply with 1000 */ pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US) + (3 * (1 << data->irt) * 10)) * 1000; @@ -1070,9 +1063,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) printk("cpu_init done, current fid 0x%x, vid 0x%x\n", data->currfid, data->currvid); - for_each_cpu_mask(i, cpu_core_map[pol->cpu]) { + for_each_cpu_mask(i, cpu_core_map[pol->cpu]) powernow_data[i] = data; - } return 0; @@ -1103,10 +1095,15 @@ static int __devexit powernowk8_cpu_exit (struct cpufreq_policy *pol) static unsigned int powernowk8_get (unsigned int cpu) { - struct powernow_k8_data *data = powernow_data[cpu]; + struct powernow_k8_data *data; cpumask_t oldmask = current->cpus_allowed; unsigned int khz = 0; + data = powernow_data[first_cpu(cpu_core_map[cpu])]; + + if (!data) + return -EINVAL; + set_cpus_allowed(current, cpumask_of_cpu(cpu)); if (smp_processor_id() != cpu) { printk(KERN_ERR PFX "limiting to CPU %d failed in powernowk8_get\n", cpu); @@ -1145,16 +1142,14 @@ static int __cpuinit powernowk8_init(void) { unsigned int i, supported_cpus = 0; - for (i=0; i<NR_CPUS; i++) { - if (!cpu_online(i)) - continue; + for_each_online_cpu(i) { if (check_supported_cpu(i)) supported_cpus++; } if (supported_cpus == num_online_cpus()) { - printk(KERN_INFO PFX "Found %d AMD Athlon 64 / Opteron processors (" VERSION ")\n", - supported_cpus); + printk(KERN_INFO PFX "Found %d AMD Athlon 64 / Opteron " + "processors (" VERSION ")\n", supported_cpus); return cpufreq_register_driver(&cpufreq_amd64_driver); } diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h index d0de37d..79a7c5c 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.h @@ -63,7 +63,7 @@ struct powernow_k8_data { #define MSR_C_LO_VID_SHIFT 8 /* Field definitions within the FID VID High Control MSR : */ -#define MSR_C_HI_STP_GNT_TO 0x000fffff +#define MSR_C_HI_STP_GNT_TO 0x000fffff /* Field definitions within the FID VID Low Status MSR : */ #define MSR_S_LO_CHANGE_PENDING 0x80000000 /* cleared when completed */ @@ -123,7 +123,7 @@ struct powernow_k8_data { * Most values of interest are enocoded in a single field of the _PSS * entries: the "control" value. */ - + #define IRT_SHIFT 30 #define RVO_SHIFT 28 #define EXT_TYPE_SHIFT 27 @@ -182,10 +182,6 @@ static int core_frequency_transition(struct powernow_k8_data *data, u32 reqfid); static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index); -#ifndef for_each_cpu_mask -#define for_each_cpu_mask(i,mask) for (i=0;i<1;i++) -#endif - #ifdef CONFIG_SMP static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) { diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c index c173c0f..b0ff907 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-centrino.c @@ -479,15 +479,13 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) unsigned l, h; int ret; int i; - struct cpuinfo_x86 *c = &cpu_data[policy->cpu]; /* Only Intel makes Enhanced Speedstep-capable CPUs */ if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) return -ENODEV; - if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { + if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) centrino_driver.flags |= CPUFREQ_CONST_LOOPS; - } if (centrino_cpu_init_acpi(policy)) { if (policy->cpu != 0) diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c index 7c47005..4f46cac 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.c @@ -9,7 +9,7 @@ */ #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/module.h> #include <linux/moduleparam.h> #include <linux/init.h> #include <linux/cpufreq.h> @@ -36,8 +36,8 @@ static unsigned int pentium3_get_frequency (unsigned int processor) /* See table 14 of p3_ds.pdf and table 22 of 29834003.pdf */ struct { unsigned int ratio; /* Frequency Multiplier (x10) */ - u8 bitmap; /* power on configuration bits - [27, 25:22] (in MSR 0x2a) */ + u8 bitmap; /* power on configuration bits + [27, 25:22] (in MSR 0x2a) */ } msr_decode_mult [] = { { 30, 0x01 }, { 35, 0x05 }, @@ -58,9 +58,9 @@ static unsigned int pentium3_get_frequency (unsigned int processor) /* PIII(-M) FSB settings: see table b1-b of 24547206.pdf */ struct { - unsigned int value; /* Front Side Bus speed in MHz */ - u8 bitmap; /* power on configuration bits [18: 19] - (in MSR 0x2a) */ + unsigned int value; /* Front Side Bus speed in MHz */ + u8 bitmap; /* power on configuration bits [18: 19] + (in MSR 0x2a) */ } msr_decode_fsb [] = { { 66, 0x0 }, { 100, 0x2 }, @@ -68,8 +68,8 @@ static unsigned int pentium3_get_frequency (unsigned int processor) { 0, 0xff} }; - u32 msr_lo, msr_tmp; - int i = 0, j = 0; + u32 msr_lo, msr_tmp; + int i = 0, j = 0; /* read MSR 0x2a - we only need the low 32 bits */ rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); @@ -106,7 +106,7 @@ static unsigned int pentium3_get_frequency (unsigned int processor) static unsigned int pentiumM_get_frequency(void) { - u32 msr_lo, msr_tmp; + u32 msr_lo, msr_tmp; rdmsr(MSR_IA32_EBL_CR_POWERON, msr_lo, msr_tmp); dprintk("PM - MSR_IA32_EBL_CR_POWERON: 0x%x 0x%x\n", msr_lo, msr_tmp); @@ -134,7 +134,7 @@ static unsigned int pentium4_get_frequency(void) dprintk("P4 - MSR_EBC_FREQUENCY_ID: 0x%x 0x%x\n", msr_lo, msr_hi); - /* decode the FSB: see IA-32 Intel (C) Architecture Software + /* decode the FSB: see IA-32 Intel (C) Architecture Software * Developer's Manual, Volume 3: System Prgramming Guide, * revision #12 in Table B-1: MSRs in the Pentium 4 and * Intel Xeon Processors, on page B-4 and B-5. @@ -170,7 +170,7 @@ static unsigned int pentium4_get_frequency(void) return (fsb * mult); } - + unsigned int speedstep_get_processor_frequency(unsigned int processor) { switch (processor) { @@ -198,11 +198,11 @@ EXPORT_SYMBOL_GPL(speedstep_get_processor_frequency); unsigned int speedstep_detect_processor (void) { struct cpuinfo_x86 *c = cpu_data; - u32 ebx, msr_lo, msr_hi; + u32 ebx, msr_lo, msr_hi; dprintk("x86: %x, model: %x\n", c->x86, c->x86_model); - if ((c->x86_vendor != X86_VENDOR_INTEL) || + if ((c->x86_vendor != X86_VENDOR_INTEL) || ((c->x86 != 6) && (c->x86 != 0xF))) return 0; @@ -218,15 +218,15 @@ unsigned int speedstep_detect_processor (void) dprintk("ebx value is %x, x86_mask is %x\n", ebx, c->x86_mask); switch (c->x86_mask) { - case 4: + case 4: /* - * B-stepping [M-P4-M] + * B-stepping [M-P4-M] * sample has ebx = 0x0f, production has 0x0e. */ if ((ebx == 0x0e) || (ebx == 0x0f)) return SPEEDSTEP_PROCESSOR_P4M; break; - case 7: + case 7: /* * C-stepping [M-P4-M] * needs to have ebx=0x0e, else it's a celeron: @@ -253,7 +253,7 @@ unsigned int speedstep_detect_processor (void) * also, M-P4M HTs have ebx=0x8, too * For now, they are distinguished by the model_id string */ - if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL)) + if ((ebx == 0x0e) || (strstr(c->x86_model_id,"Mobile Intel(R) Pentium(R) 4") != NULL)) return SPEEDSTEP_PROCESSOR_P4M; break; default: @@ -264,8 +264,7 @@ unsigned int speedstep_detect_processor (void) switch (c->x86_model) { case 0x0B: /* Intel PIII [Tualatin] */ - /* cpuid_ebx(1) is 0x04 for desktop PIII, - 0x06 for mobile PIII-M */ + /* cpuid_ebx(1) is 0x04 for desktop PIII, 0x06 for mobile PIII-M */ ebx = cpuid_ebx(0x00000001); dprintk("ebx is %x\n", ebx); @@ -275,9 +274,8 @@ unsigned int speedstep_detect_processor (void) return 0; /* So far all PIII-M processors support SpeedStep. See - * Intel's 24540640.pdf of June 2003 + * Intel's 24540640.pdf of June 2003 */ - return SPEEDSTEP_PROCESSOR_PIII_T; case 0x08: /* Intel PIII [Coppermine] */ @@ -399,7 +397,7 @@ unsigned int speedstep_get_freqs(unsigned int processor, } } - out: +out: local_irq_restore(flags); return (ret); } diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h index 6a727fd..b735429 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-lib.h @@ -14,7 +14,7 @@ #define SPEEDSTEP_PROCESSOR_PIII_C_EARLY 0x00000001 /* Coppermine core */ #define SPEEDSTEP_PROCESSOR_PIII_C 0x00000002 /* Coppermine core */ -#define SPEEDSTEP_PROCESSOR_PIII_T 0x00000003 /* Tualatin core */ +#define SPEEDSTEP_PROCESSOR_PIII_T 0x00000003 /* Tualatin core */ #define SPEEDSTEP_PROCESSOR_P4M 0x00000004 /* P4-M */ /* the following processors are not speedstep-capable and are not auto-detected @@ -25,8 +25,8 @@ /* speedstep states -- only two of them */ -#define SPEEDSTEP_HIGH 0x00000000 -#define SPEEDSTEP_LOW 0x00000001 +#define SPEEDSTEP_HIGH 0x00000000 +#define SPEEDSTEP_LOW 0x00000001 /* detect a speedstep-capable processor */ @@ -36,13 +36,13 @@ extern unsigned int speedstep_detect_processor (void); extern unsigned int speedstep_get_processor_frequency(unsigned int processor); -/* detect the low and high speeds of the processor. The callback - * set_state"'s first argument is either SPEEDSTEP_HIGH or - * SPEEDSTEP_LOW; the second argument is zero so that no +/* detect the low and high speeds of the processor. The callback + * set_state"'s first argument is either SPEEDSTEP_HIGH or + * SPEEDSTEP_LOW; the second argument is zero so that no * cpufreq_notify_transition calls are initiated. */ extern unsigned int speedstep_get_freqs(unsigned int processor, - unsigned int *low_speed, - unsigned int *high_speed, - unsigned int *transition_latency, - void (*set_state) (unsigned int state)); + unsigned int *low_speed, + unsigned int *high_speed, + unsigned int *transition_latency, + void (*set_state) (unsigned int state)); diff --git a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c index 28cc5d5..c28333d 100644 --- a/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c +++ b/arch/i386/kernel/cpu/cpufreq/speedstep-smi.c @@ -13,8 +13,8 @@ *********************************************************************/ #include <linux/kernel.h> -#include <linux/module.h> -#include <linux/moduleparam.h> +#include <linux/module.h> +#include <linux/moduleparam.h> #include <linux/init.h> #include <linux/cpufreq.h> #include <linux/pci.h> @@ -28,21 +28,21 @@ * * These parameters are got from IST-SMI BIOS call. * If user gives it, these are used. - * + * */ -static int smi_port = 0; -static int smi_cmd = 0; -static unsigned int smi_sig = 0; +static int smi_port = 0; +static int smi_cmd = 0; +static unsigned int smi_sig = 0; /* info about the processor */ -static unsigned int speedstep_processor = 0; +static unsigned int speedstep_processor = 0; -/* - * There are only two frequency states for each processor. Values +/* + * There are only two frequency states for each processor. Values * are in kHz for the time being. */ static struct cpufreq_frequency_table speedstep_freqs[] = { - {SPEEDSTEP_HIGH, 0}, + {SPEEDSTEP_HIGH, 0}, {SPEEDSTEP_LOW, 0}, {0, CPUFREQ_TABLE_END}, }; @@ -75,7 +75,9 @@ static int speedstep_smi_ownership (void) __asm__ __volatile__( "out %%al, (%%dx)\n" : "=D" (result) - : "a" (command), "b" (function), "c" (0), "d" (smi_port), "D" (0), "S" (magic) + : "a" (command), "b" (function), "c" (0), "d" (smi_port), + "D" (0), "S" (magic) + : "memory" ); dprintk("result is %x\n", result); @@ -123,7 +125,7 @@ static int speedstep_smi_get_freqs (unsigned int *low, unsigned int *high) *low = low_mhz * 1000; return result; -} +} /** * speedstep_get_state - set the SpeedStep state @@ -204,7 +206,7 @@ static void speedstep_set_state (unsigned int state) * speedstep_target - set a new CPUFreq policy * @policy: new policy * @target_freq: new freq - * @relation: + * @relation: * * Sets a new CPUFreq policy/freq. */ @@ -283,7 +285,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) state = speedstep_get_state(); speed = speedstep_freqs[state].frequency; - dprintk("currently at %s speed setting - %i MHz\n", + dprintk("currently at %s speed setting - %i MHz\n", (speed == speedstep_freqs[SPEEDSTEP_LOW].frequency) ? "low" : "high", (speed / 1000)); @@ -296,7 +298,7 @@ static int speedstep_cpu_init(struct cpufreq_policy *policy) if (result) return (result); - cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); + cpufreq_frequency_table_get_attr(speedstep_freqs, policy->cpu); return 0; } @@ -332,8 +334,8 @@ static struct freq_attr* speedstep_attr[] = { static struct cpufreq_driver speedstep_driver = { .name = "speedstep-smi", - .verify = speedstep_verify, - .target = speedstep_target, + .verify = speedstep_verify, + .target = speedstep_target, .init = speedstep_cpu_init, .exit = speedstep_cpu_exit, .get = speedstep_get, @@ -370,13 +372,12 @@ static int __init speedstep_init(void) return -ENODEV; } - dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n", + dprintk("signature:0x%.8lx, command:0x%.8lx, event:0x%.8lx, perf_level:0x%.8lx.\n", ist_info.signature, ist_info.command, ist_info.event, ist_info.perf_level); - - /* Error if no IST-SMI BIOS or no PARM + /* Error if no IST-SMI BIOS or no PARM sig= 'ISGE' aka 'Intel Speedstep Gate E' */ - if ((ist_info.signature != 0x47534943) && ( + if ((ist_info.signature != 0x47534943) && ( (smi_port == 0) || (smi_cmd == 0))) return -ENODEV; @@ -386,17 +387,15 @@ static int __init speedstep_init(void) smi_sig = ist_info.signature; /* setup smi_port from MODLULE_PARM or BIOS */ - if ((smi_port > 0xff) || (smi_port < 0)) { + if ((smi_port > 0xff) || (smi_port < 0)) return -EINVAL; - } else if (smi_port == 0) { + else if (smi_port == 0) smi_port = ist_info.command & 0xff; - } - if ((smi_cmd > 0xff) || (smi_cmd < 0)) { + if ((smi_cmd > 0xff) || (smi_cmd < 0)) return -EINVAL; - } else if (smi_cmd == 0) { + else if (smi_cmd == 0) smi_cmd = (ist_info.command >> 16) & 0xff; - } return cpufreq_register_driver(&speedstep_driver); } diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 8c01201..5386b29 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -29,7 +29,7 @@ extern int trap_init_f00f_bug(void); struct movsl_mask movsl_mask __read_mostly; #endif -void __devinit early_intel_workaround(struct cpuinfo_x86 *c) +void __cpuinit early_intel_workaround(struct cpuinfo_x86 *c) { if (c->x86_vendor != X86_VENDOR_INTEL) return; @@ -44,7 +44,7 @@ void __devinit early_intel_workaround(struct cpuinfo_x86 *c) * This is called before we do cpu ident work */ -int __devinit ppro_with_ram_bug(void) +int __cpuinit ppro_with_ram_bug(void) { /* Uses data from early_cpu_detect now */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && @@ -62,7 +62,7 @@ int __devinit ppro_with_ram_bug(void) * P4 Xeon errata 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. */ -static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c) +static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) { unsigned long lo, hi; @@ -81,7 +81,7 @@ static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c) /* * find out the number of processor cores on the die */ -static int __devinit num_cpu_cores(struct cpuinfo_x86 *c) +static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) { unsigned int eax, ebx, ecx, edx; @@ -96,7 +96,7 @@ static int __devinit num_cpu_cores(struct cpuinfo_x86 *c) return 1; } -static void __devinit init_intel(struct cpuinfo_x86 *c) +static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; char *p = NULL; @@ -205,7 +205,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) return size; } -static struct cpu_dev intel_cpu_dev __devinitdata = { +static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, .c_models = { diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index ffe58ce..9df87b0 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -173,8 +173,12 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ + unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; +#ifdef CONFIG_SMP + unsigned int cpu = (c == &boot_cpu_data) ? 0 : (c - cpu_data); +#endif - if (c->cpuid_level > 4) { + if (c->cpuid_level > 3) { static int is_initialized; if (is_initialized == 0) { @@ -205,9 +209,15 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) break; case 2: new_l2 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l2_id = c->apicid >> index_msb; break; case 3: new_l3 = this_leaf.size/1024; + num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l3_id = c->apicid >> index_msb; break; default: break; @@ -215,11 +225,19 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) } } } - if (c->cpuid_level > 1) { + /* + * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for + * trace cache + */ + if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) { /* supports eax=2 call */ int i, j, n; int regs[4]; unsigned char *dp = (unsigned char *)regs; + int only_trace = 0; + + if (num_cache_leaves != 0 && c->x86 == 15) + only_trace = 1; /* Number of times to iterate */ n = cpuid_eax(2) & 0xFF; @@ -241,6 +259,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) while (cache_table[k].descriptor != 0) { if (cache_table[k].descriptor == des) { + if (only_trace && cache_table[k].cache_type != LVL_TRACE) + break; switch (cache_table[k].cache_type) { case LVL_1_INST: l1i += cache_table[k].size; @@ -266,34 +286,45 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) } } } + } - if (new_l1d) - l1d = new_l1d; + if (new_l1d) + l1d = new_l1d; - if (new_l1i) - l1i = new_l1i; + if (new_l1i) + l1i = new_l1i; - if (new_l2) - l2 = new_l2; + if (new_l2) { + l2 = new_l2; +#ifdef CONFIG_SMP + cpu_llc_id[cpu] = l2_id; +#endif + } - if (new_l3) - l3 = new_l3; + if (new_l3) { + l3 = new_l3; +#ifdef CONFIG_SMP + cpu_llc_id[cpu] = l3_id; +#endif + } - if ( trace ) - printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); - else if ( l1i ) - printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); - if ( l1d ) - printk(", L1 D cache: %dK\n", l1d); - else - printk("\n"); - if ( l2 ) - printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); - if ( l3 ) - printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); + if (trace) + printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); + else if ( l1i ) + printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); - c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); - } + if (l1d) + printk(", L1 D cache: %dK\n", l1d); + else + printk("\n"); + + if (l2) + printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); + + if (l3) + printk(KERN_INFO "CPU: L3 cache: %dK\n", l3); + + c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); return l2; } @@ -330,7 +361,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) } } } -static void __devinit cache_remove_shared_cpu_map(unsigned int cpu, int index) +static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) { struct _cpuid4_info *this_leaf, *sibling_leaf; int sibling; diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c index 6170af3..afa0888 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.c +++ b/arch/i386/kernel/cpu/mcheck/mce.c @@ -64,13 +64,13 @@ void mcheck_init(struct cpuinfo_x86 *c) static int __init mcheck_disable(char *str) { mce_disabled = 1; - return 0; + return 1; } static int __init mcheck_enable(char *str) { mce_disabled = -1; - return 0; + return 1; } __setup("nomce", mcheck_disable); diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index 3b4618b..fff90bd 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c @@ -36,6 +36,7 @@ #include <linux/pci.h> #include <linux/smp.h> #include <linux/cpu.h> +#include <linux/mutex.h> #include <asm/mtrr.h> @@ -47,7 +48,7 @@ u32 num_var_ranges = 0; unsigned int *usage_table; -static DECLARE_MUTEX(mtrr_sem); +static DEFINE_MUTEX(mtrr_mutex); u32 size_or_mask, size_and_mask; @@ -333,7 +334,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, /* No CPU hotplug when we change MTRR entries */ lock_cpu_hotplug(); /* Search for existing MTRR */ - down(&mtrr_sem); + mutex_lock(&mtrr_mutex); for (i = 0; i < num_var_ranges; ++i) { mtrr_if->get(i, &lbase, &lsize, <ype); if (base >= lbase + lsize) @@ -371,7 +372,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, printk(KERN_INFO "mtrr: no more MTRRs available\n"); error = i; out: - up(&mtrr_sem); + mutex_unlock(&mtrr_mutex); unlock_cpu_hotplug(); return error; } @@ -464,7 +465,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) max = num_var_ranges; /* No CPU hotplug when we change MTRR entries */ lock_cpu_hotplug(); - down(&mtrr_sem); + mutex_lock(&mtrr_mutex); if (reg < 0) { /* Search for existing MTRR */ for (i = 0; i < max; ++i) { @@ -503,7 +504,7 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) set_mtrr(reg, 0, 0, 0); error = reg; out: - up(&mtrr_sem); + mutex_unlock(&mtrr_mutex); unlock_cpu_hotplug(); return error; } @@ -685,7 +686,7 @@ void mtrr_ap_init(void) if (!mtrr_if || !use_intel()) return; /* - * Ideally we should hold mtrr_sem here to avoid mtrr entries changed, + * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed, * but this routine will be called in cpu boot time, holding the lock * breaks it. This routine is called in two cases: 1.very earily time * of software resume, when there absolutely isn't mtrr entry changes; diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 89a85af..f94cdb7 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -40,12 +40,12 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* Other (Linux-defined) */ "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL, - "constant_tsc", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "constant_tsc", "up", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* Intel-defined (#2) */ - "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est", + "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index d49dbe8..e3c5fca0 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -105,7 +105,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) return 1; local_irq_disable(); - if (!user_mode(regs)) { + if (!user_mode_vm(regs)) { crash_fixup_ss_esp(&fixed_regs, regs); regs = &fixed_regs; } diff --git a/arch/i386/kernel/dmi_scan.c b/arch/i386/kernel/dmi_scan.c index 6a93d75..5efceeb 100644 --- a/arch/i386/kernel/dmi_scan.c +++ b/arch/i386/kernel/dmi_scan.c @@ -3,8 +3,10 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/dmi.h> +#include <linux/efi.h> #include <linux/bootmem.h> #include <linux/slab.h> +#include <asm/dmi.h> static char * __init dmi_string(struct dmi_header *dm, u8 s) { @@ -106,7 +108,7 @@ static void __init dmi_save_devices(struct dmi_header *dm) struct dmi_device *dev; for (i = 0; i < count; i++) { - char *d = ((char *) dm) + (i * 2); + char *d = (char *)(dm + 1) + (i * 2); /* Skip disabled device */ if ((*d & 0x80) == 0) @@ -184,47 +186,72 @@ static void __init dmi_decode(struct dmi_header *dm) } } -void __init dmi_scan_machine(void) +static int __init dmi_present(char __iomem *p) { u8 buf[15]; - char __iomem *p, *q; + memcpy_fromio(buf, p, 15); + if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) { + u16 num = (buf[13] << 8) | buf[12]; + u16 len = (buf[7] << 8) | buf[6]; + u32 base = (buf[11] << 24) | (buf[10] << 16) | + (buf[9] << 8) | buf[8]; - /* - * no iounmap() for that ioremap(); it would be a no-op, but it's - * so early in setup that sucker gets confused into doing what - * it shouldn't if we actually call it. - */ - p = ioremap(0xF0000, 0x10000); - if (p == NULL) - goto out; - - for (q = p; q < p + 0x10000; q += 16) { - memcpy_fromio(buf, q, 15); - if ((memcmp(buf, "_DMI_", 5) == 0) && dmi_checksum(buf)) { - u16 num = (buf[13] << 8) | buf[12]; - u16 len = (buf[7] << 8) | buf[6]; - u32 base = (buf[11] << 24) | (buf[10] << 16) | - (buf[9] << 8) | buf[8]; - - /* - * DMI version 0.0 means that the real version is taken from - * the SMBIOS version, which we don't know at this point. - */ - if (buf[14] != 0) - printk(KERN_INFO "DMI %d.%d present.\n", - buf[14] >> 4, buf[14] & 0xF); - else - printk(KERN_INFO "DMI present.\n"); + /* + * DMI version 0.0 means that the real version is taken from + * the SMBIOS version, which we don't know at this point. + */ + if (buf[14] != 0) + printk(KERN_INFO "DMI %d.%d present.\n", + buf[14] >> 4, buf[14] & 0xF); + else + printk(KERN_INFO "DMI present.\n"); + if (dmi_table(base,len, num, dmi_decode) == 0) + return 0; + } + return 1; +} + +void __init dmi_scan_machine(void) +{ + char __iomem *p, *q; + int rc; + + if (efi_enabled) { + if (efi.smbios == EFI_INVALID_TABLE_ADDR) + goto out; + + /* This is called as a core_initcall() because it isn't + * needed during early boot. This also means we can + * iounmap the space when we're done with it. + */ + p = dmi_ioremap(efi.smbios, 32); + if (p == NULL) + goto out; + + rc = dmi_present(p + 0x10); /* offset of _DMI_ string */ + dmi_iounmap(p, 32); + if (!rc) + return; + } + else { + /* + * no iounmap() for that ioremap(); it would be a no-op, but + * it's so early in setup that sucker gets confused into doing + * what it shouldn't if we actually call it. + */ + p = dmi_ioremap(0xF0000, 0x10000); + if (p == NULL) + goto out; - if (dmi_table(base,len, num, dmi_decode) == 0) + for (q = p; q < p + 0x10000; q += 16) { + rc = dmi_present(q); + if (!rc) return; } } - -out: printk(KERN_INFO "DMI not present or invalid.\n"); + out: printk(KERN_INFO "DMI not present or invalid.\n"); } - /** * dmi_check_system - check system DMI data * @list: array of dmi_system_id structures to match against @@ -299,3 +326,33 @@ struct dmi_device * dmi_find_device(int type, const char *name, return NULL; } EXPORT_SYMBOL(dmi_find_device); + +/** + * dmi_get_year - Return year of a DMI date + * @field: data index (like dmi_get_system_info) + * + * Returns -1 when the field doesn't exist. 0 when it is broken. + */ +int dmi_get_year(int field) +{ + int year; + char *s = dmi_get_system_info(field); + + if (!s) + return -1; + if (*s == '\0') + return 0; + s = strrchr(s, '/'); + if (!s) + return 0; + + s += 1; + year = simple_strtoul(s, NULL, 0); + if (year && year < 100) { /* 2-digit year */ + year += 1900; + if (year < 1996) /* no dates < spec 1.0 */ + year += 100; + } + + return year; +} diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index c9cad7b..9202b67 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -115,7 +115,7 @@ static void efi_call_phys_epilog(void) unsigned long cr4; struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); - cpu_gdt_descr->address = __va(cpu_gdt_descr->address); + cpu_gdt_descr->address = (unsigned long)__va(cpu_gdt_descr->address); load_gdt(cpu_gdt_descr); cr4 = read_cr4(); @@ -361,7 +361,7 @@ void __init efi_init(void) */ c16 = (efi_char16_t *) boot_ioremap(efi.systab->fw_vendor, 2); if (c16) { - for (i = 0; i < sizeof(vendor) && *c16; ++i) + for (i = 0; i < (sizeof(vendor) - 1) && *c16; ++i) vendor[i] = *c16++; vendor[i] = '\0'; } else @@ -381,29 +381,38 @@ void __init efi_init(void) if (config_tables == NULL) printk(KERN_ERR PFX "Could not map EFI Configuration Table!\n"); + efi.mps = EFI_INVALID_TABLE_ADDR; + efi.acpi = EFI_INVALID_TABLE_ADDR; + efi.acpi20 = EFI_INVALID_TABLE_ADDR; + efi.smbios = EFI_INVALID_TABLE_ADDR; + efi.sal_systab = EFI_INVALID_TABLE_ADDR; + efi.boot_info = EFI_INVALID_TABLE_ADDR; + efi.hcdp = EFI_INVALID_TABLE_ADDR; + efi.uga = EFI_INVALID_TABLE_ADDR; + for (i = 0; i < num_config_tables; i++) { if (efi_guidcmp(config_tables[i].guid, MPS_TABLE_GUID) == 0) { - efi.mps = (void *)config_tables[i].table; + efi.mps = config_tables[i].table; printk(KERN_INFO " MPS=0x%lx ", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, ACPI_20_TABLE_GUID) == 0) { - efi.acpi20 = __va(config_tables[i].table); + efi.acpi20 = config_tables[i].table; printk(KERN_INFO " ACPI 2.0=0x%lx ", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, ACPI_TABLE_GUID) == 0) { - efi.acpi = __va(config_tables[i].table); + efi.acpi = config_tables[i].table; printk(KERN_INFO " ACPI=0x%lx ", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, SMBIOS_TABLE_GUID) == 0) { - efi.smbios = (void *) config_tables[i].table; + efi.smbios = config_tables[i].table; printk(KERN_INFO " SMBIOS=0x%lx ", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, HCDP_TABLE_GUID) == 0) { - efi.hcdp = (void *)config_tables[i].table; + efi.hcdp = config_tables[i].table; printk(KERN_INFO " HCDP=0x%lx ", config_tables[i].table); } else if (efi_guidcmp(config_tables[i].guid, UGA_IO_PROTOCOL_GUID) == 0) { - efi.uga = (void *)config_tables[i].table; + efi.uga = config_tables[i].table; printk(KERN_INFO " UGA=0x%lx ", config_tables[i].table); } } @@ -543,7 +552,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) > 0x100000000ULL) continue; - res = alloc_bootmem_low(sizeof(struct resource)); + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); switch (md->type) { case EFI_RESERVED_TYPE: res->name = "Reserved Memory"; diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 4d70472..cfc683f 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -226,6 +226,10 @@ ENTRY(system_call) pushl %eax # save orig_eax SAVE_ALL GET_THREAD_INFO(%ebp) + testl $TF_MASK,EFLAGS(%esp) + jz no_singlestep + orl $_TIF_SINGLESTEP,TI_flags(%ebp) +no_singlestep: # system call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index e0b7c63..3debc2e 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -450,7 +450,6 @@ int_msg: .globl boot_gdt_descr .globl idt_descr -.globl cpu_gdt_descr ALIGN # early boot GDT descriptor (must use 1:1 address mapping) @@ -470,8 +469,6 @@ cpu_gdt_descr: .word GDT_ENTRIES*8-1 .long cpu_gdt_table - .fill NR_CPUS-1,8,0 # space for the other GDT descriptors - /* * The boot_gdt_table must mirror the equivalent in setup.S and is * used only for booting. @@ -485,7 +482,7 @@ ENTRY(boot_gdt_table) /* * The Global Descriptor Table contains 28 quadwords, per-CPU. */ - .align PAGE_SIZE_asm + .align L1_CACHE_BYTES ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ .quad 0x0000000000000000 /* 0x0b reserved */ diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 39d9a5f..f8f132a 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -351,8 +351,8 @@ static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) { int i, j; Dprintk("Rotating IRQs among CPUs.\n"); - for (i = 0; i < NR_CPUS; i++) { - for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) { + for_each_online_cpu(i) { + for (j = 0; j < NR_IRQS; j++) { if (!irq_desc[j].action) continue; /* Is it a significant load ? */ @@ -381,7 +381,7 @@ static void do_irq_balance(void) unsigned long imbalance = 0; cpumask_t allowed_mask, target_cpu_mask, tmp; - for (i = 0; i < NR_CPUS; i++) { + for_each_possible_cpu(i) { int package_index; CPU_IRQ(i) = 0; if (!cpu_online(i)) @@ -422,9 +422,7 @@ static void do_irq_balance(void) } } /* Find the least loaded processor package */ - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_online(i)) - continue; + for_each_online_cpu(i) { if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (min_cpu_irq > CPU_IRQ(i)) { @@ -441,9 +439,7 @@ tryanothercpu: */ tmp_cpu_irq = 0; tmp_loaded = -1; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_online(i)) - continue; + for_each_online_cpu(i) { if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (max_cpu_irq <= CPU_IRQ(i)) @@ -619,9 +615,7 @@ static int __init balanced_irq_init(void) if (smp_num_siblings > 1 && !cpus_empty(tmp)) physical_balance = 1; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_online(i)) - continue; + for_each_online_cpu(i) { irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { @@ -638,9 +632,11 @@ static int __init balanced_irq_init(void) else printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); failed: - for (i = 0; i < NR_CPUS; i++) { + for_each_possible_cpu(i) { kfree(irq_cpu_data[i].irq_delta); + irq_cpu_data[i].irq_delta = NULL; kfree(irq_cpu_data[i].last_irq); + irq_cpu_data[i].last_irq = NULL; } return 0; } @@ -648,7 +644,7 @@ failed: int __init irqbalance_disable(char *str) { irqbalance_disabled = 1; - return 0; + return 1; } __setup("noirqbalance", irqbalance_disable); @@ -1761,7 +1757,8 @@ static void __init setup_ioapic_ids_from_mpc(void) * Don't check I/O APIC IDs for xAPIC systems. They have * no meaning without the serial APIC bus. */ - if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 < 15)) + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) return; /* * This is broken; anything with a real cpu count has to diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 694a139..f197687 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -35,12 +35,56 @@ #include <asm/cacheflush.h> #include <asm/kdebug.h> #include <asm/desc.h> +#include <asm/uaccess.h> void jprobe_return_end(void); DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); +/* insert a jmp code */ +static inline void set_jmp_op(void *from, void *to) +{ + struct __arch_jmp_op { + char op; + long raddr; + } __attribute__((packed)) *jop; + jop = (struct __arch_jmp_op *)from; + jop->raddr = (long)(to) - ((long)(from) + 5); + jop->op = RELATIVEJUMP_INSTRUCTION; +} + +/* + * returns non-zero if opcodes can be boosted. + */ +static inline int can_boost(kprobe_opcode_t opcode) +{ + switch (opcode & 0xf0 ) { + case 0x70: + return 0; /* can't boost conditional jump */ + case 0x90: + /* can't boost call and pushf */ + return opcode != 0x9a && opcode != 0x9c; + case 0xc0: + /* can't boost undefined opcodes and soft-interruptions */ + return (0xc1 < opcode && opcode < 0xc6) || + (0xc7 < opcode && opcode < 0xcc) || opcode == 0xcf; + case 0xd0: + /* can boost AA* and XLAT */ + return (opcode == 0xd4 || opcode == 0xd5 || opcode == 0xd7); + case 0xe0: + /* can boost in/out and (may be) jmps */ + return (0xe3 < opcode && opcode != 0xe8); + case 0xf0: + /* clear and set flags can be boost */ + return (opcode == 0xf5 || (0xf7 < opcode && opcode < 0xfe)); + default: + /* currently, can't boost 2 bytes opcodes */ + return opcode != 0x0f; + } +} + + /* * returns non-zero if opcode modifies the interrupt flag. */ @@ -65,6 +109,11 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); p->opcode = *p->addr; + if (can_boost(p->opcode)) { + p->ainsn.boostable = 0; + } else { + p->ainsn.boostable = -1; + } return 0; } @@ -84,9 +133,9 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - down(&kprobe_mutex); + mutex_lock(&kprobe_mutex); free_insn_slot(p->ainsn.insn); - up(&kprobe_mutex); + mutex_unlock(&kprobe_mutex); } static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) @@ -155,9 +204,13 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p; int ret = 0; - kprobe_opcode_t *addr = NULL; - unsigned long *lp; + kprobe_opcode_t *addr; struct kprobe_ctlblk *kcb; +#ifdef CONFIG_PREEMPT + unsigned pre_preempt_count = preempt_count(); +#endif /* CONFIG_PREEMPT */ + + addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); /* * We don't want to be preempted for the entire @@ -166,17 +219,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) preempt_disable(); kcb = get_kprobe_ctlblk(); - /* Check if the application is using LDT entry for its code segment and - * calculate the address by reading the base address from the LDT entry. - */ - if ((regs->xcs & 4) && (current->mm)) { - lp = (unsigned long *) ((unsigned long)((regs->xcs >> 3) * 8) - + (char *) current->mm->context.ldt); - addr = (kprobe_opcode_t *) (get_desc_base(lp) + regs->eip - - sizeof(kprobe_opcode_t)); - } else { - addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); - } /* Check we're not actually recursing */ if (kprobe_running()) { p = get_kprobe(addr); @@ -252,6 +294,21 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) /* handler has already set things up, so skip ss setup */ return 1; + if (p->ainsn.boostable == 1 && +#ifdef CONFIG_PREEMPT + !(pre_preempt_count) && /* + * This enables booster when the direct + * execution path aren't preempted. + */ +#endif /* CONFIG_PREEMPT */ + !p->post_handler && !p->break_handler ) { + /* Boost up -- we can execute copied instructions directly */ + reset_current_kprobe(); + regs->eip = (unsigned long)p->ainsn.insn; + preempt_enable_no_resched(); + return 1; + } + ss_probe: prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_HIT_SS; @@ -267,17 +324,44 @@ no_kprobe: * here. When a retprobed function returns, this probe is hit and * trampoline_probe_handler() runs, calling the kretprobe's handler. */ - void kretprobe_trampoline_holder(void) + void __kprobes kretprobe_trampoline_holder(void) { - asm volatile ( ".global kretprobe_trampoline\n" + asm volatile ( ".global kretprobe_trampoline\n" "kretprobe_trampoline: \n" - "nop\n"); - } + " pushf\n" + /* skip cs, eip, orig_eax, es, ds */ + " subl $20, %esp\n" + " pushl %eax\n" + " pushl %ebp\n" + " pushl %edi\n" + " pushl %esi\n" + " pushl %edx\n" + " pushl %ecx\n" + " pushl %ebx\n" + " movl %esp, %eax\n" + " call trampoline_handler\n" + /* move eflags to cs */ + " movl 48(%esp), %edx\n" + " movl %edx, 44(%esp)\n" + /* save true return address on eflags */ + " movl %eax, 48(%esp)\n" + " popl %ebx\n" + " popl %ecx\n" + " popl %edx\n" + " popl %esi\n" + " popl %edi\n" + " popl %ebp\n" + " popl %eax\n" + /* skip eip, orig_eax, es, ds */ + " addl $16, %esp\n" + " popf\n" + " ret\n"); +} /* - * Called when we hit the probe point at kretprobe_trampoline + * Called from kretprobe_trampoline */ -int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +fastcall void *__kprobes trampoline_handler(struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head; @@ -306,8 +390,11 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) /* another task is sharing our hash bucket */ continue; - if (ri->rp && ri->rp->handler) + if (ri->rp && ri->rp->handler){ + __get_cpu_var(current_kprobe) = &ri->rp->kp; ri->rp->handler(ri, regs); + __get_cpu_var(current_kprobe) = NULL; + } orig_ret_address = (unsigned long)ri->ret_addr; recycle_rp_inst(ri); @@ -322,18 +409,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) } BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); - regs->eip = orig_ret_address; - reset_current_kprobe(); spin_unlock_irqrestore(&kretprobe_lock, flags); - preempt_enable_no_resched(); - /* - * By returning a non-zero value, we are telling - * kprobe_handler() that we don't want the post_handler - * to run (and have re-enabled preemption) - */ - return 1; + return (void*)orig_ret_address; } /* @@ -357,15 +436,17 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) * 2) If the single-stepped instruction was a call, the return address * that is atop the stack is the address following the copied instruction. * We need to make it the address following the original instruction. + * + * This function also checks instruction size for preparing direct execution. */ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { unsigned long *tos = (unsigned long *)®s->esp; - unsigned long next_eip = 0; unsigned long copy_eip = (unsigned long)p->ainsn.insn; unsigned long orig_eip = (unsigned long)p->addr; + regs->eflags &= ~TF_MASK; switch (p->ainsn.insn[0]) { case 0x9c: /* pushfl */ *tos &= ~(TF_MASK | IF_MASK); @@ -375,37 +456,51 @@ static void __kprobes resume_execution(struct kprobe *p, case 0xcb: case 0xc2: case 0xca: - regs->eflags &= ~TF_MASK; - /* eip is already adjusted, no more changes required*/ - return; + case 0xea: /* jmp absolute -- eip is correct */ + /* eip is already adjusted, no more changes required */ + p->ainsn.boostable = 1; + goto no_change; case 0xe8: /* call relative - Fix return addr */ *tos = orig_eip + (*tos - copy_eip); break; case 0xff: if ((p->ainsn.insn[1] & 0x30) == 0x10) { /* call absolute, indirect */ - /* Fix return addr; eip is correct. */ - next_eip = regs->eip; + /* + * Fix return addr; eip is correct. + * But this is not boostable + */ *tos = orig_eip + (*tos - copy_eip); + goto no_change; } else if (((p->ainsn.insn[1] & 0x31) == 0x20) || /* jmp near, absolute indirect */ ((p->ainsn.insn[1] & 0x31) == 0x21)) { /* jmp far, absolute indirect */ - /* eip is correct. */ - next_eip = regs->eip; + /* eip is correct. And this is boostable */ + p->ainsn.boostable = 1; + goto no_change; } - break; - case 0xea: /* jmp absolute -- eip is correct */ - next_eip = regs->eip; - break; default: break; } - regs->eflags &= ~TF_MASK; - if (next_eip) { - regs->eip = next_eip; - } else { - regs->eip = orig_eip + (regs->eip - copy_eip); + if (p->ainsn.boostable == 0) { + if ((regs->eip > copy_eip) && + (regs->eip - copy_eip) + 5 < MAX_INSN_SIZE) { + /* + * These instructions can be executed directly if it + * jumps back to correct address. + */ + set_jmp_op((void *)regs->eip, + (void *)orig_eip + (regs->eip - copy_eip)); + p->ainsn.boostable = 1; + } else { + p->ainsn.boostable = -1; + } } + + regs->eip = orig_eip + (regs->eip - copy_eip); + +no_change: + return; } /* @@ -453,15 +548,57 @@ static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr) struct kprobe *cur = kprobe_running(); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); - if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) - return 1; - - if (kcb->kprobe_status & KPROBE_HIT_SS) { - resume_execution(cur, regs, kcb); + switch(kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the eip points back to the probe address + * and allow the page fault handler to continue as a + * normal page fault. + */ + regs->eip = (unsigned long)cur->addr; regs->eflags |= kcb->kprobe_old_eflags; - - reset_current_kprobe(); + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * We increment the nmissed count for accounting, + * we can also use npre/npostfault count for accouting + * these specific fault cases. + */ + kprobes_inc_nmissed_count(cur); + + /* + * We come here because instructions in the pre/post + * handler caused the page_fault, this could happen + * if handler tries to access user space by + * copy_from_user(), get_user() etc. Let the + * user-specified handler try to fix it first. + */ + if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) + return 1; + + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if (fixup_exception(regs)) + return 1; + + /* + * fixup_exception() could not handle it, + * Let do_page_fault() fix it. + */ + break; + default: + break; } return 0; } @@ -475,6 +612,9 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int ret = NOTIFY_DONE; + if (args->regs && user_mode(args->regs)) + return ret; + switch (val) { case DIE_INT3: if (kprobe_handler(args->regs)) @@ -564,12 +704,7 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) return 0; } -static struct kprobe trampoline_p = { - .addr = (kprobe_opcode_t *) &kretprobe_trampoline, - .pre_handler = trampoline_probe_handler -}; - int __init arch_init_kprobes(void) { - return register_kprobe(&trampoline_p); + return 0; } diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c index 5390b52..e7c138f 100644 --- a/arch/i386/kernel/microcode.c +++ b/arch/i386/kernel/microcode.c @@ -81,6 +81,7 @@ #include <linux/miscdevice.h> #include <linux/spinlock.h> #include <linux/mm.h> +#include <linux/mutex.h> #include <asm/msr.h> #include <asm/uaccess.h> @@ -114,7 +115,7 @@ MODULE_LICENSE("GPL"); static DEFINE_SPINLOCK(microcode_update_lock); /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ -static DECLARE_MUTEX(microcode_sem); +static DEFINE_MUTEX(microcode_mutex); static void __user *user_buffer; /* user area microcode data buffer */ static unsigned int user_buffer_size; /* it's size */ @@ -202,8 +203,6 @@ static inline void mark_microcode_update (int cpu_num, microcode_header_t *mc_he } else if (mc_header->rev == uci->rev) { /* notify the caller of success on this cpu */ uci->err = MC_SUCCESS; - printk(KERN_ERR "microcode: CPU%d already at revision" - " 0x%x (current=0x%x)\n", cpu_num, mc_header->rev, uci->rev); goto out; } @@ -369,7 +368,6 @@ static void do_update_one (void * unused) struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; if (uci->mc == NULL) { - printk(KERN_INFO "microcode: No new microcode data for CPU%d\n", cpu_num); return; } @@ -447,7 +445,7 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_ return -EINVAL; } - down(µcode_sem); + mutex_lock(µcode_mutex); user_buffer = (void __user *) buf; user_buffer_size = (int) len; @@ -456,31 +454,14 @@ static ssize_t microcode_write (struct file *file, const char __user *buf, size_ if (!ret) ret = (ssize_t)len; - up(µcode_sem); + mutex_unlock(µcode_mutex); return ret; } -static int microcode_ioctl (struct inode *inode, struct file *file, - unsigned int cmd, unsigned long arg) -{ - switch (cmd) { - /* - * XXX: will be removed after microcode_ctl - * is updated to ignore failure of this ioctl() - */ - case MICROCODE_IOCFREE: - return 0; - default: - return -EINVAL; - } - return -EINVAL; -} - static struct file_operations microcode_fops = { .owner = THIS_MODULE, .write = microcode_write, - .ioctl = microcode_ioctl, .open = microcode_open, }; @@ -511,7 +492,6 @@ static int __init microcode_init (void) static void __exit microcode_exit (void) { misc_deregister(µcode_dev); - printk(KERN_INFO "IA-32 Microcode Update Driver v" MICROCODE_VERSION " unregistered\n"); } module_init(microcode_init) diff --git a/arch/i386/kernel/module.c b/arch/i386/kernel/module.c index 5149c8a..470cf97 100644 --- a/arch/i386/kernel/module.c +++ b/arch/i386/kernel/module.c @@ -104,26 +104,38 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, return -ENOEXEC; } -extern void apply_alternatives(void *start, void *end); - int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - const Elf_Shdr *s; + const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - /* look for .altinstructions to patch */ for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - void *seg; - if (strcmp(".altinstructions", secstrings + s->sh_name)) - continue; - seg = (void *)s->sh_addr; - apply_alternatives(seg, seg + s->sh_size); - } + if (!strcmp(".text", secstrings + s->sh_name)) + text = s; + if (!strcmp(".altinstructions", secstrings + s->sh_name)) + alt = s; + if (!strcmp(".smp_locks", secstrings + s->sh_name)) + locks= s; + } + + if (alt) { + /* patch .altinstructions */ + void *aseg = (void *)alt->sh_addr; + apply_alternatives(aseg, aseg + alt->sh_size); + } + if (locks && text) { + void *lseg = (void *)locks->sh_addr; + void *tseg = (void *)text->sh_addr; + alternatives_smp_module_add(me, me->name, + lseg, lseg + locks->sh_size, + tseg, tseg + text->sh_size); + } return 0; } void module_arch_cleanup(struct module *mod) { + alternatives_smp_module_del(mod); } diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index e6e2f43..8d8aa9d 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -828,6 +828,8 @@ void __init find_smp_config (void) smp_scan_config(address, 0x400); } +int es7000_plat; + /* -------------------------------------------------------------------------- ACPI-based MP Configuration -------------------------------------------------------------------------- */ @@ -935,7 +937,8 @@ void __init mp_register_ioapic ( mp_ioapics[idx].mpc_apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15)) + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) tmpid = io_apic_get_unique_id(idx, id); else tmpid = id; @@ -1011,8 +1014,6 @@ void __init mp_override_legacy_irq ( return; } -int es7000_plat; - void __init mp_config_acpi_legacy_irqs (void) { struct mpc_config_intsrc intsrc; diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index be87c5e..d43b498 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -138,12 +138,12 @@ static int __init check_nmi_watchdog(void) if (nmi_watchdog == NMI_LOCAL_APIC) smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); - for_each_cpu(cpu) + for_each_possible_cpu(cpu) prev_nmi_count[cpu] = per_cpu(irq_stat, cpu).__nmi_count; local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for_each_possible_cpu(cpu) { #ifdef CONFIG_SMP /* Check cpu_callin_map here because that is set after the timer is started. */ @@ -510,7 +510,7 @@ void touch_nmi_watchdog (void) * Just reset the alert counters, (other CPUs might be * spinning on locks we hold): */ - for (i = 0; i < NR_CPUS; i++) + for_each_possible_cpu(i) alert_counter[i] = 0; /* @@ -529,7 +529,8 @@ void nmi_watchdog_tick (struct pt_regs * regs) * always switch the stack NMI-atomically, it's safe to use * smp_processor_id(). */ - int sum, cpu = smp_processor_id(); + unsigned int sum; + int cpu = smp_processor_id(); sum = per_cpu(irq_stat, cpu).apic_timer_irqs; @@ -543,7 +544,7 @@ void nmi_watchdog_tick (struct pt_regs * regs) /* * die_nmi will return ONLY if NOTIFY_STOP happens.. */ - die_nmi(regs, "NMI Watchdog detected LOCKUP"); + die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP"); } else { last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 0480454..6259afe 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -38,7 +38,6 @@ #include <linux/kallsyms.h> #include <linux/ptrace.h> #include <linux/random.h> -#include <linux/kprobes.h> #include <asm/uaccess.h> #include <asm/pgtable.h> @@ -295,7 +294,7 @@ void show_regs(struct pt_regs * regs) printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); - if (user_mode(regs)) + if (user_mode_vm(regs)) printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); printk(" EFLAGS: %08lx %s (%s %.*s)\n", regs->eflags, print_tainted(), system_utsname.release, @@ -364,13 +363,6 @@ void exit_thread(void) struct task_struct *tsk = current; struct thread_struct *t = &tsk->thread; - /* - * Remove function-return probe instances associated with this task - * and put them back on the free list. Do not insert an exit probe for - * this function, it will be disabled by kprobe_flush_task if you do. - */ - kprobe_flush_task(tsk); - /* The process may have allocated an io port bitmap... nuke it. */ if (unlikely(NULL != t->io_bitmap_ptr)) { int cpu = get_cpu(); @@ -789,7 +781,6 @@ unsigned long get_wchan(struct task_struct *p) } while (count++ < 16); return 0; } -EXPORT_SYMBOL(get_wchan); /* * sys_alloc_thread_area: get a yet unused TLS descriptor index. diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 5c1fb6a..506462e 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -34,10 +34,10 @@ /* * Determines which flags the user has access to [1 = access, 0 = no access]. - * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9). + * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), NT(14), IOPL(12-13), IF(9). * Also masks reserved bits (31-22, 15, 5, 3, 1). */ -#define FLAG_MASK 0x00054dd5 +#define FLAG_MASK 0x00050dd5 /* set's the trap flag. */ #define TRAP_FLAG 0x100 diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c index 7455ab6..967dc74 100644 --- a/arch/i386/kernel/semaphore.c +++ b/arch/i386/kernel/semaphore.c @@ -110,11 +110,11 @@ asm( ".align 4\n" ".globl __write_lock_failed\n" "__write_lock_failed:\n\t" - LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" + LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" "1: rep; nop\n\t" "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" "jne 1b\n\t" - LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" + LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" "jnz __write_lock_failed\n\t" "ret" ); @@ -124,11 +124,11 @@ asm( ".align 4\n" ".globl __read_lock_failed\n" "__read_lock_failed:\n\t" - LOCK "incl (%eax)\n" + LOCK_PREFIX "incl (%eax)\n" "1: rep; nop\n\t" "cmpl $1,(%eax)\n\t" "js 1b\n\t" - LOCK "decl (%eax)\n\t" + LOCK_PREFIX "decl (%eax)\n\t" "js __read_lock_failed\n\t" "ret" ); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index a0b4075..eacc3f0 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -47,6 +47,7 @@ #include <linux/kexec.h> #include <linux/crash_dump.h> #include <linux/dmi.h> +#include <linux/pfn.h> #include <video/edid.h> @@ -1059,10 +1060,10 @@ static int __init free_available_memory(unsigned long start, unsigned long end, void *arg) { /* check max_low_pfn */ - if (start >= ((max_low_pfn + 1) << PAGE_SHIFT)) + if (start >= (max_low_pfn << PAGE_SHIFT)) return 0; - if (end >= ((max_low_pfn + 1) << PAGE_SHIFT)) - end = (max_low_pfn + 1) << PAGE_SHIFT; + if (end >= (max_low_pfn << PAGE_SHIFT)) + end = max_low_pfn << PAGE_SHIFT; if (start < end) free_bootmem(start, end - start); @@ -1287,9 +1288,7 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat probe_roms(); for (i = 0; i < e820.nr_map; i++) { struct resource *res; - if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) - continue; - res = alloc_bootmem_low(sizeof(struct resource)); + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); switch (e820.map[i].type) { case E820_RAM: res->name = "System RAM"; break; case E820_ACPI: res->name = "ACPI Tables"; break; @@ -1317,13 +1316,15 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat /* * Request address space for all standard resources + * + * This is called just before pcibios_assign_resources(), which is also + * an fs_initcall, but is linked in later (in arch/i386/pci/i386.c). */ -static void __init register_memory(void) +static int __init request_standard_resources(void) { - unsigned long gapstart, gapsize, round; - unsigned long long last; - int i; + int i; + printk("Setting up standard PCI resources\n"); if (efi_enabled) efi_initialize_iomem_resources(&code_resource, &data_resource); else @@ -1335,6 +1336,16 @@ static void __init register_memory(void) /* request I/O space for devices used on all i[345]86 PCs */ for (i = 0; i < STANDARD_IO_RESOURCES; i++) request_resource(&ioport_resource, &standard_io_resources[i]); + return 0; +} + +fs_initcall(request_standard_resources); + +static void __init register_memory(void) +{ + unsigned long gapstart, gapsize, round; + unsigned long long last; + int i; /* * Search for the bigest gap in the low 32 bits of the e820 @@ -1378,101 +1389,6 @@ static void __init register_memory(void) pci_mem_start, gapstart, gapsize); } -/* Use inline assembly to define this because the nops are defined - as inline assembly strings in the include files and we cannot - get them easily into strings. */ -asm("\t.data\nintelnops: " - GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 - GENERIC_NOP7 GENERIC_NOP8); -asm("\t.data\nk8nops: " - K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 - K8_NOP7 K8_NOP8); -asm("\t.data\nk7nops: " - K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 - K7_NOP7 K7_NOP8); - -extern unsigned char intelnops[], k8nops[], k7nops[]; -static unsigned char *intel_nops[ASM_NOP_MAX+1] = { - NULL, - intelnops, - intelnops + 1, - intelnops + 1 + 2, - intelnops + 1 + 2 + 3, - intelnops + 1 + 2 + 3 + 4, - intelnops + 1 + 2 + 3 + 4 + 5, - intelnops + 1 + 2 + 3 + 4 + 5 + 6, - intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -static unsigned char *k8_nops[ASM_NOP_MAX+1] = { - NULL, - k8nops, - k8nops + 1, - k8nops + 1 + 2, - k8nops + 1 + 2 + 3, - k8nops + 1 + 2 + 3 + 4, - k8nops + 1 + 2 + 3 + 4 + 5, - k8nops + 1 + 2 + 3 + 4 + 5 + 6, - k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -static unsigned char *k7_nops[ASM_NOP_MAX+1] = { - NULL, - k7nops, - k7nops + 1, - k7nops + 1 + 2, - k7nops + 1 + 2 + 3, - k7nops + 1 + 2 + 3 + 4, - k7nops + 1 + 2 + 3 + 4 + 5, - k7nops + 1 + 2 + 3 + 4 + 5 + 6, - k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -static struct nop { - int cpuid; - unsigned char **noptable; -} noptypes[] = { - { X86_FEATURE_K8, k8_nops }, - { X86_FEATURE_K7, k7_nops }, - { -1, NULL } -}; - -/* Replace instructions with better alternatives for this CPU type. - - This runs before SMP is initialized to avoid SMP problems with - self modifying code. This implies that assymetric systems where - APs have less capabilities than the boot processor are not handled. - Tough. Make sure you disable such features by hand. */ -void apply_alternatives(void *start, void *end) -{ - struct alt_instr *a; - int diff, i, k; - unsigned char **noptable = intel_nops; - for (i = 0; noptypes[i].cpuid >= 0; i++) { - if (boot_cpu_has(noptypes[i].cpuid)) { - noptable = noptypes[i].noptable; - break; - } - } - for (a = start; (void *)a < end; a++) { - if (!boot_cpu_has(a->cpuid)) - continue; - BUG_ON(a->replacementlen > a->instrlen); - memcpy(a->instr, a->replacement, a->replacementlen); - diff = a->instrlen - a->replacementlen; - /* Pad the rest with nops */ - for (i = a->replacementlen; diff > 0; diff -= k, i += k) { - k = diff; - if (k > ASM_NOP_MAX) - k = ASM_NOP_MAX; - memcpy(a->instr + i, noptable[k], k); - } - } -} - -void __init alternative_instructions(void) -{ - extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; - apply_alternatives(__alt_instructions, __alt_instructions_end); -} - static char * __init machine_specific_memory_setup(void); #ifdef CONFIG_MCA @@ -1555,6 +1471,16 @@ void __init setup_arch(char **cmdline_p) parse_cmdline_early(cmdline_p); +#ifdef CONFIG_EARLY_PRINTK + { + char *s = strstr(*cmdline_p, "earlyprintk="); + if (s) { + setup_early_printk(strchr(s, '=') + 1); + printk("early console enabled\n"); + } + } +#endif + max_low_pfn = setup_memory(); /* @@ -1579,19 +1505,6 @@ void __init setup_arch(char **cmdline_p) * NOTE: at this point the bootmem allocator is fully available. */ -#ifdef CONFIG_EARLY_PRINTK - { - char *s = strstr(*cmdline_p, "earlyprintk="); - if (s) { - extern void setup_early_printk(char *); - - setup_early_printk(strchr(s, '=') + 1); - printk("early console enabled\n"); - } - } -#endif - - dmi_scan_machine(); #ifdef CONFIG_X86_GENERICARCH diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 963616d..5c352c3 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -123,7 +123,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax err |= __get_user(tmp, &sc->seg); \ loadsegment(seg,tmp); } -#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | X86_EFLAGS_DF | \ +#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_RF | \ + X86_EFLAGS_OF | X86_EFLAGS_DF | \ X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \ X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF) @@ -582,9 +583,6 @@ static void fastcall do_signal(struct pt_regs *regs) if (!user_mode(regs)) return; - if (try_to_freeze()) - goto no_signal; - if (test_thread_flag(TIF_RESTORE_SIGMASK)) oldset = ¤t->saved_sigmask; else @@ -613,7 +611,6 @@ static void fastcall do_signal(struct pt_regs *regs) return; } -no_signal: /* Did we come from a system call? */ if (regs->orig_eax >= 0) { /* Restart the system call - no handlers present */ diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 218d725..d134e96 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -504,27 +504,23 @@ void unlock_ipi_call_lock(void) spin_unlock_irq(&call_lock); } -static struct call_data_struct * call_data; - -/* - * this function sends a 'generic call function' IPI to all other CPUs - * in the system. - */ - -int smp_call_function (void (*func) (void *info), void *info, int nonatomic, - int wait) -/* - * [SUMMARY] Run a function on all other CPUs. - * <func> The function to run. This must be fast and non-blocking. - * <info> An arbitrary pointer to pass to the function. - * <nonatomic> currently unused. - * <wait> If true, wait (atomically) until function has completed on other CPUs. - * [RETURNS] 0 on success, else a negative status code. Does not return until +static struct call_data_struct *call_data; + +/** + * smp_call_function(): Run a function on all other CPUs. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @nonatomic: currently unused. + * @wait: If true, wait (atomically) until function has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. Does not return until * remote CPUs are nearly ready to execute <<func>> or are or have executed. * * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler. */ +int smp_call_function (void (*func) (void *info), void *info, int nonatomic, + int wait) { struct call_data_struct data; int cpus; diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index eba7f53..a696990 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -72,6 +72,9 @@ int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; /* Core ID of each logical CPU */ int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID}; +/* Last level cache ID of each logical CPU */ +int cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; + /* representing HT siblings of each logical CPU */ cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; EXPORT_SYMBOL(cpu_sibling_map); @@ -440,6 +443,18 @@ static void __devinit smp_callin(void) static int cpucount; +/* maps the cpu to the sched domain representing multi-core */ +cpumask_t cpu_coregroup_map(int cpu) +{ + struct cpuinfo_x86 *c = cpu_data + cpu; + /* + * For perf, we return last level cache shared map. + * TBD: when power saving sched policy is added, we will return + * cpu_core_map when power saving policy is enabled + */ + return c->llc_shared_map; +} + /* representing cpus for which sibling maps can be computed */ static cpumask_t cpu_sibling_setup_map; @@ -459,12 +474,16 @@ set_cpu_sibling_map(int cpu) cpu_set(cpu, cpu_sibling_map[i]); cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); } } } else { cpu_set(cpu, cpu_sibling_map[cpu]); } + cpu_set(cpu, c[cpu].llc_shared_map); + if (current_cpu_data.x86_max_cores == 1) { cpu_core_map[cpu] = cpu_sibling_map[cpu]; c[cpu].booted_cores = 1; @@ -472,6 +491,11 @@ set_cpu_sibling_map(int cpu) } for_each_cpu_mask(i, cpu_sibling_setup_map) { + if (cpu_llc_id[cpu] != BAD_APICID && + cpu_llc_id[cpu] == cpu_llc_id[i]) { + cpu_set(i, c[cpu].llc_shared_map); + cpu_set(cpu, c[i].llc_shared_map); + } if (phys_proc_id[cpu] == phys_proc_id[i]) { cpu_set(i, cpu_core_map[cpu]); cpu_set(cpu, cpu_core_map[i]); @@ -899,6 +923,7 @@ static int __devinit do_boot_cpu(int apicid, int cpu) unsigned short nmi_high = 0, nmi_low = 0; ++cpucount; + alternatives_smp_switch(1); /* * We can't use kernel_thread since we must avoid to @@ -1002,7 +1027,6 @@ void cpu_exit_clear(void) cpu_clear(cpu, cpu_callout_map); cpu_clear(cpu, cpu_callin_map); - cpu_clear(cpu, cpu_present_map); cpu_clear(cpu, smp_commenced_mask); unmap_cpu_to_logical_apicid(cpu); @@ -1014,21 +1038,20 @@ struct warm_boot_cpu_info { int cpu; }; -static void __devinit do_warm_boot_cpu(void *p) +static void __cpuinit do_warm_boot_cpu(void *p) { struct warm_boot_cpu_info *info = p; do_boot_cpu(info->apicid, info->cpu); complete(info->complete); } -int __devinit smp_prepare_cpu(int cpu) +static int __cpuinit __smp_prepare_cpu(int cpu) { DECLARE_COMPLETION(done); struct warm_boot_cpu_info info; struct work_struct task; int apicid, ret; - lock_cpu_hotplug(); apicid = x86_cpu_to_apicid[cpu]; if (apicid == BAD_APICID) { ret = -ENODEV; @@ -1053,7 +1076,6 @@ int __devinit smp_prepare_cpu(int cpu) zap_low_mappings(); ret = 0; exit: - unlock_cpu_hotplug(); return ret; } #endif @@ -1358,6 +1380,8 @@ void __cpu_die(unsigned int cpu) /* They ack this in play_dead by setting CPU_DEAD */ if (per_cpu(cpu_state, cpu) == CPU_DEAD) { printk ("CPU %d is now offline\n", cpu); + if (1 == num_online_cpus()) + alternatives_smp_switch(0); return; } msleep(100); @@ -1379,6 +1403,22 @@ void __cpu_die(unsigned int cpu) int __devinit __cpu_up(unsigned int cpu) { +#ifdef CONFIG_HOTPLUG_CPU + int ret=0; + + /* + * We do warm boot only on cpus that had booted earlier + * Otherwise cold boot is all handled from smp_boot_cpus(). + * cpu_callin_map is set during AP kickstart process. Its reset + * when a cpu is taken offline from cpu_exit_clear(). + */ + if (!cpu_isset(cpu, cpu_callin_map)) + ret = __smp_prepare_cpu(cpu); + + if (ret) + return -EIO; +#endif + /* In case one didn't come up */ if (!cpu_isset(cpu, cpu_callin_map)) { printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu); diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c index a4a6197..8fdb1fb 100644 --- a/arch/i386/kernel/sys_i386.c +++ b/arch/i386/kernel/sys_i386.c @@ -40,14 +40,13 @@ asmlinkage int sys_pipe(unsigned long __user * fildes) return error; } -/* common code for old and new mmaps */ -static inline long do_mmap2( - unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) +asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) { int error = -EBADF; - struct file * file = NULL; + struct file *file = NULL; + struct mm_struct *mm = current->mm; flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); if (!(flags & MAP_ANONYMOUS)) { @@ -56,9 +55,9 @@ static inline long do_mmap2( goto out; } - down_write(¤t->mm->mmap_sem); + down_write(&mm->mmap_sem); error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(¤t->mm->mmap_sem); + up_write(&mm->mmap_sem); if (file) fput(file); @@ -66,13 +65,6 @@ out: return error; } -asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - return do_mmap2(addr, len, prot, flags, fd, pgoff); -} - /* * Perform the select(nd, in, out, ex, tv) and mmap() system * calls. Linux/i386 didn't use to be able to handle more than @@ -101,7 +93,8 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) if (a.offset & ~PAGE_MASK) goto out; - err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); + err = sys_mmap2(a.addr, a.len, a.prot, a.flags, + a.fd, a.offset >> PAGE_SHIFT); out: return err; } diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index ac687d0..4f58b9c 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -310,3 +310,7 @@ ENTRY(sys_call_table) .long sys_pselect6 .long sys_ppoll .long sys_unshare /* 310 */ + .long sys_set_robust_list + .long sys_get_robust_list + .long sys_splice + .long sys_sync_file_range diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index be24272..17a6fe7 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -46,7 +46,7 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; * * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -static unsigned long cyc2ns_scale; +static unsigned long cyc2ns_scale __read_mostly; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ static inline void set_cyc2ns_scale(unsigned long cpu_khz) diff --git a/arch/i386/kernel/timers/timer_pm.c b/arch/i386/kernel/timers/timer_pm.c index 264edaa..144e94a 100644 --- a/arch/i386/kernel/timers/timer_pm.c +++ b/arch/i386/kernel/timers/timer_pm.c @@ -15,6 +15,7 @@ #include <linux/module.h> #include <linux/device.h> #include <linux/init.h> +#include <linux/pci.h> #include <asm/types.h> #include <asm/timer.h> #include <asm/smp.h> @@ -45,24 +46,31 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; #define ACPI_PM_MASK 0xFFFFFF /* limit it to 24 bits */ +static int pmtmr_need_workaround __read_mostly = 1; + /*helper function to safely read acpi pm timesource*/ static inline u32 read_pmtmr(void) { - u32 v1=0,v2=0,v3=0; - /* It has been reported that because of various broken - * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time - * source is not latched, so you must read it multiple - * times to insure a safe value is read. - */ - do { - v1 = inl(pmtmr_ioport); - v2 = inl(pmtmr_ioport); - v3 = inl(pmtmr_ioport); - } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) - || (v3 > v1 && v3 < v2)); - - /* mask the output to 24 bits */ - return v2 & ACPI_PM_MASK; + if (pmtmr_need_workaround) { + u32 v1, v2, v3; + + /* It has been reported that because of various broken + * chipsets (ICH4, PIIX4 and PIIX4E) where the ACPI PM time + * source is not latched, so you must read it multiple + * times to insure a safe value is read. + */ + do { + v1 = inl(pmtmr_ioport); + v2 = inl(pmtmr_ioport); + v3 = inl(pmtmr_ioport); + } while ((v1 > v2 && v1 < v3) || (v2 > v3 && v2 < v1) + || (v3 > v1 && v3 < v2)); + + /* mask the output to 24 bits */ + return v2 & ACPI_PM_MASK; + } + + return inl(pmtmr_ioport) & ACPI_PM_MASK; } @@ -263,6 +271,72 @@ struct init_timer_opts __initdata timer_pmtmr_init = { .opts = &timer_pmtmr, }; +#ifdef CONFIG_PCI +/* + * PIIX4 Errata: + * + * The power management timer may return improper results when read. + * Although the timer value settles properly after incrementing, + * while incrementing there is a 3 ns window every 69.8 ns where the + * timer value is indeterminate (a 4.2% chance that the data will be + * incorrect when read). As a result, the ACPI free running count up + * timer specification is violated due to erroneous reads. + */ +static int __init pmtmr_bug_check(void) +{ + static struct pci_device_id gray_list[] __initdata = { + /* these chipsets may have bug. */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82801DB_0) }, + { }, + }; + struct pci_dev *dev; + int pmtmr_has_bug = 0; + u8 rev; + + if (cur_timer != &timer_pmtmr || !pmtmr_need_workaround) + return 0; + + dev = pci_get_device(PCI_VENDOR_ID_INTEL, + PCI_DEVICE_ID_INTEL_82371AB_3, NULL); + if (dev) { + pci_read_config_byte(dev, PCI_REVISION_ID, &rev); + /* the bug has been fixed in PIIX4M */ + if (rev < 3) { + printk(KERN_WARNING "* Found PM-Timer Bug on this " + "chipset. Due to workarounds for a bug,\n" + "* this time source is slow. Consider trying " + "other time sources (clock=)\n"); + pmtmr_has_bug = 1; + } + pci_dev_put(dev); + } + + if (pci_dev_present(gray_list)) { + printk(KERN_WARNING "* This chipset may have PM-Timer Bug. Due" + " to workarounds for a bug,\n" + "* this time source is slow. If you are sure your timer" + " does not have\n" + "* this bug, please use \"pmtmr_good\" to disable the " + "workaround\n"); + pmtmr_has_bug = 1; + } + + if (!pmtmr_has_bug) + pmtmr_need_workaround = 0; + + return 0; +} +device_initcall(pmtmr_bug_check); +#endif + +static int __init pmtr_good_setup(char *__str) +{ + pmtmr_need_workaround = 0; + return 1; +} +__setup("pmtmr_good", pmtr_good_setup); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); MODULE_DESCRIPTION("Power Management Timer (PMTMR) as primary timing source for x86"); diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index a7f5a2a..5e41ee2 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -74,7 +74,7 @@ late_initcall(start_lost_tick_compensation); * * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -static unsigned long cyc2ns_scale; +static unsigned long cyc2ns_scale __read_mostly; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ static inline void set_cyc2ns_scale(unsigned long cpu_khz) diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c index 67a0e1b..2963552 100644 --- a/arch/i386/kernel/topology.c +++ b/arch/i386/kernel/topology.c @@ -41,6 +41,15 @@ int arch_register_cpu(int num){ parent = &node_devices[node].node; #endif /* CONFIG_NUMA */ + /* + * CPU0 cannot be offlined due to several + * restrictions and assumptions in kernel. This basically + * doesnt add a control file, one cannot attempt to offline + * BSP. + */ + if (!num) + cpu_devices[num].cpu.no_control = 1; + return register_cpu(&cpu_devices[num].cpu, num, parent); } diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index b814dbd..e385279 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -92,32 +92,51 @@ asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); static int kstack_depth_to_print = 24; -struct notifier_block *i386die_chain; -static DEFINE_SPINLOCK(die_notifier_lock); +ATOMIC_NOTIFIER_HEAD(i386die_chain); int register_die_notifier(struct notifier_block *nb) { - int err = 0; - unsigned long flags; - spin_lock_irqsave(&die_notifier_lock, flags); - err = notifier_chain_register(&i386die_chain, nb); - spin_unlock_irqrestore(&die_notifier_lock, flags); - return err; + vmalloc_sync_all(); + return atomic_notifier_chain_register(&i386die_chain, nb); } EXPORT_SYMBOL(register_die_notifier); +int unregister_die_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&i386die_chain, nb); +} +EXPORT_SYMBOL(unregister_die_notifier); + static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) { return p > (void *)tinfo && p < (void *)tinfo + THREAD_SIZE - 3; } -static void print_addr_and_symbol(unsigned long addr, char *log_lvl) +/* + * Print CONFIG_STACK_BACKTRACE_COLS address/symbol entries per line. + */ +static inline int print_addr_and_symbol(unsigned long addr, char *log_lvl, + int printed) { - printk(log_lvl); + if (!printed) + printk(log_lvl); + +#if CONFIG_STACK_BACKTRACE_COLS == 1 printk(" [<%08lx>] ", addr); +#else + printk(" <%08lx> ", addr); +#endif print_symbol("%s", addr); - printk("\n"); + + printed = (printed + 1) % CONFIG_STACK_BACKTRACE_COLS; + + if (printed) + printk(" "); + else + printk("\n"); + + return printed; } static inline unsigned long print_context_stack(struct thread_info *tinfo, @@ -125,20 +144,24 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, char *log_lvl) { unsigned long addr; + int printed = 0; /* nr of entries already printed on current line */ #ifdef CONFIG_FRAME_POINTER while (valid_stack_ptr(tinfo, (void *)ebp)) { addr = *(unsigned long *)(ebp + 4); - print_addr_and_symbol(addr, log_lvl); + printed = print_addr_and_symbol(addr, log_lvl, printed); ebp = *(unsigned long *)ebp; } #else while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; if (__kernel_text_address(addr)) - print_addr_and_symbol(addr, log_lvl); + printed = print_addr_and_symbol(addr, log_lvl, printed); } #endif + if (printed) + printk("\n"); + return ebp; } @@ -166,8 +189,7 @@ static void show_trace_log_lvl(struct task_struct *task, stack = (unsigned long*)context->previous_esp; if (!stack) break; - printk(log_lvl); - printk(" =======================\n"); + printk("%s =======================\n", log_lvl); } } @@ -194,21 +216,17 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp, for(i = 0; i < kstack_depth_to_print; i++) { if (kstack_end(stack)) break; - if (i && ((i % 8) == 0)) { - printk("\n"); - printk(log_lvl); - printk(" "); - } + if (i && ((i % 8) == 0)) + printk("\n%s ", log_lvl); printk("%08lx ", *stack++); } - printk("\n"); - printk(log_lvl); - printk("Call Trace:\n"); + printk("\n%sCall Trace:\n", log_lvl); show_trace_log_lvl(task, esp, log_lvl); } void show_stack(struct task_struct *task, unsigned long *esp) { + printk(" "); show_stack_log_lvl(task, esp, ""); } @@ -233,7 +251,7 @@ void show_registers(struct pt_regs *regs) esp = (unsigned long) (®s->esp); savesegment(ss, ss); - if (user_mode(regs)) { + if (user_mode_vm(regs)) { in_kernel = 0; esp = regs->esp; ss = regs->xss & 0xffff; @@ -333,6 +351,8 @@ void die(const char * str, struct pt_regs * regs, long err) static int die_counter; unsigned long flags; + oops_enter(); + if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); spin_lock_irqsave(&die.lock, flags); @@ -365,8 +385,12 @@ void die(const char * str, struct pt_regs * regs, long err) #endif if (nl) printk("\n"); - notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); - show_registers(regs); + if (notify_die(DIE_OOPS, str, regs, err, + current->thread.trap_no, SIGSEGV) != + NOTIFY_STOP) + show_registers(regs); + else + regs = NULL; } else printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); @@ -374,6 +398,9 @@ void die(const char * str, struct pt_regs * regs, long err) die.lock_owner = -1; spin_unlock_irqrestore(&die.lock, flags); + if (!regs) + return; + if (kexec_should_crash(current)) crash_kexec(regs); @@ -385,6 +412,7 @@ void die(const char * str, struct pt_regs * regs, long err) ssleep(5); panic("Fatal exception"); } + oops_exit(); do_exit(SIGSEGV); } @@ -601,7 +629,7 @@ static DEFINE_SPINLOCK(nmi_print_lock); void die_nmi (struct pt_regs *regs, const char *msg) { - if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 0, SIGINT) == + if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) return; @@ -623,7 +651,7 @@ void die_nmi (struct pt_regs *regs, const char *msg) /* If we are in kernel we are probably nested up pretty bad * and might aswell get out now while we still can. */ - if (!user_mode(regs)) { + if (!user_mode_vm(regs)) { current->thread.trap_no = 2; crash_kexec(regs); } @@ -640,7 +668,7 @@ static void default_do_nmi(struct pt_regs * regs) reason = get_nmi_reason(); if (!(reason & 0xc0)) { - if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT) + if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) == NOTIFY_STOP) return; #ifdef CONFIG_X86_LOCAL_APIC @@ -656,7 +684,7 @@ static void default_do_nmi(struct pt_regs * regs) unknown_nmi_error(reason, regs); return; } - if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP) + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) return; if (reason & 0x80) mem_parity_error(reason, regs); @@ -694,6 +722,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) void set_nmi_callback(nmi_callback_t callback) { + vmalloc_sync_all(); rcu_assign_pointer(nmi_callback, callback); } EXPORT_SYMBOL_GPL(set_nmi_callback); @@ -1164,6 +1193,6 @@ void __init trap_init(void) static int __init kstack_setup(char *s) { kstack_depth_to_print = simple_strtoul(s, NULL, 0); - return 0; + return 1; } __setup("kstack=", kstack_setup); diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c index f51c894..aee14fa 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -43,6 +43,7 @@ #include <linux/smp_lock.h> #include <linux/highmem.h> #include <linux/ptrace.h> +#include <linux/audit.h> #include <asm/uaccess.h> #include <asm/io.h> @@ -252,6 +253,7 @@ out: static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk) { struct tss_struct *tss; + long eax; /* * make sure the vm86() system call doesn't try to do anything silly */ @@ -305,13 +307,19 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk tsk->thread.screen_bitmap = info->screen_bitmap; if (info->flags & VM86_SCREEN_BITMAP) mark_screen_rdonly(tsk->mm); + __asm__ __volatile__("xorl %eax,%eax; movl %eax,%fs; movl %eax,%gs\n\t"); + __asm__ __volatile__("movl %%eax, %0\n" :"=r"(eax)); + + /*call audit_syscall_exit since we do not exit via the normal paths */ + if (unlikely(current->audit_context)) + audit_syscall_exit(current, AUDITSC_RESULT(eax), eax); + __asm__ __volatile__( - "xorl %%eax,%%eax; movl %%eax,%%fs; movl %%eax,%%gs\n\t" "movl %0,%%esp\n\t" "movl %1,%%ebp\n\t" "jmp resume_userspace" : /* no outputs */ - :"r" (&info->regs), "r" (task_thread_info(tsk)) : "ax"); + :"r" (&info->regs), "r" (task_thread_info(tsk))); /* we never return here */ } diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 4710195..8831303 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -7,6 +7,7 @@ #include <asm-generic/vmlinux.lds.h> #include <asm/thread_info.h> #include <asm/page.h> +#include <asm/cache.h> OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") OUTPUT_ARCH(i386) @@ -68,6 +69,26 @@ SECTIONS *(.data.init_task) } + /* might get freed after init */ + . = ALIGN(4096); + __smp_alt_begin = .; + __smp_alt_instructions = .; + .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) { + *(.smp_altinstructions) + } + __smp_alt_instructions_end = .; + . = ALIGN(4); + __smp_locks = .; + .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { + *(.smp_locks) + } + __smp_locks_end = .; + .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) { + *(.smp_altinstr_replacement) + } + . = ALIGN(4096); + __smp_alt_end = .; + /* will be freed after init */ . = ALIGN(4096); /* Init code and data */ __init_begin = .; @@ -115,7 +136,7 @@ SECTIONS __initramfs_start = .; .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { *(.init.ramfs) } __initramfs_end = .; - . = ALIGN(32); + . = ALIGN(L1_CACHE_BYTES); __per_cpu_start = .; .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } __per_cpu_end = .; diff --git a/arch/i386/kernel/vsyscall-sigreturn.S b/arch/i386/kernel/vsyscall-sigreturn.S index fadb5bc..a92262f 100644 --- a/arch/i386/kernel/vsyscall-sigreturn.S +++ b/arch/i386/kernel/vsyscall-sigreturn.S @@ -44,7 +44,7 @@ __kernel_rt_sigreturn: .LSTARTCIEDLSI1: .long 0 /* CIE ID */ .byte 1 /* Version number */ - .string "zR" /* NUL-terminated augmentation string */ + .string "zRS" /* NUL-terminated augmentation string */ .uleb128 1 /* Code alignment factor */ .sleb128 -4 /* Data alignment factor */ .byte 8 /* Return address register column */ diff --git a/arch/i386/kernel/vsyscall-sysenter.S b/arch/i386/kernel/vsyscall-sysenter.S index 76b7281..3b62baa 100644 --- a/arch/i386/kernel/vsyscall-sysenter.S +++ b/arch/i386/kernel/vsyscall-sysenter.S @@ -21,6 +21,9 @@ * instruction clobbers %esp, the user's %esp won't even survive entry * into the kernel. We store %esp in %ebp. Code in entry.S must fetch * arg6 from the stack. + * + * You can not use this vsyscall for the clone() syscall because the + * three dwords on the parent stack do not get copied to the child. */ .text .globl __kernel_vsyscall |