Diffstat (limited to 'arch/i386/kernel')
32 files changed, 548 insertions, 245 deletions
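This is the i386 "SMP alternatives" patch: the kernel keeps enough information around to rewrite its own LOCK-prefixed instructions and feature-dependent instruction sequences at runtime, so a single binary can run cheap UP code on one processor and switch to real SMP code when a second CPU appears. The record type that drives the patching lives in the companion header (include/asm-i386/alternative.h), which this diffstat filters out; reconstructed from the field accesses in alternative.c below, it looks roughly like this:

	struct alt_instr {
		u8 *instr;		/* original instruction in .text */
		u8 *replacement;	/* replacement bytes; for SMP alternatives
					   followed by instrlen spare bytes used as
					   a save area by alternatives_smp_save() */
		u8  cpuid;		/* feature bit that selects the replacement */
		u8  instrlen;		/* length of the original instruction */
		u8  replacementlen;	/* length of the replacement, <= instrlen */
		u8  pad;
	};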
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 65656c0..5b9ed21 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -7,7 +7,7 @@ extra-y := head.o init_task.o vmlinux.lds obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ pci-dma.o i386_ksyms.o i387.o dmi_scan.o bootflag.o \ - quirks.o i8237.o topology.o + quirks.o i8237.o topology.o alternative.o obj-y += cpu/ obj-y += timers/ diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c new file mode 100644 index 0000000..5cbd6f9 --- /dev/null +++ b/arch/i386/kernel/alternative.c @@ -0,0 +1,321 @@ +#include <linux/module.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <asm/alternative.h> +#include <asm/sections.h> + +#define DEBUG 0 +#if DEBUG +# define DPRINTK(fmt, args...) printk(fmt, args) +#else +# define DPRINTK(fmt, args...) +#endif + +/* Use inline assembly to define this because the nops are defined + as inline assembly strings in the include files and we cannot + get them easily into strings. */ +asm("\t.data\nintelnops: " + GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 + GENERIC_NOP7 GENERIC_NOP8); +asm("\t.data\nk8nops: " + K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 + K8_NOP7 K8_NOP8); +asm("\t.data\nk7nops: " + K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 + K7_NOP7 K7_NOP8); + +extern unsigned char intelnops[], k8nops[], k7nops[]; +static unsigned char *intel_nops[ASM_NOP_MAX+1] = { + NULL, + intelnops, + intelnops + 1, + intelnops + 1 + 2, + intelnops + 1 + 2 + 3, + intelnops + 1 + 2 + 3 + 4, + intelnops + 1 + 2 + 3 + 4 + 5, + intelnops + 1 + 2 + 3 + 4 + 5 + 6, + intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, +}; +static unsigned char *k8_nops[ASM_NOP_MAX+1] = { + NULL, + k8nops, + k8nops + 1, + k8nops + 1 + 2, + k8nops + 1 + 2 + 3, + k8nops + 1 + 2 + 3 + 4, + k8nops + 1 + 2 + 3 + 4 + 5, + k8nops + 1 + 2 + 3 + 4 + 5 + 6, + k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, +}; +static unsigned char *k7_nops[ASM_NOP_MAX+1] = { + NULL, + k7nops, + k7nops + 1, + k7nops + 1 + 2, + k7nops + 1 + 2 + 3, + k7nops + 1 + 2 + 3 + 4, + k7nops + 1 + 2 + 3 + 4 + 5, + k7nops + 1 + 2 + 3 + 4 + 5 + 6, + k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, +}; +static struct nop { + int cpuid; + unsigned char **noptable; +} noptypes[] = { + { X86_FEATURE_K8, k8_nops }, + { X86_FEATURE_K7, k7_nops }, + { -1, NULL } +}; + + +extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; +extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[]; +extern u8 *__smp_locks[], *__smp_locks_end[]; + +extern u8 __smp_alt_begin[], __smp_alt_end[]; + + +static unsigned char** find_nop_table(void) +{ + unsigned char **noptable = intel_nops; + int i; + + for (i = 0; noptypes[i].cpuid >= 0; i++) { + if (boot_cpu_has(noptypes[i].cpuid)) { + noptable = noptypes[i].noptable; + break; + } + } + return noptable; +} + +/* Replace instructions with better alternatives for this CPU type. + This runs before SMP is initialized to avoid SMP problems with + self modifying code. This implies that assymetric systems where + APs have less capabilities than the boot processor are not handled. + Tough. Make sure you disable such features by hand. 
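Nobody writes .altinstructions records by hand; they are emitted by inline-asm macros that keep the original instruction in .text and push the descriptor plus the replacement bytes into the .altinstructions and .altinstr_replacement sections. A sketch in the style of the i386 alternative() macro of this era (illustrative, not a quote of the header):

	#define alternative(oldinstr, newinstr, feature)			\
		asm volatile ("661:\n\t" oldinstr "\n662:\n"			\
			      ".section .altinstructions,\"a\"\n"		\
			      "  .align 4\n"					\
			      "  .long 661b\n"		/* instr */		\
			      "  .long 663f\n"		/* replacement */	\
			      "  .byte %c0\n"		/* cpuid feature bit */	\
			      "  .byte 662b-661b\n"	/* instrlen */		\
			      "  .byte 664f-663f\n"	/* replacementlen */	\
			      ".previous\n"					\
			      ".section .altinstr_replacement,\"ax\"\n"	\
			      "663:\n\t" newinstr "\n664:\n"			\
			      ".previous" : : "i" (feature) : "memory")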
*/ + +void apply_alternatives(struct alt_instr *start, struct alt_instr *end) +{ + unsigned char **noptable = find_nop_table(); + struct alt_instr *a; + int diff, i, k; + + DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end); + for (a = start; a < end; a++) { + BUG_ON(a->replacementlen > a->instrlen); + if (!boot_cpu_has(a->cpuid)) + continue; + memcpy(a->instr, a->replacement, a->replacementlen); + diff = a->instrlen - a->replacementlen; + /* Pad the rest with nops */ + for (i = a->replacementlen; diff > 0; diff -= k, i += k) { + k = diff; + if (k > ASM_NOP_MAX) + k = ASM_NOP_MAX; + memcpy(a->instr + i, noptable[k], k); + } + } +} + +static void alternatives_smp_save(struct alt_instr *start, struct alt_instr *end) +{ + struct alt_instr *a; + + DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end); + for (a = start; a < end; a++) { + memcpy(a->replacement + a->replacementlen, + a->instr, + a->instrlen); + } +} + +static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end) +{ + struct alt_instr *a; + + for (a = start; a < end; a++) { + memcpy(a->instr, + a->replacement + a->replacementlen, + a->instrlen); + } +} + +static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) +{ + u8 **ptr; + + for (ptr = start; ptr < end; ptr++) { + if (*ptr < text) + continue; + if (*ptr > text_end) + continue; + **ptr = 0xf0; /* lock prefix */ + }; +} + +static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end) +{ + unsigned char **noptable = find_nop_table(); + u8 **ptr; + + for (ptr = start; ptr < end; ptr++) { + if (*ptr < text) + continue; + if (*ptr > text_end) + continue; + **ptr = noptable[1][0]; + }; +} + +struct smp_alt_module { + /* what is this ??? */ + struct module *mod; + char *name; + + /* ptrs to lock prefixes */ + u8 **locks; + u8 **locks_end; + + /* .text segment, needed to avoid patching init code ;) */ + u8 *text; + u8 *text_end; + + struct list_head next; +}; +static LIST_HEAD(smp_alt_modules); +static DEFINE_SPINLOCK(smp_alt); + +static int smp_alt_once = 0; +static int __init bootonly(char *str) +{ + smp_alt_once = 1; + return 1; +} +__setup("smp-alt-boot", bootonly); + +void alternatives_smp_module_add(struct module *mod, char *name, + void *locks, void *locks_end, + void *text, void *text_end) +{ + struct smp_alt_module *smp; + unsigned long flags; + + if (smp_alt_once) { + if (boot_cpu_has(X86_FEATURE_UP)) + alternatives_smp_unlock(locks, locks_end, + text, text_end); + return; + } + + smp = kzalloc(sizeof(*smp), GFP_KERNEL); + if (NULL == smp) + return; /* we'll run the (safe but slow) SMP code then ... 
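alternatives_smp_lock()/alternatives_smp_unlock() above walk __smp_locks, an array of pointers, one per LOCK-prefixed instruction, each aimed at the 0xf0 prefix byte itself. Those pointers come from the LOCK_PREFIX macro introduced by the same commit in include/asm-i386/alternative.h (outside this diffstat; sketched from memory, so treat the details as approximate):

	#ifdef CONFIG_SMP
	#define LOCK_PREFIX					\
			".section .smp_locks,\"a\"\n"		\
			"  .align 4\n"				\
			"  .long 661f\n" /* address of the prefix byte */ \
			".previous\n"				\
			"661:\n\tlock; "
	#else
	#define LOCK_PREFIX ""
	#endif

At the byte level the switch is a single store: "lock; incl (%eax)" is f0 ff 00 on SMP and becomes 90 ff 00 (nop; incl) after unlocking, which is why writing noptable[1][0] over the prefix is all it takes. The X86_FEATURE_UP bit that gates the unlock is a Linux-defined flag (word 3, bit 9, matching the position of the new "up" string added to cpu/proc.c later in this diff).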
*/ + + smp->mod = mod; + smp->name = name; + smp->locks = locks; + smp->locks_end = locks_end; + smp->text = text; + smp->text_end = text_end; + DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n", + __FUNCTION__, smp->locks, smp->locks_end, + smp->text, smp->text_end, smp->name); + + spin_lock_irqsave(&smp_alt, flags); + list_add_tail(&smp->next, &smp_alt_modules); + if (boot_cpu_has(X86_FEATURE_UP)) + alternatives_smp_unlock(smp->locks, smp->locks_end, + smp->text, smp->text_end); + spin_unlock_irqrestore(&smp_alt, flags); +} + +void alternatives_smp_module_del(struct module *mod) +{ + struct smp_alt_module *item; + unsigned long flags; + + if (smp_alt_once) + return; + + spin_lock_irqsave(&smp_alt, flags); + list_for_each_entry(item, &smp_alt_modules, next) { + if (mod != item->mod) + continue; + list_del(&item->next); + spin_unlock_irqrestore(&smp_alt, flags); + DPRINTK("%s: %s\n", __FUNCTION__, item->name); + kfree(item); + return; + } + spin_unlock_irqrestore(&smp_alt, flags); +} + +void alternatives_smp_switch(int smp) +{ + struct smp_alt_module *mod; + unsigned long flags; + + if (smp_alt_once) + return; + BUG_ON(!smp && (num_online_cpus() > 1)); + + spin_lock_irqsave(&smp_alt, flags); + if (smp) { + printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); + clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); + clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + alternatives_smp_apply(__smp_alt_instructions, + __smp_alt_instructions_end); + list_for_each_entry(mod, &smp_alt_modules, next) + alternatives_smp_lock(mod->locks, mod->locks_end, + mod->text, mod->text_end); + } else { + printk(KERN_INFO "SMP alternatives: switching to UP code\n"); + set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); + set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + apply_alternatives(__smp_alt_instructions, + __smp_alt_instructions_end); + list_for_each_entry(mod, &smp_alt_modules, next) + alternatives_smp_unlock(mod->locks, mod->locks_end, + mod->text, mod->text_end); + } + spin_unlock_irqrestore(&smp_alt, flags); +} + +void __init alternative_instructions(void) +{ + apply_alternatives(__alt_instructions, __alt_instructions_end); + + /* switch to patch-once-at-boottime-only mode and free the + * tables in case we know the number of CPUs will never ever + * change */ +#ifdef CONFIG_HOTPLUG_CPU + if (num_possible_cpus() < 2) + smp_alt_once = 1; +#else + smp_alt_once = 1; +#endif + + if (smp_alt_once) { + if (1 == num_possible_cpus()) { + printk(KERN_INFO "SMP alternatives: switching to UP code\n"); + set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); + set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); + apply_alternatives(__smp_alt_instructions, + __smp_alt_instructions_end); + alternatives_smp_unlock(__smp_locks, __smp_locks_end, + _text, _etext); + } + free_init_pages("SMP alternatives", + (unsigned long)__smp_alt_begin, + (unsigned long)__smp_alt_end); + } else { + alternatives_smp_save(__smp_alt_instructions, + __smp_alt_instructions_end); + alternatives_smp_module_add(NULL, "core kernel", + __smp_locks, __smp_locks_end, + _text, _etext); + alternatives_smp_switch(0); + } +} diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 776c909..eb5279d 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c @@ -38,6 +38,7 @@ #include <asm/i8253.h> #include <mach_apic.h> +#include <mach_apicdef.h> #include <mach_ipi.h> #include "io_ports.h" diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c index f52669e..bd75629 100644 
--- a/arch/i386/kernel/cpu/centaur.c +++ b/arch/i386/kernel/cpu/centaur.c @@ -4,6 +4,7 @@ #include <asm/processor.h> #include <asm/msr.h> #include <asm/e820.h> +#include <asm/mtrr.h> #include "cpu.h" #ifdef CONFIG_X86_OOSTORE diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index e6bd095..7e3d6b6 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c @@ -25,9 +25,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]); EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack); -static int cachesize_override __devinitdata = -1; -static int disable_x86_fxsr __devinitdata = 0; -static int disable_x86_serial_nr __devinitdata = 1; +static int cachesize_override __cpuinitdata = -1; +static int disable_x86_fxsr __cpuinitdata; +static int disable_x86_serial_nr __cpuinitdata = 1; +static int disable_x86_sep __cpuinitdata; struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; @@ -59,7 +60,7 @@ static int __init cachesize_setup(char *str) } __setup("cachesize=", cachesize_setup); -int __devinit get_model_name(struct cpuinfo_x86 *c) +int __cpuinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; char *p, *q; @@ -89,7 +90,7 @@ int __devinit get_model_name(struct cpuinfo_x86 *c) } -void __devinit display_cacheinfo(struct cpuinfo_x86 *c) +void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ecx, edx, l2size; @@ -130,7 +131,7 @@ void __devinit display_cacheinfo(struct cpuinfo_x86 *c) /* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ /* Look up CPU names by table lookup. */ -static char __devinit *table_lookup_model(struct cpuinfo_x86 *c) +static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c) { struct cpu_model_info *info; @@ -151,7 +152,7 @@ static char __devinit *table_lookup_model(struct cpuinfo_x86 *c) } -static void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) +static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) { char *v = c->x86_vendor_id; int i; @@ -187,6 +188,14 @@ static int __init x86_fxsr_setup(char * s) __setup("nofxsr", x86_fxsr_setup); +static int __init x86_sep_setup(char * s) +{ + disable_x86_sep = 1; + return 1; +} +__setup("nosep", x86_sep_setup); + + /* Standard macro to see if a specific flag is changeable */ static inline int flag_is_changeable_p(u32 flag) { @@ -210,7 +219,7 @@ static inline int flag_is_changeable_p(u32 flag) /* Probe for the CPUID instruction */ -static int __devinit have_cpuid_p(void) +static int __cpuinit have_cpuid_p(void) { return flag_is_changeable_p(X86_EFLAGS_ID); } @@ -254,7 +263,7 @@ static void __init early_cpu_detect(void) } } -void __devinit generic_identify(struct cpuinfo_x86 * c) +void __cpuinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; int junk; @@ -307,7 +316,7 @@ void __devinit generic_identify(struct cpuinfo_x86 * c) #endif } -static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { /* Disable processor serial number */ @@ -335,7 +344,7 @@ __setup("serialnumber", x86_serial_nr_setup); /* * This does the hard work of actually picking apart the CPU stuff... 
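Most of the cpu/common.c and cpu/intel.c churn here is a re-annotation: CPU identification and setup code moves from __devinit to __cpuinit, the tag that tracks whether a CPU can be brought up after boot. In the include/linux/init.h of this era the definitions were approximately:

	#ifdef CONFIG_HOTPLUG_CPU
	#define __cpuinit
	#define __cpuinitdata
	#else
	#define __cpuinit	__init
	#define __cpuinitdata	__initdata
	#endif

So without CPU hotplug this code is discarded with the rest of the init text after boot, while a hotplug kernel keeps it resident for CPUs that come online later.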
*/ -void __devinit identify_cpu(struct cpuinfo_x86 *c) +void __cpuinit identify_cpu(struct cpuinfo_x86 *c) { int i; @@ -405,6 +414,10 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) clear_bit(X86_FEATURE_XMM, c->x86_capability); } + /* SEP disabled? */ + if (disable_x86_sep) + clear_bit(X86_FEATURE_SEP, c->x86_capability); + if (disable_pse) clear_bit(X86_FEATURE_PSE, c->x86_capability); @@ -417,7 +430,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) else /* Last resort... */ sprintf(c->x86_model_id, "%02x/%02x", - c->x86_vendor, c->x86_model); + c->x86, c->x86_model); } /* Now the feature flags better reflect actual CPU features! */ @@ -453,7 +466,7 @@ void __devinit identify_cpu(struct cpuinfo_x86 *c) } #ifdef CONFIG_X86_HT -void __devinit detect_ht(struct cpuinfo_x86 *c) +void __cpuinit detect_ht(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; int index_msb, core_bits; @@ -500,7 +513,7 @@ void __devinit detect_ht(struct cpuinfo_x86 *c) } #endif -void __devinit print_cpu_info(struct cpuinfo_x86 *c) +void __cpuinit print_cpu_info(struct cpuinfo_x86 *c) { char *vendor = NULL; @@ -523,7 +536,7 @@ void __devinit print_cpu_info(struct cpuinfo_x86 *c) printk("\n"); } -cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE; +cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; /* This is hacky. :) * We're emulating future behavior. @@ -570,7 +583,7 @@ void __init early_cpu_init(void) * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. */ -void __devinit cpu_init(void) +void __cpuinit cpu_init(void) { int cpu = smp_processor_id(); struct tss_struct * t = &per_cpu(init_tss, cpu); @@ -670,7 +683,7 @@ void __devinit cpu_init(void) } #ifdef CONFIG_HOTPLUG_CPU -void __devinit cpu_uninit(void) +void __cpuinit cpu_uninit(void) { int cpu = raw_smp_processor_id(); cpu_clear(cpu, cpu_initialized); diff --git a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c index e11a092..3d5110b 100644 --- a/arch/i386/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/i386/kernel/cpu/cpufreq/powernow-k8.c @@ -1145,9 +1145,7 @@ static int __cpuinit powernowk8_init(void) { unsigned int i, supported_cpus = 0; - for (i=0; i<NR_CPUS; i++) { - if (!cpu_online(i)) - continue; + for_each_cpu(i) { if (check_supported_cpu(i)) supported_cpus++; } diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 8c01201..5386b29 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c @@ -29,7 +29,7 @@ extern int trap_init_f00f_bug(void); struct movsl_mask movsl_mask __read_mostly; #endif -void __devinit early_intel_workaround(struct cpuinfo_x86 *c) +void __cpuinit early_intel_workaround(struct cpuinfo_x86 *c) { if (c->x86_vendor != X86_VENDOR_INTEL) return; @@ -44,7 +44,7 @@ void __devinit early_intel_workaround(struct cpuinfo_x86 *c) * This is called before we do cpu ident work */ -int __devinit ppro_with_ram_bug(void) +int __cpuinit ppro_with_ram_bug(void) { /* Uses data from early_cpu_detect now */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && @@ -62,7 +62,7 @@ int __devinit ppro_with_ram_bug(void) * P4 Xeon errata 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. 
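The new "nosep" boot parameter in cpu/common.c above clears X86_FEATURE_SEP, which steers the vsyscall setup away from SYSENTER and back to the int $0x80 entry path; useful when debugging sysenter problems. The consumer is sysenter_setup() in arch/i386/kernel/sysenter.c, untouched by this commit and therefore absent from the diff; from memory it chooses the vsyscall flavor roughly like this (a sketch, not a quote):

	/* sysenter_setup(), sketched: pick the vsyscall page contents */
	if (!boot_cpu_has(X86_FEATURE_SEP)) {
		memcpy(page, &vsyscall_int80_start,
		       &vsyscall_int80_end - &vsyscall_int80_start);
		return 0;
	}
	memcpy(page, &vsyscall_sysenter_start,
	       &vsyscall_sysenter_end - &vsyscall_sysenter_start);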
*/ -static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c) +static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) { unsigned long lo, hi; @@ -81,7 +81,7 @@ static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c) /* * find out the number of processor cores on the die */ -static int __devinit num_cpu_cores(struct cpuinfo_x86 *c) +static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) { unsigned int eax, ebx, ecx, edx; @@ -96,7 +96,7 @@ static int __devinit num_cpu_cores(struct cpuinfo_x86 *c) return 1; } -static void __devinit init_intel(struct cpuinfo_x86 *c) +static void __cpuinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; char *p = NULL; @@ -205,7 +205,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) return size; } -static struct cpu_dev intel_cpu_dev __devinitdata = { +static struct cpu_dev intel_cpu_dev __cpuinitdata = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, .c_models = { diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index ffe58ce..ce61921 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -174,7 +174,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ - if (c->cpuid_level > 4) { + if (c->cpuid_level > 3) { static int is_initialized; if (is_initialized == 0) { @@ -330,7 +330,7 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) } } } -static void __devinit cache_remove_shared_cpu_map(unsigned int cpu, int index) +static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) { struct _cpuid4_info *this_leaf, *sibling_leaf; int sibling; diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 89a85af..5cfbd80 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c @@ -40,7 +40,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) /* Other (Linux-defined) */ "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr", NULL, NULL, NULL, NULL, - "constant_tsc", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "constant_tsc", "up", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index d49dbe8..e3c5fca0 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -105,7 +105,7 @@ static int crash_nmi_callback(struct pt_regs *regs, int cpu) return 1; local_irq_disable(); - if (!user_mode(regs)) { + if (!user_mode_vm(regs)) { crash_fixup_ss_esp(&fixed_regs, regs); regs = &fixed_regs; } diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index c9cad7b..7ec6cfa 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c @@ -115,7 +115,7 @@ static void efi_call_phys_epilog(void) unsigned long cr4; struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); - cpu_gdt_descr->address = __va(cpu_gdt_descr->address); + cpu_gdt_descr->address = (unsigned long)__va(cpu_gdt_descr->address); load_gdt(cpu_gdt_descr); cr4 = read_cr4(); @@ -543,7 +543,7 @@ efi_initialize_iomem_resources(struct resource *code_resource, if ((md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) > 0x100000000ULL) continue; - res = alloc_bootmem_low(sizeof(struct resource)); + res = kzalloc(sizeof(struct resource), 
GFP_ATOMIC); switch (md->type) { case EFI_RESERVED_TYPE: res->name = "Reserved Memory"; diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 4d70472..cfc683f 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -226,6 +226,10 @@ ENTRY(system_call) pushl %eax # save orig_eax SAVE_ALL GET_THREAD_INFO(%ebp) + testl $TF_MASK,EFLAGS(%esp) + jz no_singlestep + orl $_TIF_SINGLESTEP,TI_flags(%ebp) +no_singlestep: # system call tracing in operation / emulation /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index e0b7c63..3debc2e 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -450,7 +450,6 @@ int_msg: .globl boot_gdt_descr .globl idt_descr -.globl cpu_gdt_descr ALIGN # early boot GDT descriptor (must use 1:1 address mapping) @@ -470,8 +469,6 @@ cpu_gdt_descr: .word GDT_ENTRIES*8-1 .long cpu_gdt_table - .fill NR_CPUS-1,8,0 # space for the other GDT descriptors - /* * The boot_gdt_table must mirror the equivalent in setup.S and is * used only for booting. @@ -485,7 +482,7 @@ ENTRY(boot_gdt_table) /* * The Global Descriptor Table contains 28 quadwords, per-CPU. */ - .align PAGE_SIZE_asm + .align L1_CACHE_BYTES ENTRY(cpu_gdt_table) .quad 0x0000000000000000 /* NULL descriptor */ .quad 0x0000000000000000 /* 0x0b reserved */ diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 39d9a5f..311b4e7 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -351,8 +351,8 @@ static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold) { int i, j; Dprintk("Rotating IRQs among CPUs.\n"); - for (i = 0; i < NR_CPUS; i++) { - for (j = 0; cpu_online(i) && (j < NR_IRQS); j++) { + for_each_online_cpu(i) { + for (j = 0; j < NR_IRQS; j++) { if (!irq_desc[j].action) continue; /* Is it a significant load ? 
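The io_apic.c balancing code here, like nmi.c and powernow-k8.c elsewhere in this diff, drops the open-coded for (i = 0; i < NR_CPUS; i++) / cpu_online(i) scans in favor of the cpumask iterators, which also cope with sparse CPU numbering. At this point in the tree they expanded approximately to:

	#define for_each_cpu(cpu)	  for_each_cpu_mask((cpu), cpu_possible_map)
	#define for_each_online_cpu(cpu)  for_each_cpu_mask((cpu), cpu_online_map)

(for_each_cpu() iterates possible CPUs and was later renamed for_each_possible_cpu().) Note the cleanup path in balanced_irq_init() switches to for_each_cpu() and NULLs the freed pointers, so freeing a partially initialized array stays safe.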
*/ @@ -381,7 +381,7 @@ static void do_irq_balance(void) unsigned long imbalance = 0; cpumask_t allowed_mask, target_cpu_mask, tmp; - for (i = 0; i < NR_CPUS; i++) { + for_each_cpu(i) { int package_index; CPU_IRQ(i) = 0; if (!cpu_online(i)) @@ -422,9 +422,7 @@ static void do_irq_balance(void) } } /* Find the least loaded processor package */ - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_online(i)) - continue; + for_each_online_cpu(i) { if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (min_cpu_irq > CPU_IRQ(i)) { @@ -441,9 +439,7 @@ tryanothercpu: */ tmp_cpu_irq = 0; tmp_loaded = -1; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_online(i)) - continue; + for_each_online_cpu(i) { if (i != CPU_TO_PACKAGEINDEX(i)) continue; if (max_cpu_irq <= CPU_IRQ(i)) @@ -619,9 +615,7 @@ static int __init balanced_irq_init(void) if (smp_num_siblings > 1 && !cpus_empty(tmp)) physical_balance = 1; - for (i = 0; i < NR_CPUS; i++) { - if (!cpu_online(i)) - continue; + for_each_online_cpu(i) { irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL); if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) { @@ -638,9 +632,11 @@ static int __init balanced_irq_init(void) else printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); failed: - for (i = 0; i < NR_CPUS; i++) { + for_each_cpu(i) { kfree(irq_cpu_data[i].irq_delta); + irq_cpu_data[i].irq_delta = NULL; kfree(irq_cpu_data[i].last_irq); + irq_cpu_data[i].last_irq = NULL; } return 0; } @@ -1761,7 +1757,8 @@ static void __init setup_ioapic_ids_from_mpc(void) * Don't check I/O APIC IDs for xAPIC systems. They have * no meaning without the serial APIC bus. */ - if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && boot_cpu_data.x86 < 15)) + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) return; /* * This is broken; anything with a real cpu count has to diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index 694a139..7a59050 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -84,9 +84,9 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) void __kprobes arch_remove_kprobe(struct kprobe *p) { - down(&kprobe_mutex); + mutex_lock(&kprobe_mutex); free_insn_slot(p->ainsn.insn); - up(&kprobe_mutex); + mutex_unlock(&kprobe_mutex); } static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) diff --git a/arch/i386/kernel/module.c b/arch/i386/kernel/module.c index 5149c8a..470cf97 100644 --- a/arch/i386/kernel/module.c +++ b/arch/i386/kernel/module.c @@ -104,26 +104,38 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, return -ENOEXEC; } -extern void apply_alternatives(void *start, void *end); - int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - const Elf_Shdr *s; + const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - /* look for .altinstructions to patch */ for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { - void *seg; - if (strcmp(".altinstructions", secstrings + s->sh_name)) - continue; - seg = (void *)s->sh_addr; - apply_alternatives(seg, seg + s->sh_size); - } + if (!strcmp(".text", secstrings + s->sh_name)) + text = s; + if (!strcmp(".altinstructions", secstrings + s->sh_name)) + alt = s; + if (!strcmp(".smp_locks", secstrings + s->sh_name)) + locks= s; + } + + if (alt) { + /* patch .altinstructions */ + void 
*aseg = (void *)alt->sh_addr; + apply_alternatives(aseg, aseg + alt->sh_size); + } + if (locks && text) { + void *lseg = (void *)locks->sh_addr; + void *tseg = (void *)text->sh_addr; + alternatives_smp_module_add(me, me->name, + lseg, lseg + locks->sh_size, + tseg, tseg + text->sh_size); + } return 0; } void module_arch_cleanup(struct module *mod) { + alternatives_smp_module_del(mod); } diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index e6e2f43..8d8aa9d 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c @@ -828,6 +828,8 @@ void __init find_smp_config (void) smp_scan_config(address, 0x400); } +int es7000_plat; + /* -------------------------------------------------------------------------- ACPI-based MP Configuration -------------------------------------------------------------------------- */ @@ -935,7 +937,8 @@ void __init mp_register_ioapic ( mp_ioapics[idx].mpc_apicaddr = address; set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); - if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && (boot_cpu_data.x86 < 15)) + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) tmpid = io_apic_get_unique_id(idx, id); else tmpid = id; @@ -1011,8 +1014,6 @@ void __init mp_override_legacy_irq ( return; } -int es7000_plat; - void __init mp_config_acpi_legacy_irqs (void) { struct mpc_config_intsrc intsrc; diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index be87c5e..9074818 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -143,7 +143,7 @@ static int __init check_nmi_watchdog(void) local_irq_enable(); mdelay((10*1000)/nmi_hz); // wait 10 ticks - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for_each_cpu(cpu) { #ifdef CONFIG_SMP /* Check cpu_callin_map here because that is set after the timer is started. */ @@ -510,7 +510,7 @@ void touch_nmi_watchdog (void) * Just reset the alert counters, (other CPUs might be * spinning on locks we hold): */ - for (i = 0; i < NR_CPUS; i++) + for_each_cpu(i) alert_counter[i] = 0; /* @@ -543,7 +543,7 @@ void nmi_watchdog_tick (struct pt_regs * regs) /* * die_nmi will return ONLY if NOTIFY_STOP happens.. */ - die_nmi(regs, "NMI Watchdog detected LOCKUP"); + die_nmi(regs, "BUG: NMI Watchdog detected LOCKUP"); } else { last_irq_sums[cpu] = sum; alert_counter[cpu] = 0; diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 0480454..299e616 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -295,7 +295,7 @@ void show_regs(struct pt_regs * regs) printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); - if (user_mode(regs)) + if (user_mode_vm(regs)) printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); printk(" EFLAGS: %08lx %s (%s %.*s)\n", regs->eflags, print_tainted(), system_utsname.release, diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index 5c1fb6a..506462e 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -34,10 +34,10 @@ /* * Determines which flags the user has access to [1 = access, 0 = no access]. - * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9). + * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), NT(14), IOPL(12-13), IF(9). * Also masks reserved bits (31-22, 15, 5, 3, 1). */ -#define FLAG_MASK 0x00054dd5 +#define FLAG_MASK 0x00050dd5 /* set's the trap flag. 
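The ptrace.c change above adds NT (bit 14) to the EFLAGS bits a tracer may not change. The mask arithmetic checks out: clearing bit 14 from the old mask yields the new one.

	/* X86_EFLAGS_NT == 0x00004000 (bit 14) */
	/* 0x00054dd5 & ~0x00004000 == 0x00050dd5 */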
*/ #define TRAP_FLAG 0x100 diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c index 7455ab6..967dc74 100644 --- a/arch/i386/kernel/semaphore.c +++ b/arch/i386/kernel/semaphore.c @@ -110,11 +110,11 @@ asm( ".align 4\n" ".globl __write_lock_failed\n" "__write_lock_failed:\n\t" - LOCK "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" + LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" "1: rep; nop\n\t" "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" "jne 1b\n\t" - LOCK "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" + LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" "jnz __write_lock_failed\n\t" "ret" ); @@ -124,11 +124,11 @@ asm( ".align 4\n" ".globl __read_lock_failed\n" "__read_lock_failed:\n\t" - LOCK "incl (%eax)\n" + LOCK_PREFIX "incl (%eax)\n" "1: rep; nop\n\t" "cmpl $1,(%eax)\n\t" "js 1b\n\t" - LOCK "decl (%eax)\n\t" + LOCK_PREFIX "decl (%eax)\n\t" "js __read_lock_failed\n\t" "ret" ); diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index ab62a9f..d313a11 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -1288,7 +1288,7 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat struct resource *res; if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL) continue; - res = alloc_bootmem_low(sizeof(struct resource)); + res = kzalloc(sizeof(struct resource), GFP_ATOMIC); switch (e820.map[i].type) { case E820_RAM: res->name = "System RAM"; break; case E820_ACPI: res->name = "ACPI Tables"; break; @@ -1316,13 +1316,15 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat /* * Request address space for all standard resources + * + * This is called just before pcibios_assign_resources(), which is also + * an fs_initcall, but is linked in later (in arch/i386/pci/i386.c). */ -static void __init register_memory(void) +static int __init request_standard_resources(void) { - unsigned long gapstart, gapsize, round; - unsigned long long last; - int i; + int i; + printk("Setting up standard PCI resources\n"); if (efi_enabled) efi_initialize_iomem_resources(&code_resource, &data_resource); else @@ -1334,6 +1336,16 @@ static void __init register_memory(void) /* request I/O space for devices used on all i[345]86 PCs */ for (i = 0; i < STANDARD_IO_RESOURCES; i++) request_resource(&ioport_resource, &standard_io_resources[i]); + return 0; +} + +fs_initcall(request_standard_resources); + +static void __init register_memory(void) +{ + unsigned long gapstart, gapsize, round; + unsigned long long last; + int i; /* * Search for the bigest gap in the low 32 bits of the e820 @@ -1377,101 +1389,6 @@ static void __init register_memory(void) pci_mem_start, gapstart, gapsize); } -/* Use inline assembly to define this because the nops are defined - as inline assembly strings in the include files and we cannot - get them easily into strings. 
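In setup.c the standard-resource registration leaves setup_arch() and becomes request_standard_resources(), an fs_initcall. By initcall level 5 the slab allocator is up, which is what lets both this code and efi.c above allocate struct resource with kzalloc(..., GFP_ATOMIC) instead of alloc_bootmem_low(). The initcall ladder of this era, approximately (include/linux/init.h):

	#define core_initcall(fn)	__define_initcall("1",fn)
	#define postcore_initcall(fn)	__define_initcall("2",fn)
	#define arch_initcall(fn)	__define_initcall("3",fn)
	#define subsys_initcall(fn)	__define_initcall("4",fn)
	#define fs_initcall(fn)		__define_initcall("5",fn)
	#define device_initcall(fn)	__define_initcall("6",fn)
	#define late_initcall(fn)	__define_initcall("7",fn)

As the new comment in the hunk notes, ordering against pcibios_assign_resources(), also an fs_initcall, is decided by link order.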
*/ -asm("\t.data\nintelnops: " - GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 - GENERIC_NOP7 GENERIC_NOP8); -asm("\t.data\nk8nops: " - K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 - K8_NOP7 K8_NOP8); -asm("\t.data\nk7nops: " - K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 - K7_NOP7 K7_NOP8); - -extern unsigned char intelnops[], k8nops[], k7nops[]; -static unsigned char *intel_nops[ASM_NOP_MAX+1] = { - NULL, - intelnops, - intelnops + 1, - intelnops + 1 + 2, - intelnops + 1 + 2 + 3, - intelnops + 1 + 2 + 3 + 4, - intelnops + 1 + 2 + 3 + 4 + 5, - intelnops + 1 + 2 + 3 + 4 + 5 + 6, - intelnops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -static unsigned char *k8_nops[ASM_NOP_MAX+1] = { - NULL, - k8nops, - k8nops + 1, - k8nops + 1 + 2, - k8nops + 1 + 2 + 3, - k8nops + 1 + 2 + 3 + 4, - k8nops + 1 + 2 + 3 + 4 + 5, - k8nops + 1 + 2 + 3 + 4 + 5 + 6, - k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -static unsigned char *k7_nops[ASM_NOP_MAX+1] = { - NULL, - k7nops, - k7nops + 1, - k7nops + 1 + 2, - k7nops + 1 + 2 + 3, - k7nops + 1 + 2 + 3 + 4, - k7nops + 1 + 2 + 3 + 4 + 5, - k7nops + 1 + 2 + 3 + 4 + 5 + 6, - k7nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, -}; -static struct nop { - int cpuid; - unsigned char **noptable; -} noptypes[] = { - { X86_FEATURE_K8, k8_nops }, - { X86_FEATURE_K7, k7_nops }, - { -1, NULL } -}; - -/* Replace instructions with better alternatives for this CPU type. - - This runs before SMP is initialized to avoid SMP problems with - self modifying code. This implies that assymetric systems where - APs have less capabilities than the boot processor are not handled. - Tough. Make sure you disable such features by hand. */ -void apply_alternatives(void *start, void *end) -{ - struct alt_instr *a; - int diff, i, k; - unsigned char **noptable = intel_nops; - for (i = 0; noptypes[i].cpuid >= 0; i++) { - if (boot_cpu_has(noptypes[i].cpuid)) { - noptable = noptypes[i].noptable; - break; - } - } - for (a = start; (void *)a < end; a++) { - if (!boot_cpu_has(a->cpuid)) - continue; - BUG_ON(a->replacementlen > a->instrlen); - memcpy(a->instr, a->replacement, a->replacementlen); - diff = a->instrlen - a->replacementlen; - /* Pad the rest with nops */ - for (i = a->replacementlen; diff > 0; diff -= k, i += k) { - k = diff; - if (k > ASM_NOP_MAX) - k = ASM_NOP_MAX; - memcpy(a->instr + i, noptable[k], k); - } - } -} - -void __init alternative_instructions(void) -{ - extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; - apply_alternatives(__alt_instructions, __alt_instructions_end); -} - static char * __init machine_specific_memory_setup(void); #ifdef CONFIG_MCA @@ -1554,6 +1471,16 @@ void __init setup_arch(char **cmdline_p) parse_cmdline_early(cmdline_p); +#ifdef CONFIG_EARLY_PRINTK + { + char *s = strstr(*cmdline_p, "earlyprintk="); + if (s) { + setup_early_printk(strchr(s, '=') + 1); + printk("early console enabled\n"); + } + } +#endif + max_low_pfn = setup_memory(); /* @@ -1578,19 +1505,6 @@ void __init setup_arch(char **cmdline_p) * NOTE: at this point the bootmem allocator is fully available. 
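The earlyprintk= block deleted just below is the one that moved up earlier in this file, before max_low_pfn = setup_memory(); the point of the move is that an early console, e.g. booting with earlyprintk=vga or earlyprintk=serial,ttyS0,115200, now catches messages from memory setup itself. The local extern declaration disappears because the moved code presumably picks up the setup_early_printk() prototype from a header.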
*/ -#ifdef CONFIG_EARLY_PRINTK - { - char *s = strstr(*cmdline_p, "earlyprintk="); - if (s) { - extern void setup_early_printk(char *); - - setup_early_printk(strchr(s, '=') + 1); - printk("early console enabled\n"); - } - } -#endif - - dmi_scan_machine(); #ifdef CONFIG_X86_GENERICARCH diff --git a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c index 963616d..5c352c3 100644 --- a/arch/i386/kernel/signal.c +++ b/arch/i386/kernel/signal.c @@ -123,7 +123,8 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax err |= __get_user(tmp, &sc->seg); \ loadsegment(seg,tmp); } -#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | X86_EFLAGS_DF | \ +#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_RF | \ + X86_EFLAGS_OF | X86_EFLAGS_DF | \ X86_EFLAGS_TF | X86_EFLAGS_SF | X86_EFLAGS_ZF | \ X86_EFLAGS_AF | X86_EFLAGS_PF | X86_EFLAGS_CF) @@ -582,9 +583,6 @@ static void fastcall do_signal(struct pt_regs *regs) if (!user_mode(regs)) return; - if (try_to_freeze()) - goto no_signal; - if (test_thread_flag(TIF_RESTORE_SIGMASK)) oldset = &current->saved_sigmask; else @@ -613,7 +611,6 @@ static void fastcall do_signal(struct pt_regs *regs) return; } -no_signal: /* Did we come from a system call? */ if (regs->orig_eax >= 0) { /* Restart the system call - no handlers present */ diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 218d725..d134e96 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c @@ -504,27 +504,23 @@ void unlock_ipi_call_lock(void) spin_unlock_irq(&call_lock); } -static struct call_data_struct * call_data; - -/* - * this function sends a 'generic call function' IPI to all other CPUs - * in the system. - */ - -int smp_call_function (void (*func) (void *info), void *info, int nonatomic, - int wait) -/* - * [SUMMARY] Run a function on all other CPUs. - * <func> The function to run. This must be fast and non-blocking. - * <info> An arbitrary pointer to pass to the function. - * <nonatomic> currently unused. - * <wait> If true, wait (atomically) until function has completed on other CPUs. - * [RETURNS] 0 on success, else a negative status code. Does not return until +static struct call_data_struct *call_data; + +/** + * smp_call_function(): Run a function on all other CPUs. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @nonatomic: currently unused. + * @wait: If true, wait (atomically) until function has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. Does not return until * remote CPUs are nearly ready to execute <<func>> or are or have executed. * * You must not call this function with disabled interrupts or from a * hardware interrupt handler or from a bottom half handler.
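With the documentation now in kernel-doc form, a minimal caller of smp_call_function() looks like this (illustrative; do_count and count_other_cpus are made up for the example):

	#include <linux/smp.h>
	#include <linux/kernel.h>
	#include <asm/atomic.h>

	static void do_count(void *info)
	{
		atomic_inc((atomic_t *)info);	/* runs on every other CPU */
	}

	static void count_other_cpus(void)
	{
		atomic_t hits = ATOMIC_INIT(0);

		/* wait=1: do not return until all other CPUs ran do_count() */
		if (smp_call_function(do_count, &hits, 0, 1) != 0)
			printk(KERN_ERR "smp_call_function failed\n");
		printk(KERN_INFO "%d other CPUs responded\n", atomic_read(&hits));
	}

Per the restrictions above, this must be called with interrupts enabled and never from hardirq or bottom-half context.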
*/ +int smp_call_function (void (*func) (void *info), void *info, int nonatomic, + int wait) { struct call_data_struct data; int cpus; diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 7007e17..4c470e9 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -899,6 +899,7 @@ static int __devinit do_boot_cpu(int apicid, int cpu) unsigned short nmi_high = 0, nmi_low = 0; ++cpucount; + alternatives_smp_switch(1); /* * We can't use kernel_thread since we must avoid to @@ -1368,6 +1369,8 @@ void __cpu_die(unsigned int cpu) /* They ack this in play_dead by setting CPU_DEAD */ if (per_cpu(cpu_state, cpu) == CPU_DEAD) { printk ("CPU %d is now offline\n", cpu); + if (1 == num_online_cpus()) + alternatives_smp_switch(0); return; } msleep(100); diff --git a/arch/i386/kernel/sys_i386.c b/arch/i386/kernel/sys_i386.c index a4a6197..8fdb1fb 100644 --- a/arch/i386/kernel/sys_i386.c +++ b/arch/i386/kernel/sys_i386.c @@ -40,14 +40,13 @@ asmlinkage int sys_pipe(unsigned long __user * fildes) return error; } -/* common code for old and new mmaps */ -static inline long do_mmap2( - unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) +asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) { int error = -EBADF; - struct file * file = NULL; + struct file *file = NULL; + struct mm_struct *mm = current->mm; flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); if (!(flags & MAP_ANONYMOUS)) { @@ -56,9 +55,9 @@ static inline long do_mmap2( goto out; } - down_write(&current->mm->mmap_sem); + down_write(&mm->mmap_sem); error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); - up_write(&current->mm->mmap_sem); + up_write(&mm->mmap_sem); if (file) fput(file); @@ -66,13 +65,6 @@ out: return error; } -asmlinkage long sys_mmap2(unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, - unsigned long fd, unsigned long pgoff) -{ - return do_mmap2(addr, len, prot, flags, fd, pgoff); -} - /* * Perform the select(nd, in, out, ex, tv) and mmap() system * calls. Linux/i386 didn't use to be able to handle more than @@ -101,7 +93,8 @@ asmlinkage int old_mmap(struct mmap_arg_struct __user *arg) if (a.offset & ~PAGE_MASK) goto out; - err = do_mmap2(a.addr, a.len, a.prot, a.flags, a.fd, a.offset >> PAGE_SHIFT); + err = sys_mmap2(a.addr, a.len, a.prot, a.flags, + a.fd, a.offset >> PAGE_SHIFT); out: return err; } diff --git a/arch/i386/kernel/timers/timer_hpet.c b/arch/i386/kernel/timers/timer_hpet.c index be24272..17a6fe7 100644 --- a/arch/i386/kernel/timers/timer_hpet.c +++ b/arch/i386/kernel/timers/timer_hpet.c @@ -46,7 +46,7 @@ static seqlock_t monotonic_lock = SEQLOCK_UNLOCKED; * * -johnstul@us.ibm.com "math is hard, lets go shopping!" */ -static unsigned long cyc2ns_scale; +static unsigned long cyc2ns_scale __read_mostly; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ static inline void set_cyc2ns_scale(unsigned long cpu_khz) diff --git a/arch/i386/kernel/timers/timer_tsc.c b/arch/i386/kernel/timers/timer_tsc.c index a7f5a2a..5e41ee2 100644 --- a/arch/i386/kernel/timers/timer_tsc.c +++ b/arch/i386/kernel/timers/timer_tsc.c @@ -74,7 +74,7 @@ late_initcall(start_lost_tick_compensation); * * -johnstul@us.ibm.com "math is hard, lets go shopping!"
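cyc2ns_scale is written once during calibration and read on every timestamp, hence the __read_mostly annotations in both timer_hpet.c and timer_tsc.c. The fixed-point scheme the comment alludes to, with CYC2NS_SCALE_FACTOR = 10, is:

	cyc2ns_scale = (1000000 << 10) / cpu_khz;	/* ns per cycle, scaled by 1024 */
	ns = (cycles * cyc2ns_scale) >> 10;

Worked example: cpu_khz = 2000000 (a 2 GHz TSC) gives cyc2ns_scale = 512, so ns = cycles / 2, i.e. 0.5 ns per cycle, as expected.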
*/ -static unsigned long cyc2ns_scale; +static unsigned long cyc2ns_scale __read_mostly; #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ static inline void set_cyc2ns_scale(unsigned long cpu_khz) diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c index 67a0e1b..2963552 100644 --- a/arch/i386/kernel/topology.c +++ b/arch/i386/kernel/topology.c @@ -41,6 +41,15 @@ int arch_register_cpu(int num){ parent = &node_devices[node].node; #endif /* CONFIG_NUMA */ + /* + * CPU0 cannot be offlined due to several + * restrictions and assumptions in kernel. This basically + * doesnt add a control file, one cannot attempt to offline + * BSP. + */ + if (!num) + cpu_devices[num].cpu.no_control = 1; + return register_cpu(&cpu_devices[num].cpu, num, parent); } diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index b814dbd..de5386b 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -99,6 +99,8 @@ int register_die_notifier(struct notifier_block *nb) { int err = 0; unsigned long flags; + + vmalloc_sync_all(); spin_lock_irqsave(&die_notifier_lock, flags); err = notifier_chain_register(&i386die_chain, nb); spin_unlock_irqrestore(&die_notifier_lock, flags); @@ -112,12 +114,30 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) p < (void *)tinfo + THREAD_SIZE - 3; } -static void print_addr_and_symbol(unsigned long addr, char *log_lvl) +/* + * Print CONFIG_STACK_BACKTRACE_COLS address/symbol entries per line. + */ +static inline int print_addr_and_symbol(unsigned long addr, char *log_lvl, + int printed) { - printk(log_lvl); + if (!printed) + printk(log_lvl); + +#if CONFIG_STACK_BACKTRACE_COLS == 1 printk(" [<%08lx>] ", addr); +#else + printk(" <%08lx> ", addr); +#endif print_symbol("%s", addr); - printk("\n"); + + printed = (printed + 1) % CONFIG_STACK_BACKTRACE_COLS; + + if (printed) + printk(" "); + else + printk("\n"); + + return printed; } static inline unsigned long print_context_stack(struct thread_info *tinfo, @@ -125,20 +145,24 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, char *log_lvl) { unsigned long addr; + int printed = 0; /* nr of entries already printed on current line */ #ifdef CONFIG_FRAME_POINTER while (valid_stack_ptr(tinfo, (void *)ebp)) { addr = *(unsigned long *)(ebp + 4); - print_addr_and_symbol(addr, log_lvl); + printed = print_addr_and_symbol(addr, log_lvl, printed); ebp = *(unsigned long *)ebp; } #else while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; if (__kernel_text_address(addr)) - print_addr_and_symbol(addr, log_lvl); + printed = print_addr_and_symbol(addr, log_lvl, printed); } #endif + if (printed) + printk("\n"); + return ebp; } @@ -166,8 +190,7 @@ static void show_trace_log_lvl(struct task_struct *task, stack = (unsigned long*)context->previous_esp; if (!stack) break; - printk(log_lvl); - printk(" =======================\n"); + printk("%s =======================\n", log_lvl); } } @@ -194,21 +217,17 @@ static void show_stack_log_lvl(struct task_struct *task, unsigned long *esp, for(i = 0; i < kstack_depth_to_print; i++) { if (kstack_end(stack)) break; - if (i && ((i % 8) == 0)) { - printk("\n"); - printk(log_lvl); - printk(" "); - } + if (i && ((i % 8) == 0)) + printk("\n%s ", log_lvl); printk("%08lx ", *stack++); } - printk("\n"); - printk(log_lvl); - printk("Call Trace:\n"); + printk("\n%sCall Trace:\n", log_lvl); show_trace_log_lvl(task, esp, log_lvl); } void show_stack(struct task_struct *task, unsigned long *esp) { + printk(" "); show_stack_log_lvl(task, 
esp, ""); } @@ -233,7 +252,7 @@ void show_registers(struct pt_regs *regs) esp = (unsigned long) (®s->esp); savesegment(ss, ss); - if (user_mode(regs)) { + if (user_mode_vm(regs)) { in_kernel = 0; esp = regs->esp; ss = regs->xss & 0xffff; @@ -333,6 +352,8 @@ void die(const char * str, struct pt_regs * regs, long err) static int die_counter; unsigned long flags; + oops_enter(); + if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); spin_lock_irqsave(&die.lock, flags); @@ -385,6 +406,7 @@ void die(const char * str, struct pt_regs * regs, long err) ssleep(5); panic("Fatal exception"); } + oops_exit(); do_exit(SIGSEGV); } @@ -623,7 +645,7 @@ void die_nmi (struct pt_regs *regs, const char *msg) /* If we are in kernel we are probably nested up pretty bad * and might aswell get out now while we still can. */ - if (!user_mode(regs)) { + if (!user_mode_vm(regs)) { current->thread.trap_no = 2; crash_kexec(regs); } @@ -694,6 +716,7 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) void set_nmi_callback(nmi_callback_t callback) { + vmalloc_sync_all(); rcu_assign_pointer(nmi_callback, callback); } EXPORT_SYMBOL_GPL(set_nmi_callback); diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 4710195..3f21c6f 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -68,6 +68,26 @@ SECTIONS *(.data.init_task) } + /* might get freed after init */ + . = ALIGN(4096); + __smp_alt_begin = .; + __smp_alt_instructions = .; + .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) { + *(.smp_altinstructions) + } + __smp_alt_instructions_end = .; + . = ALIGN(4); + __smp_locks = .; + .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { + *(.smp_locks) + } + __smp_locks_end = .; + .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) { + *(.smp_altinstr_replacement) + } + . = ALIGN(4096); + __smp_alt_end = .; + /* will be freed after init */ . = ALIGN(4096); /* Init code and data */ __init_begin = .; diff --git a/arch/i386/kernel/vsyscall-sysenter.S b/arch/i386/kernel/vsyscall-sysenter.S index 76b7281..3b62baa 100644 --- a/arch/i386/kernel/vsyscall-sysenter.S +++ b/arch/i386/kernel/vsyscall-sysenter.S @@ -21,6 +21,9 @@ * instruction clobbers %esp, the user's %esp won't even survive entry * into the kernel. We store %esp in %ebp. Code in entry.S must fetch * arg6 from the stack. + * + * You can not use this vsyscall for the clone() syscall because the + * three dwords on the parent stack do not get copied to the child. */ .text .globl __kernel_vsyscall |
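Two loose ends worth tying together. First, alternative_instructions() runs once, at the end of the boot CPU checks; on this kernel the caller was check_bugs() in include/asm-i386/bugs.h, outside this diffstat (sketched from memory, so approximate):

	static void __init check_bugs(void)
	{
		identify_cpu(&boot_cpu_data);
		/* ... FPU, hlt, popad checks ... */
		alternative_instructions();	/* patch alternatives and lock prefixes */
	}

Second, the memory story: when the CPU count can never grow (num_possible_cpus() < 2 with CONFIG_HOTPLUG_CPU, no hotplug support at all, or the new smp-alt-boot command-line option), the kernel patches exactly once and free_init_pages() hands the __smp_alt_begin..__smp_alt_end region declared in vmlinux.lds.S above back to the page allocator. Otherwise the replacement and save tables stay resident so alternatives_smp_switch() can flip between the UP and SMP variants as CPUs come and go.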