diff options
Diffstat (limited to 'arch/i386/kernel')
28 files changed, 443 insertions, 318 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index cbc1184..5427a84 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -9,6 +9,7 @@ obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ pci-dma.o i386_ksyms.o i387.o bootflag.o \ quirks.o i8237.o topology.o alternative.o i8253.o tsc.o +obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-y += cpu/ obj-y += acpi/ obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o @@ -38,7 +39,6 @@ obj-$(CONFIG_VM86) += vm86.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_K8_NB) += k8.o -obj-$(CONFIG_AUDIT) += audit.o EXTRA_AFLAGS := -traditional @@ -58,7 +58,8 @@ quiet_cmd_syscall = SYSCALL $@ export CPPFLAGS_vsyscall.lds += -P -C -U$(ARCH) -vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1 +vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1 \ + $(call ld-option, -Wl$(comma)--hash-style=sysv) SYSCFLAGS_vsyscall-sysenter.so = $(vsyscall-flags) SYSCFLAGS_vsyscall-int80.so = $(vsyscall-flags) diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 0db6387..ee003bc 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -59,7 +59,7 @@ static inline int gsi_irq_sharing(int gsi) { return gsi; } #define BAD_MADT_ENTRY(entry, end) ( \ (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ - ((acpi_table_entry_header *)entry)->length != sizeof(*entry)) + ((acpi_table_entry_header *)entry)->length < sizeof(*entry)) #define PREFIX "ACPI: " diff --git a/arch/i386/kernel/acpi/wakeup.S b/arch/i386/kernel/acpi/wakeup.S index 9f408ee..b781b38 100644 --- a/arch/i386/kernel/acpi/wakeup.S +++ b/arch/i386/kernel/acpi/wakeup.S @@ -292,7 +292,10 @@ ENTRY(do_suspend_lowlevel) pushl $3 call acpi_enter_sleep_state addl $4, %esp - ret + +# In case of S3 failure, we'll emerge here. Jump +# to ret_point to recover + jmp ret_point .p2align 4,,7 ret_point: call restore_registers diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c index 7b421b3..28ab806 100644 --- a/arch/i386/kernel/alternative.c +++ b/arch/i386/kernel/alternative.c @@ -303,6 +303,16 @@ void alternatives_smp_switch(int smp) struct smp_alt_module *mod; unsigned long flags; +#ifdef CONFIG_LOCKDEP + /* + * A not yet fixed binutils section handling bug prevents + * alternatives-replacement from working reliably, so turn + * it off: + */ + printk("lockdep: not fixing up alternatives.\n"); + return; +#endif + if (no_replacement || smp_alt_once) return; BUG_ON(!smp && (num_online_cpus() > 1)); diff --git a/arch/i386/kernel/audit.c b/arch/i386/kernel/audit.c deleted file mode 100644 index 5a53c6f..0000000 --- a/arch/i386/kernel/audit.c +++ /dev/null @@ -1,23 +0,0 @@ -#include <linux/init.h> -#include <linux/types.h> -#include <linux/audit.h> -#include <asm/unistd.h> - -static unsigned dir_class[] = { -#include <asm-generic/audit_dir_write.h> -~0U -}; - -static unsigned chattr_class[] = { -#include <asm-generic/audit_change_attr.h> -~0U -}; - -static int __init audit_classes_init(void) -{ - audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); - audit_register_class(AUDIT_CLASS_CHATTR, chattr_class); - return 0; -} - -__initcall(audit_classes_init); diff --git a/arch/i386/kernel/cpu/cpufreq/Kconfig b/arch/i386/kernel/cpu/cpufreq/Kconfig index e44a4c6..ccc1edf 100644 --- a/arch/i386/kernel/cpu/cpufreq/Kconfig +++ b/arch/i386/kernel/cpu/cpufreq/Kconfig @@ -96,6 +96,7 @@ config X86_POWERNOW_K8_ACPI config X86_GX_SUSPMOD tristate "Cyrix MediaGX/NatSemi Geode Suspend Modulation" + depends on PCI help This add the CPUFreq driver for NatSemi Geode processors which support suspend modulation. @@ -202,7 +203,7 @@ config X86_LONGRUN config X86_LONGHAUL tristate "VIA Cyrix III Longhaul" select CPU_FREQ_TABLE - depends on BROKEN + depends on ACPI_PROCESSOR help This adds the CPUFreq driver for VIA Samuel/CyrixIII, VIA Cyrix Samuel/C3, VIA Cyrix Ezra and VIA Cyrix Ezra-T diff --git a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c index 567b39b..e6ea00e 100644 --- a/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c @@ -384,8 +384,7 @@ static int acpi_cpufreq_early_init_acpi(void) } /* Do initialization in ACPI core */ - acpi_processor_preregister_performance(acpi_perf_data); - return 0; + return acpi_processor_preregister_performance(acpi_perf_data); } static int @@ -568,16 +567,11 @@ static struct cpufreq_driver acpi_cpufreq_driver = { static int __init acpi_cpufreq_init (void) { - int result = 0; - dprintk("acpi_cpufreq_init\n"); - result = acpi_cpufreq_early_init_acpi(); + acpi_cpufreq_early_init_acpi(); - if (!result) - result = cpufreq_register_driver(&acpi_cpufreq_driver); - - return (result); + return cpufreq_register_driver(&acpi_cpufreq_driver); } diff --git a/arch/i386/kernel/cpu/cpufreq/longhaul.c b/arch/i386/kernel/cpu/cpufreq/longhaul.c index 146f607..4f2c3ae 100644 --- a/arch/i386/kernel/cpu/cpufreq/longhaul.c +++ b/arch/i386/kernel/cpu/cpufreq/longhaul.c @@ -29,11 +29,13 @@ #include <linux/cpufreq.h> #include <linux/slab.h> #include <linux/string.h> -#include <linux/pci.h> #include <asm/msr.h> #include <asm/timex.h> #include <asm/io.h> +#include <asm/acpi.h> +#include <linux/acpi.h> +#include <acpi/processor.h> #include "longhaul.h" @@ -56,6 +58,8 @@ static int minvid, maxvid; static unsigned int minmult, maxmult; static int can_scale_voltage; static int vrmrev; +static struct acpi_processor *pr = NULL; +static struct acpi_processor_cx *cx = NULL; /* Module parameters */ static int dont_scale_voltage; @@ -118,84 +122,65 @@ static int longhaul_get_cpu_mult(void) return eblcr_table[invalue]; } +/* For processor with BCR2 MSR */ -static void do_powersaver(union msr_longhaul *longhaul, - unsigned int clock_ratio_index) +static void do_longhaul1(int cx_address, unsigned int clock_ratio_index) { - struct pci_dev *dev; - unsigned long flags; - unsigned int tmp_mask; - int version; - int i; - u16 pci_cmd; - u16 cmd_state[64]; + union msr_bcr2 bcr2; + u32 t; - switch (cpu_model) { - case CPU_EZRA_T: - version = 3; - break; - case CPU_NEHEMIAH: - version = 0xf; - break; - default: - return; - } + rdmsrl(MSR_VIA_BCR2, bcr2.val); + /* Enable software clock multiplier */ + bcr2.bits.ESOFTBF = 1; + bcr2.bits.CLOCKMUL = clock_ratio_index; - rdmsrl(MSR_VIA_LONGHAUL, longhaul->val); - longhaul->bits.SoftBusRatio = clock_ratio_index & 0xf; - longhaul->bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; - longhaul->bits.EnableSoftBusRatio = 1; - longhaul->bits.RevisionKey = 0; + /* Sync to timer tick */ + safe_halt(); + ACPI_FLUSH_CPU_CACHE(); + /* Change frequency on next halt or sleep */ + wrmsrl(MSR_VIA_BCR2, bcr2.val); + /* Invoke C3 */ + inb(cx_address); + /* Dummy op - must do something useless after P_LVL3 read */ + t = inl(acpi_fadt.xpm_tmr_blk.address); + + /* Disable software clock multiplier */ + local_irq_disable(); + rdmsrl(MSR_VIA_BCR2, bcr2.val); + bcr2.bits.ESOFTBF = 0; + wrmsrl(MSR_VIA_BCR2, bcr2.val); +} - preempt_disable(); - local_irq_save(flags); +/* For processor with Longhaul MSR */ - /* - * get current pci bus master state for all devices - * and clear bus master bit - */ - dev = NULL; - i = 0; - do { - dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); - if (dev != NULL) { - pci_read_config_word(dev, PCI_COMMAND, &pci_cmd); - cmd_state[i++] = pci_cmd; - pci_cmd &= ~PCI_COMMAND_MASTER; - pci_write_config_word(dev, PCI_COMMAND, pci_cmd); - } - } while (dev != NULL); +static void do_powersaver(int cx_address, unsigned int clock_ratio_index) +{ + union msr_longhaul longhaul; + u32 t; - tmp_mask=inb(0x21); /* works on C3. save mask. */ - outb(0xFE,0x21); /* TMR0 only */ - outb(0xFF,0x80); /* delay */ + rdmsrl(MSR_VIA_LONGHAUL, longhaul.val); + longhaul.bits.RevisionKey = longhaul.bits.RevisionID; + longhaul.bits.SoftBusRatio = clock_ratio_index & 0xf; + longhaul.bits.SoftBusRatio4 = (clock_ratio_index & 0x10) >> 4; + longhaul.bits.EnableSoftBusRatio = 1; + /* Sync to timer tick */ safe_halt(); - wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); - halt(); - + ACPI_FLUSH_CPU_CACHE(); + /* Change frequency on next halt or sleep */ + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); + /* Invoke C3 */ + inb(cx_address); + /* Dummy op - must do something useless after P_LVL3 read */ + t = inl(acpi_fadt.xpm_tmr_blk.address); + + /* Disable bus ratio bit */ local_irq_disable(); - - outb(tmp_mask,0x21); /* restore mask */ - - /* restore pci bus master state for all devices */ - dev = NULL; - i = 0; - do { - dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); - if (dev != NULL) { - pci_cmd = cmd_state[i++]; - pci_write_config_byte(dev, PCI_COMMAND, pci_cmd); - } - } while (dev != NULL); - local_irq_restore(flags); - preempt_enable(); - - /* disable bus ratio bit */ - rdmsrl(MSR_VIA_LONGHAUL, longhaul->val); - longhaul->bits.EnableSoftBusRatio = 0; - longhaul->bits.RevisionKey = version; - wrmsrl(MSR_VIA_LONGHAUL, longhaul->val); + longhaul.bits.RevisionKey = longhaul.bits.RevisionID; + longhaul.bits.EnableSoftBusRatio = 0; + longhaul.bits.EnableSoftBSEL = 0; + longhaul.bits.EnableSoftVID = 0; + wrmsrl(MSR_VIA_LONGHAUL, longhaul.val); } /** @@ -209,9 +194,9 @@ static void longhaul_setstate(unsigned int clock_ratio_index) { int speed, mult; struct cpufreq_freqs freqs; - union msr_longhaul longhaul; - union msr_bcr2 bcr2; static unsigned int old_ratio=-1; + unsigned long flags; + unsigned int pic1_mask, pic2_mask; if (old_ratio == clock_ratio_index) return; @@ -234,6 +219,20 @@ static void longhaul_setstate(unsigned int clock_ratio_index) dprintk ("Setting to FSB:%dMHz Mult:%d.%dx (%s)\n", fsb, mult/10, mult%10, print_speed(speed/1000)); + preempt_disable(); + local_irq_save(flags); + + pic2_mask = inb(0xA1); + pic1_mask = inb(0x21); /* works on C3. save mask. */ + outb(0xFF,0xA1); /* Overkill */ + outb(0xFE,0x21); /* TMR0 only */ + + /* Disable bus master arbitration */ + if (pr->flags.bm_check) { + acpi_set_register(ACPI_BITREG_ARB_DISABLE, 1, + ACPI_MTX_DO_NOT_LOCK); + } + switch (longhaul_version) { /* @@ -245,20 +244,7 @@ static void longhaul_setstate(unsigned int clock_ratio_index) */ case TYPE_LONGHAUL_V1: case TYPE_LONGHAUL_V2: - rdmsrl (MSR_VIA_BCR2, bcr2.val); - /* Enable software clock multiplier */ - bcr2.bits.ESOFTBF = 1; - bcr2.bits.CLOCKMUL = clock_ratio_index; - local_irq_disable(); - wrmsrl (MSR_VIA_BCR2, bcr2.val); - safe_halt(); - - /* Disable software clock multiplier */ - rdmsrl (MSR_VIA_BCR2, bcr2.val); - bcr2.bits.ESOFTBF = 0; - local_irq_disable(); - wrmsrl (MSR_VIA_BCR2, bcr2.val); - local_irq_enable(); + do_longhaul1(cx->address, clock_ratio_index); break; /* @@ -273,10 +259,22 @@ static void longhaul_setstate(unsigned int clock_ratio_index) * to work in practice. */ case TYPE_POWERSAVER: - do_powersaver(&longhaul, clock_ratio_index); + do_powersaver(cx->address, clock_ratio_index); break; } + /* Enable bus master arbitration */ + if (pr->flags.bm_check) { + acpi_set_register(ACPI_BITREG_ARB_DISABLE, 0, + ACPI_MTX_DO_NOT_LOCK); + } + + outb(pic2_mask,0xA1); /* restore mask */ + outb(pic1_mask,0x21); + + local_irq_restore(flags); + preempt_enable(); + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); } @@ -324,9 +322,11 @@ static int guess_fsb(void) static int __init longhaul_get_ranges(void) { unsigned long invalue; - unsigned int multipliers[32]= { - 50,30,40,100,55,35,45,95,90,70,80,60,120,75,85,65, - -1,110,120,-1,135,115,125,105,130,150,160,140,-1,155,-1,145 }; + unsigned int ezra_t_multipliers[32]= { + 90, 30, 40, 100, 55, 35, 45, 95, + 50, 70, 80, 60, 120, 75, 85, 65, + -1, 110, 120, -1, 135, 115, 125, 105, + 130, 150, 160, 140, -1, 155, -1, 145 }; unsigned int j, k = 0; union msr_longhaul longhaul; unsigned long lo, hi; @@ -355,13 +355,13 @@ static int __init longhaul_get_ranges(void) invalue = longhaul.bits.MaxMHzBR; if (longhaul.bits.MaxMHzBR4) invalue += 16; - maxmult=multipliers[invalue]; + maxmult=ezra_t_multipliers[invalue]; invalue = longhaul.bits.MinMHzBR; if (longhaul.bits.MinMHzBR4 == 1) minmult = 30; else - minmult = multipliers[invalue]; + minmult = ezra_t_multipliers[invalue]; fsb = eblcr_fsb_table_v2[longhaul.bits.MaxMHzFSB]; break; } @@ -527,6 +527,18 @@ static unsigned int longhaul_get(unsigned int cpu) return calc_speed(longhaul_get_cpu_mult()); } +static acpi_status longhaul_walk_callback(acpi_handle obj_handle, + u32 nesting_level, + void *context, void **return_value) +{ + struct acpi_device *d; + + if ( acpi_bus_get_device(obj_handle, &d) ) { + return 0; + } + *return_value = (void *)acpi_driver_data(d); + return 1; +} static int __init longhaul_cpu_init(struct cpufreq_policy *policy) { @@ -534,6 +546,15 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) char *cpuname=NULL; int ret; + /* Check ACPI support for C3 state */ + acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, ACPI_UINT32_MAX, + &longhaul_walk_callback, NULL, (void *)&pr); + if (pr == NULL) goto err_acpi; + + cx = &pr->power.states[ACPI_STATE_C3]; + if (cx->address == 0 || cx->latency > 1000) goto err_acpi; + + /* Now check what we have on this motherboard */ switch (c->x86_model) { case 6: cpu_model = CPU_SAMUEL; @@ -634,6 +655,10 @@ static int __init longhaul_cpu_init(struct cpufreq_policy *policy) cpufreq_frequency_table_get_attr(longhaul_table, policy->cpu); return 0; + +err_acpi: + printk(KERN_ERR PFX "No ACPI support for CPU frequency changes.\n"); + return -ENODEV; } static int __devexit longhaul_cpu_exit(struct cpufreq_policy *policy) @@ -666,6 +691,18 @@ static int __init longhaul_init(void) if (c->x86_vendor != X86_VENDOR_CENTAUR || c->x86 != 6) return -ENODEV; +#ifdef CONFIG_SMP + if (num_online_cpus() > 1) { + return -ENODEV; + printk(KERN_ERR PFX "More than 1 CPU detected, longhaul disabled.\n"); + } +#endif +#ifdef CONFIG_X86_IO_APIC + if (cpu_has_apic) { + printk(KERN_ERR PFX "APIC detected. Longhaul is currently broken in this configuration.\n"); + return -ENODEV; + } +#endif switch (c->x86_model) { case 6 ... 9: return cpufreq_register_driver(&longhaul_driver); @@ -699,6 +736,6 @@ MODULE_AUTHOR ("Dave Jones <davej@codemonkey.org.uk>"); MODULE_DESCRIPTION ("Longhaul driver for VIA Cyrix processors."); MODULE_LICENSE ("GPL"); -module_init(longhaul_init); +late_initcall(longhaul_init); module_exit(longhaul_exit); diff --git a/arch/i386/kernel/cpu/intel_cacheinfo.c b/arch/i386/kernel/cpu/intel_cacheinfo.c index e9f0b92..5c43be4 100644 --- a/arch/i386/kernel/cpu/intel_cacheinfo.c +++ b/arch/i386/kernel/cpu/intel_cacheinfo.c @@ -759,7 +759,7 @@ static int __cpuinit cache_sysfs_init(void) if (num_cache_leaves == 0) return 0; - register_cpu_notifier(&cacheinfo_cpu_notifier); + register_hotcpu_notifier(&cacheinfo_cpu_notifier); for_each_online_cpu(i) { cacheinfo_cpu_callback(&cacheinfo_cpu_notifier, CPU_ONLINE, diff --git a/arch/i386/kernel/cpu/mcheck/mce.h b/arch/i386/kernel/cpu/mcheck/mce.h index dc2416d..84fd4cf 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.h +++ b/arch/i386/kernel/cpu/mcheck/mce.h @@ -9,6 +9,6 @@ void winchip_mcheck_init(struct cpuinfo_x86 *c); /* Call the installed machine check handler for this CPU setup. */ extern fastcall void (*machine_check_vector)(struct pt_regs *, long error_code); -extern int mce_disabled __initdata; +extern int mce_disabled; extern int nr_mce_banks; diff --git a/arch/i386/kernel/cpuid.c b/arch/i386/kernel/cpuid.c index a8d3ecd..fde8bea 100644 --- a/arch/i386/kernel/cpuid.c +++ b/arch/i386/kernel/cpuid.c @@ -167,6 +167,7 @@ static int cpuid_class_device_create(int i) return err; } +#ifdef CONFIG_HOTPLUG_CPU static int cpuid_class_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; @@ -186,6 +187,7 @@ static struct notifier_block __cpuinitdata cpuid_class_cpu_notifier = { .notifier_call = cpuid_class_cpu_callback, }; +#endif /* !CONFIG_HOTPLUG_CPU */ static int __init cpuid_init(void) { @@ -208,7 +210,7 @@ static int __init cpuid_init(void) if (err != 0) goto out_class; } - register_cpu_notifier(&cpuid_class_cpu_notifier); + register_hotcpu_notifier(&cpuid_class_cpu_notifier); err = 0; goto out; @@ -233,7 +235,7 @@ static void __exit cpuid_exit(void) class_device_destroy(cpuid_class, MKDEV(CPUID_MAJOR, cpu)); class_destroy(cpuid_class); unregister_chrdev(CPUID_MAJOR, "cpu/cpuid"); - unregister_cpu_notifier(&cpuid_class_cpu_notifier); + unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); } module_init(cpuid_init); diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 48f0f62..5b96f03 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c @@ -90,7 +90,7 @@ static void crash_save_self(struct pt_regs *regs) crash_save_this_cpu(regs, cpu); } -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) static atomic_t waiting_for_crash_ipi; static int crash_nmi_callback(struct pt_regs *regs, int cpu) diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 787190c..37a7d2e 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S @@ -42,6 +42,7 @@ #include <linux/linkage.h> #include <asm/thread_info.h> +#include <asm/irqflags.h> #include <asm/errno.h> #include <asm/segment.h> #include <asm/smp.h> @@ -76,12 +77,21 @@ NT_MASK = 0x00004000 VM_MASK = 0x00020000 #ifdef CONFIG_PREEMPT -#define preempt_stop cli +#define preempt_stop cli; TRACE_IRQS_OFF #else #define preempt_stop #define resume_kernel restore_nocheck #endif +.macro TRACE_IRQS_IRET +#ifdef CONFIG_TRACE_IRQFLAGS + testl $IF_MASK,EFLAGS(%esp) # interrupts off? + jz 1f + TRACE_IRQS_ON +1: +#endif +.endm + #ifdef CONFIG_VM86 #define resume_userspace_sig check_userspace #else @@ -194,7 +204,7 @@ VM_MASK = 0x00020000 ENTRY(ret_from_fork) CFI_STARTPROC pushl %eax - CFI_ADJUST_CFA_OFFSET -4 + CFI_ADJUST_CFA_OFFSET 4 call schedule_tail GET_THREAD_INFO(%ebp) popl %eax @@ -257,6 +267,10 @@ ENTRY(sysenter_entry) CFI_REGISTER esp, ebp movl TSS_sysenter_esp0(%esp),%esp sysenter_past_esp: + /* + * No need to follow this irqs on/off section: the syscall + * disabled irqs and here we enable it straight after entry: + */ sti pushl $(__USER_DS) CFI_ADJUST_CFA_OFFSET 4 @@ -303,6 +317,7 @@ sysenter_past_esp: call *sys_call_table(,%eax,4) movl %eax,EAX(%esp) cli + TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx jne syscall_exit_work @@ -310,6 +325,7 @@ sysenter_past_esp: movl EIP(%esp), %edx movl OLDESP(%esp), %ecx xorl %ebp,%ebp + TRACE_IRQS_ON sti sysexit CFI_ENDPROC @@ -339,6 +355,7 @@ syscall_exit: cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret + TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx testw $_TIF_ALLWORK_MASK, %cx # current->work jne syscall_exit_work @@ -355,12 +372,15 @@ restore_all: CFI_REMEMBER_STATE je ldt_ss # returning to user-space with LDT SS restore_nocheck: + TRACE_IRQS_IRET +restore_nocheck_notrace: RESTORE_REGS addl $4, %esp CFI_ADJUST_CFA_OFFSET -4 1: iret .section .fixup,"ax" iret_exc: + TRACE_IRQS_ON sti pushl $0 # no error code pushl $do_iret_error @@ -386,11 +406,13 @@ ldt_ss: subl $8, %esp # reserve space for switch16 pointer CFI_ADJUST_CFA_OFFSET 8 cli + TRACE_IRQS_OFF movl %esp, %eax /* Set up the 16bit stack frame with switch32 pointer on top, * and a switch16 pointer on top of the current frame. */ call setup_x86_bogus_stack CFI_ADJUST_CFA_OFFSET -8 # frame has moved + TRACE_IRQS_IRET RESTORE_REGS lss 20+4(%esp), %esp # switch to 16bit stack 1: iret @@ -411,6 +433,7 @@ work_resched: cli # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret + TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx andl $_TIF_WORK_MASK, %ecx # is there any work to be done other # than syscall tracing? @@ -462,6 +485,7 @@ syscall_trace_entry: syscall_exit_work: testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl jz work_pending + TRACE_IRQS_ON sti # could let do_syscall_trace() call # schedule() instead movl %esp, %eax @@ -535,9 +559,14 @@ ENTRY(irq_entries_start) vector=vector+1 .endr +/* + * the CPU automatically disables interrupts when executing an IRQ vector, + * so IRQ-flags tracing has to follow that: + */ ALIGN common_interrupt: SAVE_ALL + TRACE_IRQS_OFF movl %esp,%eax call do_IRQ jmp ret_from_intr @@ -549,9 +578,10 @@ ENTRY(name) \ pushl $~(nr); \ CFI_ADJUST_CFA_OFFSET 4; \ SAVE_ALL; \ + TRACE_IRQS_OFF \ movl %esp,%eax; \ call smp_/**/name; \ - jmp ret_from_intr; \ + jmp ret_from_intr; \ CFI_ENDPROC /* The include is where all of the SMP etc. interrupts come from */ @@ -726,7 +756,7 @@ nmi_stack_correct: xorl %edx,%edx # zero error code movl %esp,%eax # pt_regs pointer call do_nmi - jmp restore_all + jmp restore_nocheck_notrace CFI_ENDPROC nmi_stack_fixup: diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index eb79aa2..a6b8bd8 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -317,20 +317,14 @@ is386: movl $2,%ecx # set MP movl %eax,%gs lldt %ax cld # gcc2 wants the direction flag cleared at all times + pushl %eax # fake return address #ifdef CONFIG_SMP movb ready, %cl movb $1, ready - cmpb $0,%cl - je 1f # the first CPU calls start_kernel - # all other CPUs call initialize_secondary - call initialize_secondary - jmp L6 -1: + cmpb $0,%cl # the first CPU calls start_kernel + jne initialize_secondary # all other CPUs call initialize_secondary #endif /* CONFIG_SMP */ - call start_kernel -L6: - jmp L6 # main should never return here, but - # just in case, we know what happens. + jmp start_kernel /* * We depend on ET to be correct. This checks for 287/387. diff --git a/arch/i386/kernel/hpet.c b/arch/i386/kernel/hpet.c index c6737c3..17647a5 100644 --- a/arch/i386/kernel/hpet.c +++ b/arch/i386/kernel/hpet.c @@ -35,7 +35,7 @@ static int __init init_hpet_clocksource(void) void __iomem* hpet_base; u64 tmp; - if (!hpet_address) + if (!is_hpet_enabled()) return -ENODEV; /* calculate the hpet address: */ diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c index 79026f0..498e8bc 100644 --- a/arch/i386/kernel/ioport.c +++ b/arch/i386/kernel/ioport.c @@ -79,6 +79,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) memset(bitmap, 0xff, IO_BITMAP_BYTES); t->io_bitmap_ptr = bitmap; + set_thread_flag(TIF_IO_BITMAP); } /* diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 16b4917..5fe547c 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -82,10 +82,6 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs) } #endif - if (!irq_desc[irq].handle_irq) { - __do_IRQ(irq, regs); - goto out_exit; - } #ifdef CONFIG_4KSTACKS curctx = (union irq_ctx *) current_thread_info(); @@ -125,7 +121,6 @@ fastcall unsigned int do_IRQ(struct pt_regs *regs) #endif __do_IRQ(irq, regs); -out_exit: irq_exit(); return 1; @@ -166,7 +161,7 @@ void irq_ctx_init(int cpu) irqctx->tinfo.task = NULL; irqctx->tinfo.exec_domain = NULL; irqctx->tinfo.cpu = cpu; - irqctx->tinfo.preempt_count = SOFTIRQ_OFFSET; + irqctx->tinfo.preempt_count = 0; irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); softirq_ctx[cpu] = irqctx; @@ -211,6 +206,10 @@ asmlinkage void do_softirq(void) : "0"(isp) : "memory", "cc", "edx", "ecx", "eax" ); + /* + * Shouldnt happen, we returned above if in_interrupt(): + */ + WARN_ON_ONCE(softirq_count()); } local_irq_restore(flags); diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c index de2e16e..afe6505 100644 --- a/arch/i386/kernel/kprobes.c +++ b/arch/i386/kernel/kprobes.c @@ -256,11 +256,6 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) int ret = 0; kprobe_opcode_t *addr; struct kprobe_ctlblk *kcb; -#ifdef CONFIG_PREEMPT - unsigned pre_preempt_count = preempt_count(); -#else - unsigned pre_preempt_count = 1; -#endif addr = (kprobe_opcode_t *)(regs->eip - sizeof(kprobe_opcode_t)); @@ -338,13 +333,15 @@ static int __kprobes kprobe_handler(struct pt_regs *regs) return 1; ss_probe: - if (pre_preempt_count && p->ainsn.boostable == 1 && !p->post_handler){ +#ifndef CONFIG_PREEMPT + if (p->ainsn.boostable == 1 && !p->post_handler){ /* Boost up -- we can execute copied instructions directly */ reset_current_kprobe(); regs->eip = (unsigned long)p->ainsn.insn; preempt_enable_no_resched(); return 1; } +#endif prepare_singlestep(p, regs); kcb->kprobe_status = KPROBE_HIT_SS; return 1; diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c index 511abe5..6b1ae6b 100644 --- a/arch/i386/kernel/machine_kexec.c +++ b/arch/i386/kernel/machine_kexec.c @@ -189,14 +189,11 @@ NORET_TYPE void machine_kexec(struct kimage *image) memcpy((void *)reboot_code_buffer, relocate_new_kernel, relocate_new_kernel_size); - /* The segment registers are funny things, they are - * automatically loaded from a table, in memory wherever you - * set them to a specific selector, but this table is never - * accessed again you set the segment to a different selector. - * - * The more common model is are caches where the behide - * the scenes work is done, but is also dropped at arbitrary - * times. + /* The segment registers are funny things, they have both a + * visible and an invisible part. Whenever the visible part is + * set to a specific selector, the invisible part is loaded + * with from a table in memory. At no other time is the + * descriptor table in memory accessed. * * I take advantage of this here by force loading the * segments, before I zap the gdt with an invalid value. diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index a76e931..acb3514 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -107,7 +107,7 @@ int nmi_active; static __init void nmi_cpu_busy(void *data) { volatile int *endflag = data; - local_irq_enable(); + local_irq_enable_in_hardirq(); /* Intentionally don't use cpu_relax here. This is to make sure that the performance counter really ticks, even if there is a simulator or similar that catches the @@ -575,6 +575,7 @@ void touch_nmi_watchdog (void) */ touch_softlockup_watchdog(); } +EXPORT_SYMBOL(touch_nmi_watchdog); extern void die_nmi(struct pt_regs *, const char *msg); diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 94e2c87..8657c73 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -359,16 +359,16 @@ EXPORT_SYMBOL(kernel_thread); */ void exit_thread(void) { - struct task_struct *tsk = current; - struct thread_struct *t = &tsk->thread; - /* The process may have allocated an io port bitmap... nuke it. */ - if (unlikely(NULL != t->io_bitmap_ptr)) { + if (unlikely(test_thread_flag(TIF_IO_BITMAP))) { + struct task_struct *tsk = current; + struct thread_struct *t = &tsk->thread; int cpu = get_cpu(); struct tss_struct *tss = &per_cpu(init_tss, cpu); kfree(t->io_bitmap_ptr); t->io_bitmap_ptr = NULL; + clear_thread_flag(TIF_IO_BITMAP); /* * Careful, clear this in the TSS too: */ @@ -387,6 +387,7 @@ void flush_thread(void) memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); + clear_tsk_thread_flag(tsk, TIF_DEBUG); /* * Forget coprocessor state.. */ @@ -431,7 +432,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, savesegment(gs,p->thread.gs); tsk = current; - if (unlikely(NULL != tsk->thread.io_bitmap_ptr)) { + if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL); if (!p->thread.io_bitmap_ptr) { p->thread.io_bitmap_max = 0; @@ -439,6 +440,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long esp, } memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr, IO_BITMAP_BYTES); + set_tsk_thread_flag(p, TIF_IO_BITMAP); } /* @@ -533,10 +535,24 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) return 1; } -static inline void -handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss) +static noinline void __switch_to_xtra(struct task_struct *next_p, + struct tss_struct *tss) { - if (!next->io_bitmap_ptr) { + struct thread_struct *next; + + next = &next_p->thread; + + if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { + set_debugreg(next->debugreg[0], 0); + set_debugreg(next->debugreg[1], 1); + set_debugreg(next->debugreg[2], 2); + set_debugreg(next->debugreg[3], 3); + /* no 4 and 5 */ + set_debugreg(next->debugreg[6], 6); + set_debugreg(next->debugreg[7], 7); + } + + if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { /* * Disable the bitmap via an invalid offset. We still cache * the previous bitmap owner and the IO bitmap contents: @@ -544,6 +560,7 @@ handle_io_bitmap(struct thread_struct *next, struct tss_struct *tss) tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; return; } + if (likely(next == tss->io_bitmap_owner)) { /* * Previous owner of the bitmap (hence the bitmap content) @@ -671,20 +688,11 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas set_iopl_mask(next->iopl); /* - * Now maybe reload the debug registers + * Now maybe handle debug registers and/or IO bitmaps */ - if (unlikely(next->debugreg[7])) { - set_debugreg(next->debugreg[0], 0); - set_debugreg(next->debugreg[1], 1); - set_debugreg(next->debugreg[2], 2); - set_debugreg(next->debugreg[3], 3); - /* no 4 and 5 */ - set_debugreg(next->debugreg[6], 6); - set_debugreg(next->debugreg[7], 7); - } - - if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) - handle_io_bitmap(next, tss); + if (unlikely((task_thread_info(next_p)->flags & _TIF_WORK_CTXSW) + || test_tsk_thread_flag(prev_p, TIF_IO_BITMAP))) + __switch_to_xtra(next_p, tss); disable_tsc(prev_p, next_p); diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index fd7eaf7..d3db03f 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c @@ -468,8 +468,11 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) for(i=0; i<4; i++) if ((0x5f54 >> ((data >> (16 + 4*i)) & 0xf)) & 1) goto out_tsk; + if (data) + set_tsk_thread_flag(child, TIF_DEBUG); + else + clear_tsk_thread_flag(child, TIF_DEBUG); } - addr -= (long) &dummy->u_debugreg; addr = addr >> 2; child->thread.debugreg[addr] = data; diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 08c00d2..345ffb7 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -26,7 +26,7 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/mmzone.h> -#include <linux/tty.h> +#include <linux/screen_info.h> #include <linux/ioport.h> #include <linux/acpi.h> #include <linux/apm_bios.h> @@ -956,38 +956,6 @@ efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg) return 0; } - /* - * This function checks if the entire range <start,end> is mapped with type. - * - * Note: this function only works correct if the e820 table is sorted and - * not-overlapping, which is the case - */ -int __init -e820_all_mapped(unsigned long s, unsigned long e, unsigned type) -{ - u64 start = s; - u64 end = e; - int i; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - if (type && ei->type != type) - continue; - /* is the region (part) in overlap with the current region ?*/ - if (ei->addr >= end || ei->addr + ei->size <= start) - continue; - /* if the region is at the beginning of <start,end> we move - * start to the end of the region since it's ok until there - */ - if (ei->addr <= start) - start = ei->addr + ei->size; - /* if start is now at or beyond end, we're done, full - * coverage */ - if (start >= end) - return 1; /* we're done */ - } - return 0; -} - /* * Find the highest page frame number we have available */ @@ -1327,7 +1295,10 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat res->start = e820.map[i].addr; res->end = res->start + e820.map[i].size - 1; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; - request_resource(&iomem_resource, res); + if (request_resource(&iomem_resource, res)) { + kfree(res); + continue; + } if (e820.map[i].type == E820_RAM) { /* * We don't know which RAM region contains kernel data, diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 6f5fea0..f948419 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -212,14 +212,20 @@ valid_k7: * then we print a warning if not, and always resync. */ -static atomic_t tsc_start_flag = ATOMIC_INIT(0); -static atomic_t tsc_count_start = ATOMIC_INIT(0); -static atomic_t tsc_count_stop = ATOMIC_INIT(0); -static unsigned long long tsc_values[NR_CPUS]; +static struct { + atomic_t start_flag; + atomic_t count_start; + atomic_t count_stop; + unsigned long long values[NR_CPUS]; +} tsc __initdata = { + .start_flag = ATOMIC_INIT(0), + .count_start = ATOMIC_INIT(0), + .count_stop = ATOMIC_INIT(0), +}; #define NR_LOOPS 5 -static void __init synchronize_tsc_bp (void) +static void __init synchronize_tsc_bp(void) { int i; unsigned long long t0; @@ -233,7 +239,7 @@ static void __init synchronize_tsc_bp (void) /* convert from kcyc/sec to cyc/usec */ one_usec = cpu_khz / 1000; - atomic_set(&tsc_start_flag, 1); + atomic_set(&tsc.start_flag, 1); wmb(); /* @@ -250,16 +256,16 @@ static void __init synchronize_tsc_bp (void) /* * all APs synchronize but they loop on '== num_cpus' */ - while (atomic_read(&tsc_count_start) != num_booting_cpus()-1) + while (atomic_read(&tsc.count_start) != num_booting_cpus()-1) cpu_relax(); - atomic_set(&tsc_count_stop, 0); + atomic_set(&tsc.count_stop, 0); wmb(); /* * this lets the APs save their current TSC: */ - atomic_inc(&tsc_count_start); + atomic_inc(&tsc.count_start); - rdtscll(tsc_values[smp_processor_id()]); + rdtscll(tsc.values[smp_processor_id()]); /* * We clear the TSC in the last loop: */ @@ -269,56 +275,54 @@ static void __init synchronize_tsc_bp (void) /* * Wait for all APs to leave the synchronization point: */ - while (atomic_read(&tsc_count_stop) != num_booting_cpus()-1) + while (atomic_read(&tsc.count_stop) != num_booting_cpus()-1) cpu_relax(); - atomic_set(&tsc_count_start, 0); + atomic_set(&tsc.count_start, 0); wmb(); - atomic_inc(&tsc_count_stop); + atomic_inc(&tsc.count_stop); } sum = 0; for (i = 0; i < NR_CPUS; i++) { if (cpu_isset(i, cpu_callout_map)) { - t0 = tsc_values[i]; + t0 = tsc.values[i]; sum += t0; } } avg = sum; do_div(avg, num_booting_cpus()); - sum = 0; for (i = 0; i < NR_CPUS; i++) { if (!cpu_isset(i, cpu_callout_map)) continue; - delta = tsc_values[i] - avg; + delta = tsc.values[i] - avg; if (delta < 0) delta = -delta; /* * We report bigger than 2 microseconds clock differences. */ if (delta > 2*one_usec) { - long realdelta; + long long realdelta; + if (!buggy) { buggy = 1; printk("\n"); } realdelta = delta; do_div(realdelta, one_usec); - if (tsc_values[i] < avg) + if (tsc.values[i] < avg) realdelta = -realdelta; - if (realdelta > 0) - printk(KERN_INFO "CPU#%d had %ld usecs TSC " + if (realdelta) + printk(KERN_INFO "CPU#%d had %Ld usecs TSC " "skew, fixed it up.\n", i, realdelta); } - - sum += delta; } if (!buggy) printk("passed.\n"); } -static void __init synchronize_tsc_ap (void) +static void __init synchronize_tsc_ap(void) { int i; @@ -327,20 +331,20 @@ static void __init synchronize_tsc_ap (void) * this gets called, so we first wait for the BP to * finish SMP initialization: */ - while (!atomic_read(&tsc_start_flag)) + while (!atomic_read(&tsc.start_flag)) cpu_relax(); for (i = 0; i < NR_LOOPS; i++) { - atomic_inc(&tsc_count_start); - while (atomic_read(&tsc_count_start) != num_booting_cpus()) + atomic_inc(&tsc.count_start); + while (atomic_read(&tsc.count_start) != num_booting_cpus()) cpu_relax(); - rdtscll(tsc_values[smp_processor_id()]); + rdtscll(tsc.values[smp_processor_id()]); if (i == NR_LOOPS-1) write_tsc(0, 0); - atomic_inc(&tsc_count_stop); - while (atomic_read(&tsc_count_stop) != num_booting_cpus()) + atomic_inc(&tsc.count_stop); + while (atomic_read(&tsc.count_stop) != num_booting_cpus()) cpu_relax(); } } diff --git a/arch/i386/kernel/stacktrace.c b/arch/i386/kernel/stacktrace.c new file mode 100644 index 0000000..e62a037 --- /dev/null +++ b/arch/i386/kernel/stacktrace.c @@ -0,0 +1,98 @@ +/* + * arch/i386/kernel/stacktrace.c + * + * Stack trace management functions + * + * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> + */ +#include <linux/sched.h> +#include <linux/stacktrace.h> + +static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) +{ + return p > (void *)tinfo && + p < (void *)tinfo + THREAD_SIZE - 3; +} + +/* + * Save stack-backtrace addresses into a stack_trace buffer: + */ +static inline unsigned long +save_context_stack(struct stack_trace *trace, unsigned int skip, + struct thread_info *tinfo, unsigned long *stack, + unsigned long ebp) +{ + unsigned long addr; + +#ifdef CONFIG_FRAME_POINTER + while (valid_stack_ptr(tinfo, (void *)ebp)) { + addr = *(unsigned long *)(ebp + 4); + if (!skip) + trace->entries[trace->nr_entries++] = addr; + else + skip--; + if (trace->nr_entries >= trace->max_entries) + break; + /* + * break out of recursive entries (such as + * end_of_stack_stop_unwind_function): + */ + if (ebp == *(unsigned long *)ebp) + break; + + ebp = *(unsigned long *)ebp; + } +#else + while (valid_stack_ptr(tinfo, stack)) { + addr = *stack++; + if (__kernel_text_address(addr)) { + if (!skip) + trace->entries[trace->nr_entries++] = addr; + else + skip--; + if (trace->nr_entries >= trace->max_entries) + break; + } + } +#endif + + return ebp; +} + +/* + * Save stack-backtrace addresses into a stack_trace buffer. + * If all_contexts is set, all contexts (hardirq, softirq and process) + * are saved. If not set then only the current context is saved. + */ +void save_stack_trace(struct stack_trace *trace, + struct task_struct *task, int all_contexts, + unsigned int skip) +{ + unsigned long ebp; + unsigned long *stack = &ebp; + + WARN_ON(trace->nr_entries || !trace->max_entries); + + if (!task || task == current) { + /* Grab ebp right from our regs: */ + asm ("movl %%ebp, %0" : "=r" (ebp)); + } else { + /* ebp is the last reg pushed by switch_to(): */ + ebp = *(unsigned long *) task->thread.esp; + } + + while (1) { + struct thread_info *context = (struct thread_info *) + ((unsigned long)stack & (~(THREAD_SIZE - 1))); + + ebp = save_context_stack(trace, skip, context, stack, ebp); + stack = (unsigned long *)context->previous_esp; + if (!all_contexts || !stack || + trace->nr_entries >= trace->max_entries) + break; + trace->entries[trace->nr_entries++] = ULONG_MAX; + if (trace->nr_entries >= trace->max_entries) + break; + } +} + diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 316421a..edd00f6 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c @@ -135,7 +135,7 @@ unsigned long profile_pc(struct pt_regs *regs) { unsigned long pc = instruction_pointer(regs); - if (in_lock_functions(pc)) + if (!user_mode_vm(regs) && in_lock_functions(pc)) return *(unsigned long *)(regs->ebp + 4); return pc; @@ -206,15 +206,16 @@ irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) unsigned long get_cmos_time(void) { unsigned long retval; + unsigned long flags; - spin_lock(&rtc_lock); + spin_lock_irqsave(&rtc_lock, flags); if (efi_enabled) retval = efi_get_time(); else retval = mach_get_cmos_time(); - spin_unlock(&rtc_lock); + spin_unlock_irqrestore(&rtc_lock, flags); return retval; } diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index e8c6086..7e9edaf 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -92,7 +92,11 @@ asmlinkage void spurious_interrupt_bug(void); asmlinkage void machine_check(void); static int kstack_depth_to_print = 24; +#ifdef CONFIG_STACK_UNWIND static int call_trace = 1; +#else +#define call_trace (-1) +#endif ATOMIC_NOTIFIER_HEAD(i386die_chain); int register_die_notifier(struct notifier_block *nb) @@ -100,13 +104,13 @@ int register_die_notifier(struct notifier_block *nb) vmalloc_sync_all(); return atomic_notifier_chain_register(&i386die_chain, nb); } -EXPORT_SYMBOL(register_die_notifier); +EXPORT_SYMBOL(register_die_notifier); /* used modular by kdb */ int unregister_die_notifier(struct notifier_block *nb) { return atomic_notifier_chain_unregister(&i386die_chain, nb); } -EXPORT_SYMBOL(unregister_die_notifier); +EXPORT_SYMBOL(unregister_die_notifier); /* used modular by kdb */ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) { @@ -115,28 +119,13 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) } /* - * Print CONFIG_STACK_BACKTRACE_COLS address/symbol entries per line. + * Print one address/symbol entries per line. */ -static inline int print_addr_and_symbol(unsigned long addr, char *log_lvl, - int printed) +static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl) { - if (!printed) - printk(log_lvl); - -#if CONFIG_STACK_BACKTRACE_COLS == 1 printk(" [<%08lx>] ", addr); -#else - printk(" <%08lx> ", addr); -#endif - print_symbol("%s", addr); - printed = (printed + 1) % CONFIG_STACK_BACKTRACE_COLS; - if (printed) - printk(" "); - else - printk("\n"); - - return printed; + print_symbol("%s\n", addr); } static inline unsigned long print_context_stack(struct thread_info *tinfo, @@ -144,12 +133,11 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, char *log_lvl) { unsigned long addr; - int printed = 0; /* nr of entries already printed on current line */ #ifdef CONFIG_FRAME_POINTER while (valid_stack_ptr(tinfo, (void *)ebp)) { addr = *(unsigned long *)(ebp + 4); - printed = print_addr_and_symbol(addr, log_lvl, printed); + print_addr_and_symbol(addr, log_lvl); /* * break out of recursive entries (such as * end_of_stack_stop_unwind_function): @@ -162,28 +150,23 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, while (valid_stack_ptr(tinfo, stack)) { addr = *stack++; if (__kernel_text_address(addr)) - printed = print_addr_and_symbol(addr, log_lvl, printed); + print_addr_and_symbol(addr, log_lvl); } #endif - if (printed) - printk("\n"); - return ebp; } -static asmlinkage int show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) +static asmlinkage int +show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) { int n = 0; - int printed = 0; /* nr of entries already printed on current line */ while (unwind(info) == 0 && UNW_PC(info)) { - ++n; - printed = print_addr_and_symbol(UNW_PC(info), log_lvl, printed); + n++; + print_addr_and_symbol(UNW_PC(info), log_lvl); if (arch_unw_user_mode(info)) break; } - if (printed) - printk("\n"); return n; } @@ -209,10 +192,20 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, unw_ret = show_trace_unwind(&info, log_lvl); } if (unw_ret > 0) { - if (call_trace > 0) + if (call_trace == 1 && !arch_unw_user_mode(&info)) { + print_symbol("DWARF2 unwinder stuck at %s\n", + UNW_PC(&info)); + if (UNW_SP(&info) >= PAGE_OFFSET) { + printk("Leftover inexact backtrace:\n"); + stack = (void *)UNW_SP(&info); + } else + printk("Full inexact backtrace again:\n"); + } else if (call_trace >= 1) return; - printk("%sLegacy call trace:\n", log_lvl); - } + else + printk("Full inexact backtrace again:\n"); + } else + printk("Inexact backtrace:\n"); } if (task == current) { @@ -345,35 +338,35 @@ void show_registers(struct pt_regs *regs) static void handle_BUG(struct pt_regs *regs) { + unsigned long eip = regs->eip; unsigned short ud2; - unsigned short line; - char *file; - char c; - unsigned long eip; - - eip = regs->eip; if (eip < PAGE_OFFSET) - goto no_bug; + return; if (__get_user(ud2, (unsigned short __user *)eip)) - goto no_bug; + return; if (ud2 != 0x0b0f) - goto no_bug; - if (__get_user(line, (unsigned short __user *)(eip + 2))) - goto bug; - if (__get_user(file, (char * __user *)(eip + 4)) || - (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) - file = "<bad filename>"; + return; printk(KERN_EMERG "------------[ cut here ]------------\n"); - printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line); -no_bug: - return; +#ifdef CONFIG_DEBUG_BUGVERBOSE + do { + unsigned short line; + char *file; + char c; + + if (__get_user(line, (unsigned short __user *)(eip + 2))) + break; + if (__get_user(file, (char * __user *)(eip + 4)) || + (unsigned long)file < PAGE_OFFSET || __get_user(c, file)) + file = "<bad filename>"; - /* Here we know it was a BUG but file-n-line is unavailable */ -bug: - printk(KERN_EMERG "Kernel BUG\n"); + printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line); + return; + } while (0); +#endif + printk(KERN_EMERG "Kernel BUG at [verbose debug info unavailable]\n"); } /* This is gone through when something in the kernel @@ -463,11 +456,9 @@ void die(const char * str, struct pt_regs * regs, long err) if (in_interrupt()) panic("Fatal exception in interrupt"); - if (panic_on_oops) { - printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n"); - ssleep(5); + if (panic_on_oops) panic("Fatal exception"); - } + oops_exit(); do_exit(SIGSEGV); } @@ -1253,14 +1244,18 @@ static int __init kstack_setup(char *s) } __setup("kstack=", kstack_setup); +#ifdef CONFIG_STACK_UNWIND static int __init call_trace_setup(char *s) { if (strcmp(s, "old") == 0) call_trace = -1; else if (strcmp(s, "both") == 0) call_trace = 0; - else if (strcmp(s, "new") == 0) + else if (strcmp(s, "newfallback") == 0) call_trace = 1; + else if (strcmp(s, "new") == 2) + call_trace = 2; return 1; } __setup("call_trace=", call_trace_setup); +#endif diff --git a/arch/i386/kernel/vsyscall.lds.S b/arch/i386/kernel/vsyscall.lds.S index e26975f..f66cd11 100644 --- a/arch/i386/kernel/vsyscall.lds.S +++ b/arch/i386/kernel/vsyscall.lds.S @@ -10,6 +10,7 @@ SECTIONS . = VDSO_PRELINK + SIZEOF_HEADERS; .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } .dynsym : { *(.dynsym) } .dynstr : { *(.dynstr) } .gnu.version : { *(.gnu.version) } |